1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; General vector move expander.  Stores of zero can be emitted directly
;; (str xzr / stp xzr, xzr), so operand 1 is only forced into a register
;; when that replacement is not possible.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((GET_MODE_SIZE (<MODE>mode) == 16
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || GET_MODE_SIZE (<MODE>mode) == 8)))
37 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Misaligned vector move expander.  May not fail during expansion
;; (see the comment in the body), so a mem := mem / mem := const form
;; is legalised here by loading operand 1 into a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into all lanes of an integer vector.  The source
;; may be either a SIMD register (lane 0) or a general-purpose register.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]

;; Floating-point variant: the scalar source is always a SIMD register.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]

;; Duplicate a selected lane of a vector into all lanes of the result.
;; The lane index (operand 2) is remapped for big-endian before printing.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]

;; As above, but the source vector mode has the opposite width
;; (<VSWAP_WIDTH>) from the result mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move.  Alternatives cover load, store of zero (xzr),
;; store, SIMD reg-reg move, SIMD->GP, GP->SIMD, GP-GP and immediate.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1],
125 default: gcc_unreachable ();
128 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
129 neon_logic<q>, neon_to_gp<q>, f_mcr,\
130 mov_reg, neon_move<q>")]

;; 128-bit vector move.  Storing zero uses "stp xzr, xzr"; the GP-reg
;; alternatives need two instructions (length 8) since a Q value
;; occupies a pair of X registers.
133 (define_insn "*aarch64_simd_mov<VQ:mode>"
134 [(set (match_operand:VQ 0 "nonimmediate_operand"
135 "=w, Umq, m, w, ?r, ?w, ?r, w")
136 (match_operand:VQ 1 "general_operand"
137 "m, Dz, w, w, w, r, r, Dn"))]
139 && (register_operand (operands[0], <MODE>mode)
140 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
142 switch (which_alternative)
145 return "ldr\t%q0, %1";
147 return "stp\txzr, xzr, %0";
149 return "str\t%q1, %0";
151 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
157 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
162 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
163 neon_logic<q>, multiple, multiple,\
164 multiple, neon_move<q>")
165 (set_attr "length" "4,4,4,4,8,8,8,4")]

168 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes, since lane zero lines up with the scalar element.
171 (define_insn "aarch64_store_lane0<mode>"
172 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
173 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
174 (parallel [(match_operand 2 "const_int_operand" "n")])))]
176 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
177 "str\\t%<Vetype>1, %0"
178 [(set_attr "type" "neon_store1_1reg<q>")]
;; Load a pair of adjacent 64-bit vectors (LDP).  The second address must
;; equal the first plus the mode size, checked with rtx_equal_p below.
181 (define_insn "load_pair<mode>"
182 [(set (match_operand:VD 0 "register_operand" "=w")
183 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
184 (set (match_operand:VD 2 "register_operand" "=w")
185 (match_operand:VD 3 "memory_operand" "m"))]
187 && rtx_equal_p (XEXP (operands[3], 0),
188 plus_constant (Pmode,
189 XEXP (operands[1], 0),
190 GET_MODE_SIZE (<MODE>mode)))"
192 [(set_attr "type" "neon_ldp")]

;; Store a pair of adjacent 64-bit vectors (STP); same adjacency check.
195 (define_insn "store_pair<mode>"
196 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
197 (match_operand:VD 1 "register_operand" "w"))
198 (set (match_operand:VD 2 "memory_operand" "=m")
199 (match_operand:VD 3 "register_operand" "w"))]
201 && rtx_equal_p (XEXP (operands[2], 0),
202 plus_constant (Pmode,
203 XEXP (operands[0], 0),
204 GET_MODE_SIZE (<MODE>mode)))"
206 [(set_attr "type" "neon_stp")]
;; After reload, split a 128-bit move held entirely in GP registers
;; into two DImode reg-reg moves.
210 [(set (match_operand:VQ 0 "register_operand" "")
211 (match_operand:VQ 1 "register_operand" ""))]
212 "TARGET_SIMD && reload_completed
213 && GP_REGNUM_P (REGNO (operands[0]))
214 && GP_REGNUM_P (REGNO (operands[1]))"
217 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; After reload, split a 128-bit move that crosses between the FP and
;; GP register files.
222 [(set (match_operand:VQ 0 "register_operand" "")
223 (match_operand:VQ 1 "register_operand" ""))]
224 "TARGET_SIMD && reload_completed
225 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
226 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
229 aarch64_split_simd_move (operands[0], operands[1]);

;; Helper expander used by the split above: move a Q-register value in
;; two 64-bit halves, direction depending on which side is in GP regs.
233 (define_expand "aarch64_split_simd_mov<mode>"
234 [(set (match_operand:VQ 0)
235 (match_operand:VQ 1))]
238 rtx dst = operands[0];
239 rtx src = operands[1];
241 if (GP_REGNUM_P (REGNO (src)))
243 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
244 rtx src_high_part = gen_highpart (<VHALF>mode, src);
247 (gen_move_lo_quad_<mode> (dst, src_low_part));
249 (gen_move_hi_quad_<mode> (dst, src_high_part));
254 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
255 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
256 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
257 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
260 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
262 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

;; Extract the low 64-bit half of a Q register into a GP register.
268 (define_insn "aarch64_simd_mov_from_<mode>low"
269 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
271 (match_operand:VQ 1 "register_operand" "w")
272 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
273 "TARGET_SIMD && reload_completed"
275 [(set_attr "type" "neon_to_gp<q>")
276 (set_attr "length" "4")

;; Extract the high 64-bit half of a Q register into a GP register.
279 (define_insn "aarch64_simd_mov_from_<mode>high"
280 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
282 (match_operand:VQ 1 "register_operand" "w")
283 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
284 "TARGET_SIMD && reload_completed"
286 [(set_attr "type" "neon_to_gp<q>")
287 (set_attr "length" "4")
;; ORN: bitwise OR with inverted first operand.  Note the operand order
;; in the template: the inverted operand is printed last.
290 (define_insn "orn<mode>3"
291 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
292 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
293 (match_operand:VDQ_I 2 "register_operand" "w")))]
295 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
296 [(set_attr "type" "neon_logic<q>")]

;; BIC: bitwise AND with inverted first operand (bit clear).
299 (define_insn "bic<mode>3"
300 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
301 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
302 (match_operand:VDQ_I 2 "register_operand" "w")))]
304 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
305 [(set_attr "type" "neon_logic<q>")]

;; Vector integer addition.
308 (define_insn "add<mode>3"
309 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
310 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
311 (match_operand:VDQ_I 2 "register_operand" "w")))]
313 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
314 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtraction.
317 (define_insn "sub<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
323 [(set_attr "type" "neon_sub<q>")]

;; Vector integer multiply (no 64-bit element variant, hence VDQ_BHSI).
326 (define_insn "mul<mode>3"
327 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
328 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
329 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
331 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
332 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; Byte swap within each element, implemented with REV.
335 (define_insn "bswap<mode>2"
336 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
337 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
339 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
340 [(set_attr "type" "neon_rev<q>")]

;; Reverse the bits within each byte (RBIT); byte vectors only.
343 (define_insn "aarch64_rbit<mode>"
344 [(set (match_operand:VB 0 "register_operand" "=w")
345 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
348 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
349 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: byte-swap, bit-reverse (viewed as bytes),
;; then count leading zeros of the result.
352 (define_expand "ctz<mode>2"
353 [(set (match_operand:VS 0 "register_operand")
354 (ctz:VS (match_operand:VS 1 "register_operand")))]
357 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
358 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
360 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
361 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));

;; xorsign: result = op1 with its sign flipped by op2's sign bit.
;; Implemented as op1 XOR (op2 AND sign-bit-mask) on the integer view.
366 (define_expand "xorsign<mode>3"
367 [(match_operand:VHSDF 0 "register_operand")
368 (match_operand:VHSDF 1 "register_operand")
369 (match_operand:VHSDF 2 "register_operand")]
373 machine_mode imode = <V_INT_EQUIV>mode;
374 rtx v_bitmask = gen_reg_rtx (imode);
375 rtx op1x = gen_reg_rtx (imode);
376 rtx op2x = gen_reg_rtx (imode);
378 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
379 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
381 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
383 emit_move_insn (v_bitmask,
384 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
385 HOST_WIDE_INT_M1U << bits));
387 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
388 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
389 emit_move_insn (operands[0],
390 lowpart_subreg (<MODE>mode, op1x, imode));
395 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator (tied to the output register).
396 (define_insn "aarch64_<sur>dot<vsi2qi>"
397 [(set (match_operand:VS 0 "register_operand" "=w")
398 (plus:VS (match_operand:VS 1 "register_operand" "0")
399 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
400 (match_operand:<VSI2QI> 3 "register_operand" "w")]
403 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
404 [(set_attr "type" "neon_dot")]

407 ;; These expands map to the Dot Product optab the vectorizer checks for.
408 ;; The auto-vectorizer expects a dot product builtin that also does an
409 ;; accumulation into the provided register.
410 ;; Given the following pattern
412 ;; for (i=0; i<len; i++) {
418 ;; This can be auto-vectorized to
419 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
421 ;; given enough iterations. However the vectorizer can keep unrolling the loop
422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
427 (define_expand "<sur>dot_prod<vsi2qi>"
428 [(set (match_operand:VS 0 "register_operand")
429 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
430 (match_operand:<VSI2QI> 2 "register_operand")]
432 (match_operand:VS 3 "register_operand")))]
436 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
438 emit_insn (gen_rtx_SET (operands[0], operands[3]));

442 ;; These instructions map to the __builtins for the Dot Product
443 ;; indexed operations.
;; 64-bit indexed variant: the lane index is endian-corrected first.
444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
445 [(set (match_operand:VS 0 "register_operand" "=w")
446 (plus:VS (match_operand:VS 1 "register_operand" "0")
447 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
448 (match_operand:V8QI 3 "register_operand" "<h_con>")
449 (match_operand:SI 4 "immediate_operand" "i")]
454 = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
455 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
457 [(set_attr "type" "neon_dot")]

;; 128-bit indexed variant (lane selected from a V16QI register).
460 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
461 [(set (match_operand:VS 0 "register_operand" "=w")
462 (plus:VS (match_operand:VS 1 "register_operand" "0")
463 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
464 (match_operand:V16QI 3 "register_operand" "<h_con>")
465 (match_operand:SI 4 "immediate_operand" "i")]
470 = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
471 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
473 [(set_attr "type" "neon_dot")]
;; copysign: select the sign bit from operand 2 and the rest from
;; operand 1, using a BSL with a sign-bit mask.
476 (define_expand "copysign<mode>3"
477 [(match_operand:VHSDF 0 "register_operand")
478 (match_operand:VHSDF 1 "register_operand")
479 (match_operand:VHSDF 2 "register_operand")]
480 "TARGET_FLOAT && TARGET_SIMD"
482 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
483 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
485 emit_move_insn (v_bitmask,
486 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
487 HOST_WIDE_INT_M1U << bits));
488 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
489 operands[2], operands[1]));

;; Multiply a vector by a duplicated lane of another vector
;; (MUL/FMUL by element); the lane index is endian-corrected.
494 (define_insn "*aarch64_mul3_elt<mode>"
495 [(set (match_operand:VMUL 0 "register_operand" "=w")
499 (match_operand:VMUL 1 "register_operand" "<h_con>")
500 (parallel [(match_operand:SI 2 "immediate_operand")])))
501 (match_operand:VMUL 3 "register_operand" "w")))]
504 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
505 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
507 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; As above but the lane comes from a vector of the opposite width.
510 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
511 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
512 (mult:VMUL_CHANGE_NLANES
513 (vec_duplicate:VMUL_CHANGE_NLANES
515 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
516 (parallel [(match_operand:SI 2 "immediate_operand")])))
517 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
520 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
521 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
523 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; Multiply a vector by a scalar duplicated into all lanes; prints as a
;; by-element multiply using lane 0.
526 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
527 [(set (match_operand:VMUL 0 "register_operand" "=w")
530 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
531 (match_operand:VMUL 2 "register_operand" "w")))]
533 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
534 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE).
537 (define_insn "aarch64_rsqrte<mode>"
538 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
539 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
542 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
543 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root step (FRSQRTS), used in Newton iterations.
545 (define_insn "aarch64_rsqrts<mode>"
546 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
547 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
548 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
551 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
552 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; rsqrt optab entry: expands to the approximate sequence.
554 (define_expand "rsqrt<mode>2"
555 [(set (match_operand:VALLF 0 "register_operand" "=w")
556 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
560 aarch64_emit_approx_sqrt (operands[0], operands[1], true);

;; DF multiply by a selected lane of a V2DF register.
564 (define_insn "*aarch64_mul3_elt_to_64v2df"
565 [(set (match_operand:DF 0 "register_operand" "=w")
568 (match_operand:V2DF 1 "register_operand" "w")
569 (parallel [(match_operand:SI 2 "immediate_operand")]))
570 (match_operand:DF 3 "register_operand" "w")))]
573 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
574 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
576 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
579 (define_insn "neg<mode>2"
580 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
581 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
583 "neg\t%0.<Vtype>, %1.<Vtype>"
584 [(set_attr "type" "neon_neg<q>")]

;; Vector integer absolute value (abs RTL code).
587 (define_insn "abs<mode>2"
588 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
589 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
591 "abs\t%0.<Vtype>, %1.<Vtype>"
592 [(set_attr "type" "neon_abs<q>")]

595 ;; The intrinsic version of integer ABS must not be allowed to
596 ;; combine with any operation with an integrated ABS step, such
;; as SABD; hence it is expressed as an unspec rather than (abs ...).
598 (define_insn "aarch64_abs<mode>"
599 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
601 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
604 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
605 [(set_attr "type" "neon_abs<q>")]

;; Signed absolute difference: SABD.
608 (define_insn "abd<mode>_3"
609 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
610 (abs:VDQ_BHSI (minus:VDQ_BHSI
611 (match_operand:VDQ_BHSI 1 "register_operand" "w")
612 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
614 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
615 [(set_attr "type" "neon_abd<q>")]

;; Signed absolute difference and accumulate: SABA (operand 3 is the
;; accumulator, tied to the output).
618 (define_insn "aba<mode>_3"
619 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
620 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
621 (match_operand:VDQ_BHSI 1 "register_operand" "w")
622 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
623 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
625 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
626 [(set_attr "type" "neon_arith_acc<q>")]

;; Floating-point absolute difference: FABD.
629 (define_insn "fabd<mode>3"
630 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
633 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
634 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
636 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
637 [(set_attr "type" "neon_fp_abd_<stype><q>")]
640 ;; For AND (vector, register) and BIC (vector, immediate)
641 (define_insn "and<mode>3"
642 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
643 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
644 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
647 switch (which_alternative)
650 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
652 return aarch64_output_simd_mov_immediate (operands[2],
653 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
658 [(set_attr "type" "neon_logic<q>")]

661 ;; For ORR (vector, register) and ORR (vector, immediate)
662 (define_insn "ior<mode>3"
663 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
664 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
665 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
668 switch (which_alternative)
671 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
673 return aarch64_output_simd_mov_immediate (operands[2],
674 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
679 [(set_attr "type" "neon_logic<q>")]

;; Vector exclusive OR (EOR).
682 (define_insn "xor<mode>3"
683 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
684 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
685 (match_operand:VDQ_I 2 "register_operand" "w")))]
687 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
688 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise NOT.
691 (define_insn "one_cmpl<mode>2"
692 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
693 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
695 "not\t%0.<Vbtype>, %1.<Vbtype>"
696 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of an integer vector (vec_merge with a
;; one-hot mask in operand 2).  Source may be a GP reg, SIMD reg lane 0,
;; or memory (LD1 to one lane).
699 (define_insn "aarch64_simd_vec_set<mode>"
700 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
702 (vec_duplicate:VDQ_BHSI
703 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
704 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
705 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
708 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
709 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
710 switch (which_alternative)
713 return "ins\\t%0.<Vetype>[%p2], %w1";
715 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
717 return "ld1\\t{%0.<Vetype>}[%p2], %1";
722 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]

;; Copy one lane of a vector into a selected lane of another vector
;; (INS element form); both lane indices are endian-corrected.
725 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
726 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
728 (vec_duplicate:VALL_F16
730 (match_operand:VALL_F16 3 "register_operand" "w")
732 [(match_operand:SI 4 "immediate_operand" "i")])))
733 (match_operand:VALL_F16 1 "register_operand" "0")
734 (match_operand:SI 2 "immediate_operand" "i")))]
737 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
738 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
739 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
741 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
743 [(set_attr "type" "neon_ins<q>")]

;; As above, with the source lane taken from a vector of the opposite
;; width (<VSWAP_WIDTH>).
746 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
747 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
748 (vec_merge:VALL_F16_NO_V2Q
749 (vec_duplicate:VALL_F16_NO_V2Q
751 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
753 [(match_operand:SI 4 "immediate_operand" "i")])))
754 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
755 (match_operand:SI 2 "immediate_operand" "i")))]
758 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
759 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
760 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
761 INTVAL (operands[4])));
763 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
765 [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by immediate (USHR).
768 (define_insn "aarch64_simd_lshr<mode>"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
773 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
774 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by immediate (SSHR).
777 (define_insn "aarch64_simd_ashr<mode>"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
780 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
782 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
783 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by immediate (SHL).
786 (define_insn "aarch64_simd_imm_shl<mode>"
787 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
788 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
789 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
791 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
792 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by a per-lane register amount (SSHL).
795 (define_insn "aarch64_simd_reg_sshl<mode>"
796 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
797 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
798 (match_operand:VDQ_I 2 "register_operand" "w")))]
800 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
801 [(set_attr "type" "neon_shift_reg<q>")]

;; USHL with a register amount: negative amounts shift right, so this is
;; expressed as an unspec rather than a plain shift RTL code.
804 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
805 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
806 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
807 (match_operand:VDQ_I 2 "register_operand" "w")]
808 UNSPEC_ASHIFT_UNSIGNED))]
810 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
811 [(set_attr "type" "neon_shift_reg<q>")]

;; SSHL with a register amount; signed counterpart of the above.
814 (define_insn "aarch64_simd_reg_shl<mode>_signed"
815 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
816 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
817 (match_operand:VDQ_I 2 "register_operand" "w")]
818 UNSPEC_ASHIFT_SIGNED))]
820 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
821 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left optab with a scalar SI amount.  Constant in-range amounts
;; use the immediate form; otherwise the amount is duplicated into a
;; vector and the register form (SSHL) is used.
824 (define_expand "ashl<mode>3"
825 [(match_operand:VDQ_I 0 "register_operand" "")
826 (match_operand:VDQ_I 1 "register_operand" "")
827 (match_operand:SI 2 "general_operand" "")]
830 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
833 if (CONST_INT_P (operands[2]))
835 shift_amount = INTVAL (operands[2]);
836 if (shift_amount >= 0 && shift_amount < bit_width)
838 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
840 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
847 operands[2] = force_reg (SImode, operands[2]);
850 else if (MEM_P (operands[2]))
852 operands[2] = force_reg (SImode, operands[2]);
855 if (REG_P (operands[2]))
857 rtx tmp = gen_reg_rtx (<MODE>mode);
858 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
859 convert_to_mode (<VEL>mode,
862 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Logical shift-right optab.  The register form negates the amount
;; because USHL shifts right for negative amounts.
871 (define_expand "lshr<mode>3"
872 [(match_operand:VDQ_I 0 "register_operand" "")
873 (match_operand:VDQ_I 1 "register_operand" "")
874 (match_operand:SI 2 "general_operand" "")]
877 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
880 if (CONST_INT_P (operands[2]))
882 shift_amount = INTVAL (operands[2]);
883 if (shift_amount > 0 && shift_amount <= bit_width)
885 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
887 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
893 operands[2] = force_reg (SImode, operands[2]);
895 else if (MEM_P (operands[2]))
897 operands[2] = force_reg (SImode, operands[2]);
900 if (REG_P (operands[2]))
902 rtx tmp = gen_reg_rtx (SImode);
903 rtx tmp1 = gen_reg_rtx (<MODE>mode);
904 emit_insn (gen_negsi2 (tmp, operands[2]));
905 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
906 convert_to_mode (<VEL>mode,
908 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],

;; Arithmetic shift-right optab; same shape as lshr but using the
;; signed (SSHL) register form.
918 (define_expand "ashr<mode>3"
919 [(match_operand:VDQ_I 0 "register_operand" "")
920 (match_operand:VDQ_I 1 "register_operand" "")
921 (match_operand:SI 2 "general_operand" "")]
924 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
927 if (CONST_INT_P (operands[2]))
929 shift_amount = INTVAL (operands[2]);
930 if (shift_amount > 0 && shift_amount <= bit_width)
932 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
934 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
940 operands[2] = force_reg (SImode, operands[2]);
942 else if (MEM_P (operands[2]))
944 operands[2] = force_reg (SImode, operands[2]);
947 if (REG_P (operands[2]))
949 rtx tmp = gen_reg_rtx (SImode);
950 rtx tmp1 = gen_reg_rtx (<MODE>mode);
951 emit_insn (gen_negsi2 (tmp, operands[2]));
952 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
953 convert_to_mode (<VEL>mode,
955 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: maps directly to SSHL.
965 (define_expand "vashl<mode>3"
966 [(match_operand:VDQ_I 0 "register_operand" "")
967 (match_operand:VDQ_I 1 "register_operand" "")
968 (match_operand:VDQ_I 2 "register_operand" "")]
971 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

976 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
977 ;; Negating individual lanes most certainly offsets the
978 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the amounts, then SSHL.
979 (define_expand "vashr<mode>3"
980 [(match_operand:VDQ_BHSI 0 "register_operand" "")
981 (match_operand:VDQ_BHSI 1 "register_operand" "")
982 (match_operand:VDQ_BHSI 2 "register_operand" "")]
985 rtx neg = gen_reg_rtx (<MODE>mode);
986 emit (gen_neg<mode>2 (neg, operands[2]));
987 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; DImode arithmetic shift right that also accepts a count of 64
;; (mapped to 63, which gives the same all-sign-bits result).
993 (define_expand "aarch64_ashr_simddi"
994 [(match_operand:DI 0 "register_operand" "=w")
995 (match_operand:DI 1 "register_operand" "w")
996 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
999 /* An arithmetic shift right by 64 fills the result with copies of the sign
1000 bit, just like asr by 63 - however the standard pattern does not handle
1002 if (INTVAL (operands[2]) == 64)
1003 operands[2] = GEN_INT (63);
1004 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Vector-by-vector logical shift right: negate the amounts, then USHL.
1009 (define_expand "vlshr<mode>3"
1010 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1011 (match_operand:VDQ_BHSI 1 "register_operand" "")
1012 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1015 rtx neg = gen_reg_rtx (<MODE>mode);
1016 emit (gen_neg<mode>2 (neg, operands[2]));
1017 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; DImode logical shift right with a count of 64 producing zero.
1022 (define_expand "aarch64_lshr_simddi"
1023 [(match_operand:DI 0 "register_operand" "=w")
1024 (match_operand:DI 1 "register_operand" "w")
1025 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1028 if (INTVAL (operands[2]) == 64)
1029 emit_move_insn (operands[0], const0_rtx);
1031 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));

;; vec_set optab: convert the lane number into a one-hot merge mask.
1036 (define_expand "vec_set<mode>"
1037 [(match_operand:VDQ_BHSI 0 "register_operand")
1038 (match_operand:<VEL> 1 "register_operand")
1039 (match_operand:SI 2 "immediate_operand")]
1042 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1043 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1044 GEN_INT (elem), operands[0]));

1049 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Big-endian uses SHL because element order is reversed in the register.
1050 (define_insn "vec_shr_<mode>"
1051 [(set (match_operand:VD 0 "register_operand" "=w")
1052 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1053 (match_operand:SI 2 "immediate_operand" "i")]
1057 if (BYTES_BIG_ENDIAN)
1058 return "shl %d0, %d1, %2";
1060 return "ushr %d0, %d1, %2";
1062 [(set_attr "type" "neon_shift_imm")]
;; Insert a DI value into one lane of a V2DI vector, from a GP or
;; SIMD register.
1065 (define_insn "aarch64_simd_vec_setv2di"
1066 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1069 (match_operand:DI 1 "register_operand" "r,w"))
1070 (match_operand:V2DI 3 "register_operand" "0,0")
1071 (match_operand:SI 2 "immediate_operand" "i,i")))]
1074 int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1075 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1076 switch (which_alternative)
1079 return "ins\\t%0.d[%p2], %1";
1081 return "ins\\t%0.d[%p2], %1.d[0]";
1086 [(set_attr "type" "neon_from_gp, neon_ins_q")]

;; vec_set optab for V2DI: build the one-hot merge mask from the index.
1089 (define_expand "vec_setv2di"
1090 [(match_operand:V2DI 0 "register_operand")
1091 (match_operand:DI 1 "register_operand")
1092 (match_operand:SI 2 "immediate_operand")]
1095 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1096 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1097 GEN_INT (elem), operands[0]));

;; Floating-point lane insert (INS from SIMD register lane 0).
1102 (define_insn "aarch64_simd_vec_set<mode>"
1103 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1105 (vec_duplicate:VDQF_F16
1106 (match_operand:<VEL> 1 "register_operand" "w"))
1107 (match_operand:VDQF_F16 3 "register_operand" "0")
1108 (match_operand:SI 2 "immediate_operand" "i")))]
1111 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1113 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1114 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1116 [(set_attr "type" "neon_ins<q>")]

;; vec_set optab for floating-point vector modes.
1119 (define_expand "vec_set<mode>"
1120 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1121 (match_operand:<VEL> 1 "register_operand" "w")
1122 (match_operand:SI 2 "immediate_operand" "")]
1125 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1126 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1127 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: op0 = op1 + op2 * op3 (op1 tied to op0).
1133 (define_insn "aarch64_mla<mode>"
1134 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1135 (plus:VDQ_BHSI (mult:VDQ_BHSI
1136 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1137 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1138 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1140 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1141 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Combiner pattern: mla with one multiplicand broadcast from a lane of a
;; same-width vector.  aarch64_endian_lane_rtx converts the RTL lane
;; number to the architectural lane for the [%2] operand.
1144 (define_insn "*aarch64_mla_elt<mode>"
1145 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1148 (vec_duplicate:VDQHS
1150 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1151 (parallel [(match_operand:SI 2 "immediate_operand")])))
1152 (match_operand:VDQHS 3 "register_operand" "w"))
1153 (match_operand:VDQHS 4 "register_operand" "0")))]
1156 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1157 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1159 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from the opposite-width (64 <-> 128 bit)
;; vector mode, hence <VSWAP_WIDTH> for operand 1 and the lane mapping.
1162 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1163 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1166 (vec_duplicate:VDQHS
1168 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1169 (parallel [(match_operand:SI 2 "immediate_operand")])))
1170 (match_operand:VDQHS 3 "register_operand" "w"))
1171 (match_operand:VDQHS 4 "register_operand" "0")))]
1174 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1175 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Combiner pattern: mla with a scalar duplicated across all lanes; emitted
;; as an mla by-element against lane 0 of the scalar's register.
1180 (define_insn "*aarch64_mla_elt_merge<mode>"
1181 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183 (mult:VDQHS (vec_duplicate:VDQHS
1184 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1185 (match_operand:VDQHS 2 "register_operand" "w"))
1186 (match_operand:VDQHS 3 "register_operand" "0")))]
1188 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1189 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: op0 = op1 - op2 * op3 (op1 tied to op0).
;; The mls patterns below mirror the mla family above.
1192 (define_insn "aarch64_mls<mode>"
1193 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1194 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1195 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1196 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1198 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1199 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; mls with one multiplicand broadcast from a lane of a same-width vector.
1202 (define_insn "*aarch64_mls_elt<mode>"
1203 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1205 (match_operand:VDQHS 4 "register_operand" "0")
1207 (vec_duplicate:VDQHS
1209 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1210 (parallel [(match_operand:SI 2 "immediate_operand")])))
1211 (match_operand:VDQHS 3 "register_operand" "w"))))]
1214 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1215 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1217 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; mls with the lane taken from the opposite-width vector mode.
1220 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1221 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223 (match_operand:VDQHS 4 "register_operand" "0")
1225 (vec_duplicate:VDQHS
1227 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1228 (parallel [(match_operand:SI 2 "immediate_operand")])))
1229 (match_operand:VDQHS 3 "register_operand" "w"))))]
1232 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1233 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; mls with a duplicated scalar: emitted by-element against lane 0.
1238 (define_insn "*aarch64_mls_elt_merge<mode>"
1239 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241 (match_operand:VDQHS 1 "register_operand" "0")
1242 (mult:VDQHS (vec_duplicate:VDQHS
1243 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1244 (match_operand:VDQHS 3 "register_operand" "w"))))]
1246 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1247 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1250 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min for byte/half/word integer vectors.
1251 (define_insn "<su><maxmin><mode>3"
1252 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1253 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1254 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1256 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1257 [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no direct max/min instruction; synthesize it as a compare
;; followed by a vcond select (elided dump: the switch that picks
;; cmp_operator from the MAXMIN code is not visible here).
1260 (define_expand "<su><maxmin>v2di3"
1261 [(set (match_operand:V2DI 0 "register_operand" "")
1262 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1263 (match_operand:V2DI 2 "register_operand" "")))]
1266 enum rtx_code cmp_operator;
1287 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1288 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1289 operands[2], cmp_fmt, operands[1], operands[2]));
1293 ;; Pairwise Integer Max/Min operations.
1294 (define_insn "aarch64_<maxmin_uns>p<mode>"
1295 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1296 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1297 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1300 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1301 [(set_attr "type" "neon_minmax<q>")]
1304 ;; Pairwise FP Max/Min operations.
1305 (define_insn "aarch64_<maxmin_uns>p<mode>"
1306 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1307 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1308 (match_operand:VHSDF 2 "register_operand" "w")]
1311 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1312 [(set_attr "type" "neon_minmax<q>")]
1315 ;; vec_concat gives a new vector with the low elements from operand 1, and
1316 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1317 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1318 ;; What that means, is that the RTL descriptions of the below patterns
1319 ;; need to change depending on endianness.
1321 ;; Move to the low architectural bits of the register.
1322 ;; On little-endian this is { operand, zeroes }
1323 ;; On big-endian this is { zeroes, operand }
;; Little-endian variant, modes with more than 2 elements (VQ_NO2E).
;; Three alternatives: SIMD reg source, GP reg via fmov, GP reg via dup
;; (elided dump: the output templates for the alternatives are missing).
1325 (define_insn "move_lo_quad_internal_<mode>"
1326 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1328 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1329 (vec_duplicate:<VHALF> (const_int 0))))]
1330 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1335 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1336 (set_attr "simd" "yes,*,yes")
1337 (set_attr "fp" "*,yes,*")
1338 (set_attr "length" "4")]
;; Little-endian variant, two-element modes (VQ_2E).
1341 (define_insn "move_lo_quad_internal_<mode>"
1342 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1344 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1346 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1351 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1352 (set_attr "simd" "yes,*,yes")
1353 (set_attr "fp" "*,yes,*")
1354 (set_attr "length" "4")]
;; Big-endian variants: the zero half and the operand half swap places in
;; the vec_concat, per the endianness note above.
1357 (define_insn "move_lo_quad_internal_be_<mode>"
1358 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1360 (vec_duplicate:<VHALF> (const_int 0))
1361 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1362 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1367 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1368 (set_attr "simd" "yes,*,yes")
1369 (set_attr "fp" "*,yes,*")
1370 (set_attr "length" "4")]
1373 (define_insn "move_lo_quad_internal_be_<mode>"
1374 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1377 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1378 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1383 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1384 (set_attr "simd" "yes,*,yes")
1385 (set_attr "fp" "*,yes,*")
1386 (set_attr "length" "4")]
;; Dispatcher: select the little- or big-endian internal pattern.
1389 (define_expand "move_lo_quad_<mode>"
1390 [(match_operand:VQ 0 "register_operand")
1391 (match_operand:VQ 1 "register_operand")]
1394 if (BYTES_BIG_ENDIAN)
1395 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1397 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1402 ;; Move operand1 to the high architectural bits of the register, keeping
1403 ;; the low architectural bits of operand2.
1404 ;; For little-endian this is { operand2, operand1 }
1405 ;; For big-endian this is { operand1, operand2 }
;; Little-endian insn: keep the low half of operand 0 (selected by the
;; lo-half parallel, operand 2) and insert operand 1 into lane d[1].
1407 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1408 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1412 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1413 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1414 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1416 ins\\t%0.d[1], %1.d[0]
1418 [(set_attr "type" "neon_ins")]
;; Big-endian insn: vec_concat operands swap, assembly is unchanged.
1421 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1422 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1424 (match_operand:<VHALF> 1 "register_operand" "w,r")
1427 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1428 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1430 ins\\t%0.d[1], %1.d[0]
1432 [(set_attr "type" "neon_ins")]
;; Dispatcher: build the lo-half parallel and pick the endian variant.
1435 (define_expand "move_hi_quad_<mode>"
1436 [(match_operand:VQ 0 "register_operand" "")
1437 (match_operand:<VHALF> 1 "register_operand" "")]
1440 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1441 if (BYTES_BIG_ENDIAN)
1442 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1445 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1450 ;; Narrowing operations.
;; Truncate each element of a 128-bit vector to half width (xtn).
1453 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1454 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1455 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1457 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1458 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate for 64-bit source modes: concatenate the two inputs into
;; a 128-bit temporary (lo/hi swapped on big-endian), then narrow it.
1461 (define_expand "vec_pack_trunc_<mode>"
1462 [(match_operand:<VNARROWD> 0 "register_operand" "")
1463 (match_operand:VDN 1 "register_operand" "")
1464 (match_operand:VDN 2 "register_operand" "")]
1467 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1468 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1469 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1471 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1472 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1473 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-truncate for 128-bit source modes: xtn + xtn2 pair, order chosen
;; by endianness.  Earlyclobber (=&w) because the destination is written
;; before both inputs have been read.
1479 (define_insn "vec_pack_trunc_<mode>"
1480 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1481 (vec_concat:<VNARROWQ2>
1482 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1483 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1486 if (BYTES_BIG_ENDIAN)
1487 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1489 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1491 [(set_attr "type" "multiple")
1492 (set_attr "length" "8")]
1495 ;; Widening operations.
;; Sign/zero-extend the low half of a 128-bit vector (shll with shift 0).
1497 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1498 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1499 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1500 (match_operand:VQW 1 "register_operand" "w")
1501 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1504 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1505 [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half (shll2).
1508 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1509 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1510 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1511 (match_operand:VQW 1 "register_operand" "w")
1512 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1515 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1516 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo half-selection parallel and
;; emit the matching insn above.
1519 (define_expand "vec_unpack<su>_hi_<mode>"
1520 [(match_operand:<VWIDE> 0 "register_operand" "")
1521 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1524 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1525 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1531 (define_expand "vec_unpack<su>_lo_<mode>"
1532 [(match_operand:<VWIDE> 0 "register_operand" "")
1533 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1536 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1537 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1543 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves: acc + extend(lo(op2)) *
;; extend(lo(op4)), accumulator tied to the destination.
1545 (define_insn "*aarch64_<su>mlal_lo<mode>"
1546 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1549 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1550 (match_operand:VQW 2 "register_operand" "w")
1551 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1552 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1553 (match_operand:VQW 4 "register_operand" "w")
1555 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1557 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1558 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Same on the high halves (mlal2).
1561 (define_insn "*aarch64_<su>mlal_hi<mode>"
1562 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1565 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1566 (match_operand:VQW 2 "register_operand" "w")
1567 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1568 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569 (match_operand:VQW 4 "register_operand" "w")
1571 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1573 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1574 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the low halves (mlsl).
1577 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1578 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1580 (match_operand:<VWIDE> 1 "register_operand" "0")
1582 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1583 (match_operand:VQW 2 "register_operand" "w")
1584 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1585 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1586 (match_operand:VQW 4 "register_operand" "w")
1589 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1590 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the high halves (mlsl2).
1593 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1594 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1596 (match_operand:<VWIDE> 1 "register_operand" "0")
1598 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599 (match_operand:VQW 2 "register_operand" "w")
1600 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1601 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602 (match_operand:VQW 4 "register_operand" "w")
1605 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1606 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-source variants: whole D-register operands, no half selection.
1609 (define_insn "*aarch64_<su>mlal<mode>"
1610 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1614 (match_operand:VD_BHSI 1 "register_operand" "w"))
1616 (match_operand:VD_BHSI 2 "register_operand" "w")))
1617 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1619 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1620 [(set_attr "type" "neon_mla_<Vetype>_long")]
1623 (define_insn "*aarch64_<su>mlsl<mode>"
1624 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1626 (match_operand:<VWIDE> 1 "register_operand" "0")
1629 (match_operand:VD_BHSI 2 "register_operand" "w"))
1631 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1633 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1634 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: extend(lo(op1)) * extend(lo(op2)).
1637 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1638 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1639 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1640 (match_operand:VQW 1 "register_operand" "w")
1641 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 2 "register_operand" "w")
1646 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1647 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the lo-half parallel and emit the insn.
1650 (define_expand "vec_widen_<su>mult_lo_<mode>"
1651 [(match_operand:<VWIDE> 0 "register_operand" "")
1652 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1657 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves (mull2) and its expander.
1664 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1665 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1666 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1667 (match_operand:VQW 1 "register_operand" "w")
1668 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1669 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1670 (match_operand:VQW 2 "register_operand" "w")
1673 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1674 [(set_attr "type" "neon_mul_<Vetype>_long")]
1677 (define_expand "vec_widen_<su>mult_hi_<mode>"
1678 [(match_operand:<VWIDE> 0 "register_operand" "")
1679 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1680 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1683 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1684 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1692 ;; FP vector operations.
1693 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1694 ;; double-precision (64-bit) floating-point data types and arithmetic as
1695 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1696 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1698 ;; Floating-point operations can raise an exception. Vectorizing such
1699 ;; operations are safe because of reasons explained below.
1701 ;; ARMv8 permits an extension to enable trapped floating-point
1702 ;; exception handling, however this is an optional feature. In the
1703 ;; event of a floating-point exception being raised by vectorised
1705 ;; 1. If trapped floating-point exceptions are available, then a trap
1706 ;; will be taken when any lane raises an enabled exception. A trap
1707 ;; handler may determine which lane raised the exception.
1708 ;; 2. Alternatively a sticky exception flag is set in the
1709 ;; floating-point status register (FPSR). Software may explicitly
1710 ;; test the exception flags, in which case the tests will either
1711 ;; prevent vectorisation, allowing precise identification of the
1712 ;; failing operation, or if tested outside of vectorisable regions
1713 ;; then the specific operation and lane are not of interest.
1715 ;; FP arithmetic operations.
;; Element-wise FP add / subtract / multiply on half, single and double
;; precision vectors (VHSDF).
1717 (define_insn "add<mode>3"
1718 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720 (match_operand:VHSDF 2 "register_operand" "w")))]
1722 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1726 (define_insn "sub<mode>3"
1727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729 (match_operand:VHSDF 2 "register_operand" "w")))]
1731 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1735 (define_insn "mul<mode>3"
1736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738 (match_operand:VHSDF 2 "register_operand" "w")))]
1740 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1741 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP division: try the approximate-reciprocal sequence first (DONE on
;; success, per the elided branch); otherwise fall through to *div<mode>3.
1744 (define_expand "div<mode>3"
1745 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1746 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1747 (match_operand:VHSDF 2 "register_operand" "w")))]
1750 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1753 operands[1] = force_reg (<MODE>mode, operands[1]);
1756 (define_insn "*div<mode>3"
1757 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1759 (match_operand:VHSDF 2 "register_operand" "w")))]
1761 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1762 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FP negate and absolute value.
1765 (define_insn "neg<mode>2"
1766 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1767 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1769 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1770 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1773 (define_insn "abs<mode>2"
1774 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1775 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1777 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1778 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3, accumulator tied to dest.
1781 (define_insn "fma<mode>4"
1782 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1783 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1784 (match_operand:VHSDF 2 "register_operand" "w")
1785 (match_operand:VHSDF 3 "register_operand" "0")))]
1787 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1788 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; fmla with one multiplicand broadcast from a lane (same-width vector).
1791 (define_insn "*aarch64_fma4_elt<mode>"
1792 [(set (match_operand:VDQF 0 "register_operand" "=w")
1796 (match_operand:VDQF 1 "register_operand" "<h_con>")
1797 (parallel [(match_operand:SI 2 "immediate_operand")])))
1798 (match_operand:VDQF 3 "register_operand" "w")
1799 (match_operand:VDQF 4 "register_operand" "0")))]
1802 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1803 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1805 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above with the lane taken from the opposite-width vector mode.
1808 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1809 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1811 (vec_duplicate:VDQSF
1813 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1814 (parallel [(match_operand:SI 2 "immediate_operand")])))
1815 (match_operand:VDQSF 3 "register_operand" "w")
1816 (match_operand:VDQSF 4 "register_operand" "0")))]
1819 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1820 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1822 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmla with a duplicated scalar: by-element form against lane 0.
1825 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1826 [(set (match_operand:VMUL 0 "register_operand" "=w")
1829 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1830 (match_operand:VMUL 2 "register_operand" "w")
1831 (match_operand:VMUL 3 "register_operand" "0")))]
1833 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1834 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed as a by-element fmla from a V2DF lane.
1837 (define_insn "*aarch64_fma4_elt_to_64v2df"
1838 [(set (match_operand:DF 0 "register_operand" "=w")
1841 (match_operand:V2DF 1 "register_operand" "w")
1842 (parallel [(match_operand:SI 2 "immediate_operand")]))
1843 (match_operand:DF 3 "register_operand" "w")
1844 (match_operand:DF 4 "register_operand" "0")))]
1847 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1848 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1850 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (fmls): op0 = -(op1 * op2) + op3.  The fnma
;; patterns mirror the fma family above.
1853 (define_insn "fnma<mode>4"
1854 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1856 (match_operand:VHSDF 1 "register_operand" "w")
1858 (match_operand:VHSDF 2 "register_operand" "w"))
1859 (match_operand:VHSDF 3 "register_operand" "0")))]
1861 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1862 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; fmls with one multiplicand broadcast from a lane (same-width vector).
1865 (define_insn "*aarch64_fnma4_elt<mode>"
1866 [(set (match_operand:VDQF 0 "register_operand" "=w")
1869 (match_operand:VDQF 3 "register_operand" "w"))
1872 (match_operand:VDQF 1 "register_operand" "<h_con>")
1873 (parallel [(match_operand:SI 2 "immediate_operand")])))
1874 (match_operand:VDQF 4 "register_operand" "0")))]
1877 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1878 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1880 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above with the lane from the opposite-width vector mode.
1883 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1884 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1887 (match_operand:VDQSF 3 "register_operand" "w"))
1888 (vec_duplicate:VDQSF
1890 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1891 (parallel [(match_operand:SI 2 "immediate_operand")])))
1892 (match_operand:VDQSF 4 "register_operand" "0")))]
1895 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1896 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1898 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmls with a duplicated scalar: by-element form against lane 0.
1901 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1902 [(set (match_operand:VMUL 0 "register_operand" "=w")
1905 (match_operand:VMUL 2 "register_operand" "w"))
1907 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1908 (match_operand:VMUL 3 "register_operand" "0")))]
1910 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1911 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed as a by-element fmls from a V2DF lane.
1914 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1915 [(set (match_operand:DF 0 "register_operand" "=w")
1918 (match_operand:V2DF 1 "register_operand" "w")
1919 (parallel [(match_operand:SI 2 "immediate_operand")]))
1921 (match_operand:DF 3 "register_operand" "w"))
1922 (match_operand:DF 4 "register_operand" "0")))]
1925 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1926 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1928 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1931 ;; Vector versions of the floating-point frint patterns.
1932 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1933 (define_insn "<frint_pattern><mode>2"
1934 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1935 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1938 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1939 [(set_attr "type" "neon_fp_round_<stype><q>")]
1942 ;; Vector versions of the fcvt standard patterns.
1943 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer, one insn per rounding mode.
1944 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1945 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1946 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1947 [(match_operand:VHSDF 1 "register_operand" "w")]
1950 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1951 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1954 ;; HF Scalar variants of related SIMD instructions.
;; These require the ARMv8.2-A half-precision extension (TARGET_SIMD_F16INST).
1955 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1956 [(set (match_operand:HI 0 "register_operand" "=w")
1957 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1959 "TARGET_SIMD_F16INST"
1960 "fcvt<frint_suffix><su>\t%h0, %h1"
1961 [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion (fcvtz).
1964 (define_insn "<optab>_trunchfhi2"
1965 [(set (match_operand:HI 0 "register_operand" "=w")
1966 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1967 "TARGET_SIMD_F16INST"
1968 "fcvtz<su>\t%h0, %h1"
1969 [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF conversion (scvtf/ucvtf).
1972 (define_insn "<optab>hihf2"
1973 [(set (match_operand:HF 0 "register_operand" "=w")
1974 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1975 "TARGET_SIMD_F16INST"
1976 "<su_optab>cvtf\t%h0, %h1"
1977 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (x * 2^n) -> int into a single fcvtz with a fractional-bits
;; immediate; operand 2 must be a power-of-two FP vector constant whose
;; exponent fits the element width.
1980 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1981 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1982 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1984 (match_operand:VDQF 1 "register_operand" "w")
1985 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1988 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1989 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1991 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1993 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1994 output_asm_insn (buf, operands);
1997 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns and ftrunc onto the unspec
;; insns above (bodies elided in this dump).
2000 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2001 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2002 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2003 [(match_operand:VHSDF 1 "register_operand")]
2008 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2009 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2010 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2011 [(match_operand:VHSDF 1 "register_operand")]
2016 (define_expand "ftrunc<VHSDF:mode>2"
2017 [(set (match_operand:VHSDF 0 "register_operand")
2018 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer -> FP conversion (scvtf/ucvtf).
2023 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2024 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2028 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2029 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2032 ;; Conversions between vectors of floats and doubles.
2033 ;; Contains a mix of patterns to match standard pattern names
2034 ;; and those for intrinsics.
2036 ;; Float widening operations.
;; Widen (float_extend) the low half of a HF/SF vector (fcvtl).
2038 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2039 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2040 (float_extend:<VWIDE> (vec_select:<VHALF>
2041 (match_operand:VQ_HSF 1 "register_operand" "w")
2042 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2045 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2046 [(set_attr "type" "neon_fp_cvt_widen_s")]
2049 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits immediate (fcvtzs/fcvtzu with fraction bits).
2051 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2052 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2053 (unspec:<VHSDF:FCVT_TARGET>
2054 [(match_operand:VHSDF 1 "register_operand" "w")
2055 (match_operand:SI 2 "immediate_operand" "i")]
2058 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2059 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits immediate (scvtf/ucvtf with fraction bits).
2062 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2063 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2064 (unspec:<VDQ_HSDI:FCVT_TARGET>
2065 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2066 (match_operand:SI 2 "immediate_operand" "i")]
2069 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2070 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2073 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2074 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2075 ;; the meaning of HI and LO changes depending on the target endianness.
2076 ;; While elsewhere we map the higher numbered elements of a vector to
2077 ;; the lower architectural lanes of the vector, for these patterns we want
2078 ;; to always treat "hi" as referring to the higher architectural lanes.
2079 ;; Consequently, while the patterns below look inconsistent with our
2080 ;; other big-endian patterns their behavior is as required.
;; Expander: widen the low architectural half (hi == false selects the
;; lo-half parallel, matching the insn's vect_par_cnst_lo_half predicate).
2082 (define_expand "vec_unpacks_lo_<mode>"
2083 [(match_operand:<VWIDE> 0 "register_operand" "")
2084 (match_operand:VQ_HSF 1 "register_operand" "")]
2087 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2088 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen (float_extend) the high half of a HF/SF vector (fcvtl2).
2094 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2095 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2096 (float_extend:<VWIDE> (vec_select:<VHALF>
2097 (match_operand:VQ_HSF 1 "register_operand" "w")
2098 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2101 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2102 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander: widen (float_extend) the HIGH architectural half of a HF/SF
;; vector.  Per the endianness note above, "hi" always means the higher
;; architectural lanes, so we pass true to select the hi-half parallel.
;; Fix: the previous code built the hi-half parallel but then emitted the
;; *_lo_ generator, whose vect_par_cnst_lo_half predicate rejects a
;; hi-half parallel; emit the *_hi_ insn instead.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a 64-bit HF/SF vector to the full double-width mode (fcvtl).
2116 (define_insn "aarch64_float_extend_lo_<Vwide>"
2117 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2118 (float_extend:<VWIDE>
2119 (match_operand:VDF 1 "register_operand" "w")))]
2121 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2122 [(set_attr "type" "neon_fp_cvt_widen_s")]
2125 ;; Float narrowing operations.
;; Narrow a double-width FP vector into a 64-bit result (fcvtn).
2127 (define_insn "aarch64_float_truncate_lo_<mode>"
2128 [(set (match_operand:VDF 0 "register_operand" "=w")
2130 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2132 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2133 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping operand 1 in the low half
;; (fcvtn2); the vec_concat operand order flips with endianness.
2136 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2137 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139 (match_operand:VDF 1 "register_operand" "0")
2141 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2142 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2143 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2144 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2147 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2148 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2151 (match_operand:<VWIDE> 2 "register_operand" "w"))
2152 (match_operand:VDF 1 "register_operand" "0")))]
2153 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2154 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2155 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatcher: select the little- or big-endian fcvtn2 pattern.
2158 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2159 [(match_operand:<VDBL> 0 "register_operand" "=w")
2160 (match_operand:VDF 1 "register_operand" "0")
2161 (match_operand:<VWIDE> 2 "register_operand" "w")]
2164 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2165 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2166 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2167 emit_insn (gen (operands[0], operands[1], operands[2]));
2172 (define_expand "vec_pack_trunc_v2df"
2173 [(set (match_operand:V4SF 0 "register_operand")
2175 (float_truncate:V2SF
2176 (match_operand:V2DF 1 "register_operand"))
2177 (float_truncate:V2SF
2178 (match_operand:V2DF 2 "register_operand"))
2182 rtx tmp = gen_reg_rtx (V2SFmode);
2183 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2184 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2186 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2187 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2188 tmp, operands[hi]));
;; Pack two scalar DFs into one V2SF: place both scalars into a V2DF
;; temporary (indices swapped on big-endian) and narrow it with fcvtn.
;; The temporary must be V2DFmode — gen_move_lo_quad_v2df /
;; gen_move_hi_quad_v2df write a V2DF and
;; gen_aarch64_float_truncate_lo_v2sf reads one; V2SFmode here was a
;; wrong-mode bug.
2193 (define_expand "vec_pack_trunc_df"
2194 [(set (match_operand:V2SF 0 "register_operand")
2197 (match_operand:DF 1 "register_operand"))
2199 (match_operand:DF 2 "register_operand"))
2203 rtx tmp = gen_reg_rtx (V2DFmode);
2204 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2205 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2207 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2208 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2209 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2215 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2217 ;; a = (b < c) ? b : c;
2218 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2219 ;; either explicitly or indirectly via -ffast-math.
2221 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2222 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2223 ;; operand will be returned when both operands are zero (i.e. they may not
2224 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2225 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard patterns: emitted as fmaxnm/fminnm, which is safe
;; because (per the comment above) these RTL codes only appear when NaN
;; and signed-zero semantics need not be honoured.
2228 (define_insn "<su><maxmin><mode>3"
2229 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2230 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2231 (match_operand:VHSDF 2 "register_operand" "w")))]
2233 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2234 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2237 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2238 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2239 ;; which implement the IEEE fmax ()/fmin () functions.
2240 (define_insn "<maxmin_uns><mode>3"
2241 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2242 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2243 (match_operand:VHSDF 2 "register_operand" "w")]
2246 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2247 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2250 ;; 'across lanes' add.
;; Integer add reduction: reduce into a scratch vector with the
;; internal pattern, then extract element 0 (endian-corrected lane) as
;; the scalar result.
2252 (define_expand "reduc_plus_scal_<mode>"
2253 [(match_operand:<VEL> 0 "register_operand" "=w")
2254 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2258 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2259 rtx scratch = gen_reg_rtx (<MODE>mode);
2260 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2261 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; faddp: pairwise floating-point add of two vectors.
2266 (define_insn "aarch64_faddp<mode>"
2267 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2268 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2269 (match_operand:VHSDF 2 "register_operand" "w")]
2272 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2273 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add; the scalar sum lands in element 0 of the
;; destination.
2276 (define_insn "aarch64_reduc_plus_internal<mode>"
2277 [(set (match_operand:VDQV 0 "register_operand" "=w")
2278 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2281 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2282 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI variant: no across-lanes form exists, so use addp with the
;; operand repeated.
2285 (define_insn "aarch64_reduc_plus_internalv2si"
2286 [(set (match_operand:V2SI 0 "register_operand" "=w")
2287 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2290 "addp\\t%0.2s, %1.2s, %1.2s"
2291 [(set_attr "type" "neon_reduc_add")]
;; Two-element float add reduction: a single scalar faddp suffices.
2294 (define_insn "reduc_plus_scal_<mode>"
2295 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2296 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2299 "faddp\\t%<Vetype>0, %1.<Vtype>"
2300 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add reduction: two pairwise faddp steps, then extract lane 0
;; (endian-corrected).
2303 (define_expand "reduc_plus_scal_v4sf"
2304 [(set (match_operand:SF 0 "register_operand")
2305 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2309 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2310 rtx scratch = gen_reg_rtx (V4SFmode);
2311 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2312 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2313 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; cls: count leading sign bits in each lane.
2317 (define_insn "clrsb<mode>2"
2318 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2319 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2321 "cls\\t%0.<Vtype>, %1.<Vtype>"
2322 [(set_attr "type" "neon_cls<q>")]
;; clz: count leading zero bits in each lane.
2325 (define_insn "clz<mode>2"
2326 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2327 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2329 "clz\\t%0.<Vtype>, %1.<Vtype>"
2330 [(set_attr "type" "neon_cls<q>")]
;; cnt: population count of each byte lane (byte vectors only).
2333 (define_insn "popcount<mode>2"
2334 [(set (match_operand:VB 0 "register_operand" "=w")
2335 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2337 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2338 [(set_attr "type" "neon_cnt<q>")]
2341 ;; 'across lanes' max and min ops.
2343 ;; Template for outputting a scalar, so we can create __builtins which can be
2344 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
2345 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2346 [(match_operand:<VEL> 0 "register_operand")
2347 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2351 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2352 rtx scratch = gen_reg_rtx (<MODE>mode);
2353 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2355 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2360 ;; Likewise for integer cases, signed and unsigned.
2361 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2362 [(match_operand:<VEL> 0 "register_operand")
2363 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2367 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2368 rtx scratch = gen_reg_rtx (<MODE>mode);
2369 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2371 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lanes integer max/min; the scalar result is left in element 0.
2376 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2377 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2378 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2381 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2382 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI variant: no across-lanes form, so use the pairwise op with the
;; operand repeated.
2385 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2386 [(set (match_operand:V2SI 0 "register_operand" "=w")
2387 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2390 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2391 [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes floating-point max/min.
2394 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2395 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2396 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2399 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2400 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2403 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2405 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2408 ;; Thus our BSL is of the form:
2409 ;; op0 = bsl (mask, op2, op3)
2410 ;; We can use any of:
2413 ;; bsl mask, op1, op2
2414 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2415 ;; bit op0, op2, mask
2416 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2417 ;; bif op0, op1, mask
2419 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2420 ;; Some forms of straight-line code may generate the equivalent form
2421 ;; in *aarch64_simd_bsl<mode>_alt.
;; The three alternatives tie a different operand to the destination so
;; the register allocator can pick whichever of bsl/bit/bif avoids a
;; copy.
2423 (define_insn "aarch64_simd_bsl<mode>_internal"
2424 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2428 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2429 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2430 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2431 (match_dup:<V_INT_EQUIV> 3)
2435 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2436 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2437 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2438 [(set_attr "type" "neon_bsl<q>")]
2441 ;; We need this form in addition to the above pattern to match the case
2442 ;; when combine tries merging three insns such that the second operand of
2443 ;; the outer XOR matches the second operand of the inner XOR rather than
2444 ;; the first. The two are equivalent but since recog doesn't try all
2445 ;; permutations of commutative operations, we have to have a separate pattern.
2447 (define_insn "*aarch64_simd_bsl<mode>_alt"
2448 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2452 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2453 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")
2454 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2455 (match_dup:VSDQ_I_DI 2)))]
2458 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2459 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2460 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2461 [(set_attr "type" "neon_bsl<q>")]
;; Public expander: for FP modes, view the value operands through their
;; integer-equivalent mode (gen_lowpart) so the internal pattern's
;; operand tying works, writing to an integer-mode temporary; the
;; result is moved back via lowpart when a temporary was used.
2464 (define_expand "aarch64_simd_bsl<mode>"
2465 [(match_operand:VALLDIF 0 "register_operand")
2466 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2467 (match_operand:VALLDIF 2 "register_operand")
2468 (match_operand:VALLDIF 3 "register_operand")]
2471 /* We can't alias operands together if they have different modes. */
2472 rtx tmp = operands[0];
2473 if (FLOAT_MODE_P (<MODE>mode))
2475 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2476 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2477 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2479 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2480 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2484 if (tmp != operands[0])
2485 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: per-lane select of operand 1 / operand 2 under the
;; integer mask in operand 3.  The all-ones/all-zeros constant cases
;; collapse to a plain move or to a NOT of the mask; otherwise the
;; values are forced into registers and a BSL is emitted.
2490 (define_expand "vcond_mask_<mode><v_int_equiv>"
2491 [(match_operand:VALLDI 0 "register_operand")
2492 (match_operand:VALLDI 1 "nonmemory_operand")
2493 (match_operand:VALLDI 2 "nonmemory_operand")
2494 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2497 /* If we have (a = (P) ? -1 : 0);
2498 Then we can simply move the generated mask (result must be int). */
2499 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2500 && operands[2] == CONST0_RTX (<MODE>mode))
2501 emit_move_insn (operands[0], operands[3]);
2502 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2503 else if (operands[1] == CONST0_RTX (<MODE>mode)
2504 && operands[2] == CONSTM1_RTX (<MODE>mode))
2505 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]))_
2508 if (!REG_P (operands[1]))
2509 operands[1] = force_reg (<MODE>mode, operands[1]);
2510 if (!REG_P (operands[2]))
2511 operands[2] = force_reg (<MODE>mode, operands[2]);
2512 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2513 operands[1], operands[2]));
2519 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: dispatch on the rtx comparison code to the
;; matching cmXX instruction.  A zero second operand may be used
;; directly (compare-against-zero forms); otherwise it is forced into
;; a register.  The unsigned LTU/LEU cases swap the operands and use
;; cmgtu/cmgeu; NE is synthesised as NOT (EQ).
2521 (define_expand "vec_cmp<mode><mode>"
2522 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2523 (match_operator 1 "comparison_operator"
2524 [(match_operand:VSDQ_I_DI 2 "register_operand")
2525 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2528 rtx mask = operands[0];
2529 enum rtx_code code = GET_CODE (operands[1]);
2539 if (operands[3] == CONST0_RTX (<MODE>mode))
2544 if (!REG_P (operands[3]))
2545 operands[3] = force_reg (<MODE>mode, operands[3]);
2553 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2557 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2561 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2565 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2569 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2573 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2577 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2581 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2585 /* Handle NE as !EQ. */
2586 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2587 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2591 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; Floating-point vector compare, producing an integer mask.  FCM
;; instructions return false for unordered lanes, so the unordered-or-X
;; codes are emitted as the inverse ordered comparison followed by a
;; NOT (see the transformation table in the body).  UNEQ and
;; (UN)ORDERED are built from pairs of cmgt/cmge combined with OR.
2601 (define_expand "vec_cmp<mode><v_int_equiv>"
2602 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2603 (match_operator 1 "comparison_operator"
2604 [(match_operand:VDQF 2 "register_operand")
2605 (match_operand:VDQF 3 "nonmemory_operand")]))]
2608 int use_zero_form = 0;
2609 enum rtx_code code = GET_CODE (operands[1]);
2610 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2612 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2621 if (operands[3] == CONST0_RTX (<MODE>mode))
2628 if (!REG_P (operands[3]))
2629 operands[3] = force_reg (<MODE>mode, operands[3]);
2639 comparison = gen_aarch64_cmlt<mode>;
2644 std::swap (operands[2], operands[3]);
2648 comparison = gen_aarch64_cmgt<mode>;
2653 comparison = gen_aarch64_cmle<mode>;
2658 std::swap (operands[2], operands[3]);
2662 comparison = gen_aarch64_cmge<mode>;
2666 comparison = gen_aarch64_cmeq<mode>;
2683 /* FCM returns false for lanes which are unordered, so if we use
2684 the inverse of the comparison we actually want to emit, then
2685 invert the result, we will end up with the correct result.
2686 Note that a NE NaN and NaN NE b are true for all a, b.
2688 Our transformations are:
2689 a UNGE b -> !(b GT a)
2690 a UNGT b -> !(b GE a)
2691 a UNLE b -> !(a GT b)
2692 a UNLT b -> !(a GE b)
2693 a NE b -> !(a EQ b) */
2694 gcc_assert (comparison != NULL);
2695 emit_insn (comparison (operands[0], operands[2], operands[3]));
2696 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2704 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2705 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2711 gcc_assert (comparison != NULL);
2712 emit_insn (comparison (operands[0], operands[2], operands[3]));
2716 /* We first check (a > b || b > a) which is !UNEQ, inverting
2717 this result will then give us (a == b || a UNORDERED b). */
2718 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2719 operands[2], operands[3]));
2720 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2721 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2722 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2726 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2727 UNORDERED as !ORDERED. */
2728 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2729 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2730 operands[3], operands[2]));
2731 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2732 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2736 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2737 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2738 operands[3], operands[2]));
2739 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer vector compare: identical to vec_cmp because the
;; comparison code in operand 1 already encodes signedness (GTU/GEU
;; etc.), so simply forward to it.
2749 (define_expand "vec_cmpu<mode><mode>"
2750 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2751 (match_operator 1 "comparison_operator"
2752 [(match_operand:VSDQ_I_DI 2 "register_operand")
2753 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2756 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2757 operands[2], operands[3]));
;; vcond: compute the comparison mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped to
;; avoid the extra NOT that vec_cmp would emit for NE.
2761 (define_expand "vcond<mode><mode>"
2762 [(set (match_operand:VALLDI 0 "register_operand")
2763 (if_then_else:VALLDI
2764 (match_operator 3 "comparison_operator"
2765 [(match_operand:VALLDI 4 "register_operand")
2766 (match_operand:VALLDI 5 "nonmemory_operand")])
2767 (match_operand:VALLDI 1 "nonmemory_operand")
2768 (match_operand:VALLDI 2 "nonmemory_operand")))]
2771 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2772 enum rtx_code code = GET_CODE (operands[3]);
2774 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2775 it as well as switch operands 1/2 in order to avoid the additional
2779 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2780 operands[4], operands[5]);
2781 std::swap (operands[1], operands[2]);
2783 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2784 operands[4], operands[5]));
2785 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2786 operands[2], mask));
;; Mixed-mode vcond: the compared vectors (VDQF_COND) and the selected
;; vectors (<V_cmp_mixed>) have different element types of equal width.
2791 (define_expand "vcond<v_cmp_mixed><mode>"
2792 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2793 (if_then_else:<V_cmp_mixed>
2794 (match_operator 3 "comparison_operator"
2795 [(match_operand:VDQF_COND 4 "register_operand")
2796 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2797 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2798 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2801 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2802 enum rtx_code code = GET_CODE (operands[3]);
2804 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2805 it as well as switch operands 1/2 in order to avoid the additional
2809 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2810 operands[4], operands[5]);
2811 std::swap (operands[1], operands[2]);
2813 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2814 operands[4], operands[5]));
2815 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2816 operands[0], operands[1],
2817 operands[2], mask));
;; vcondu: unsigned-compare variant for integer modes; same NE -> EQ
;; plus operand-swap trick as above.
2822 (define_expand "vcondu<mode><mode>"
2823 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2824 (if_then_else:VSDQ_I_DI
2825 (match_operator 3 "comparison_operator"
2826 [(match_operand:VSDQ_I_DI 4 "register_operand")
2827 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2828 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2829 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2832 rtx mask = gen_reg_rtx (<MODE>mode);
2833 enum rtx_code code = GET_CODE (operands[3]);
2835 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2836 it as well as switch operands 1/2 in order to avoid the additional
2840 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2841 operands[4], operands[5]);
2842 std::swap (operands[1], operands[2]);
2844 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2845 operands[4], operands[5]));
2846 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2847 operands[2], mask));
;; Mixed-mode vcondu: integer comparison selecting between FP vectors
;; of the same lane width.
2851 (define_expand "vcondu<mode><v_cmp_mixed>"
2852 [(set (match_operand:VDQF 0 "register_operand")
2854 (match_operator 3 "comparison_operator"
2855 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2856 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2857 (match_operand:VDQF 1 "nonmemory_operand")
2858 (match_operand:VDQF 2 "nonmemory_operand")))]
2861 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2862 enum rtx_code code = GET_CODE (operands[3]);
2864 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2865 it as well as switch operands 1/2 in order to avoid the additional
2869 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2870 operands[4], operands[5]);
2871 std::swap (operands[1], operands[2]);
2873 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2875 operands[4], operands[5]));
2876 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877 operands[2], mask));
2881 ;; Patterns for AArch64 SIMD Intrinsics.
2883 ;; Lane extraction with sign extension to general purpose register.
;; smov: the lane index is flipped for big-endian before printing.
2884 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2885 [(set (match_operand:GPI 0 "register_operand" "=r")
2888 (match_operand:VDQQH 1 "register_operand" "w")
2889 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2892 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2893 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2895 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to a 32-bit GP register (umov).
2898 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2899 [(set (match_operand:SI 0 "register_operand" "=r")
2902 (match_operand:VDQQH 1 "register_operand" "w")
2903 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2906 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2907 return "umov\\t%w0, %1.<Vetype>[%2]";
2909 [(set_attr "type" "neon_to_gp<q>")]
2912 ;; Lane extraction of a value, neither sign nor zero extension
2913 ;; is guaranteed so upper bits should be considered undefined.
2914 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: to a GP register (umov), to a SIMD register
;; (dup), or directly to memory (st1 of one lane).
2915 (define_insn "aarch64_get_lane<mode>"
2916 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2918 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2919 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2922 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2923 switch (which_alternative)
2926 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2928 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2930 return "st1\\t{%1.<Vetype>}[%2], %0";
2935 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2938 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with a zero high half (little-endian order of
;; the vec_concat).  The source may be a SIMD reg, GP reg or memory.
2941 (define_insn "*aarch64_combinez<mode>"
2942 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2944 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2945 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2951 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2952 (set_attr "simd" "yes,*,yes")
2953 (set_attr "fp" "*,yes,*")]
;; Big-endian variant of the above: vec_concat operands swapped.
2956 (define_insn "*aarch64_combinez_be<mode>"
2957 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2959 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2960 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2961 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2966 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2967 (set_attr "simd" "yes,*,yes")
2968 (set_attr "fp" "*,yes,*")]
;; Public expander: delegate to aarch64_split_simd_combine, which
;; handles endianness.
2971 (define_expand "aarch64_combine<mode>"
2972 [(match_operand:<VDBL> 0 "register_operand")
2973 (match_operand:VDC 1 "register_operand")
2974 (match_operand:VDC 2 "register_operand")]
2977 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Helper used by aarch64_split_simd_combine: write the two 64-bit
;; halves of the destination with move_lo_quad / move_hi_quad.
2983 (define_expand "aarch64_simd_combine<mode>"
2984 [(match_operand:<VDBL> 0 "register_operand")
2985 (match_operand:VDC 1 "register_operand")
2986 (match_operand:VDC 2 "register_operand")]
2989 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2990 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2993 [(set_attr "type" "multiple")]
2996 ;; <su><addsub>l<q>.
;; saddl2/uaddl2/ssubl2/usubl2: widen the high halves of both 128-bit
;; operands (lanes picked by the vect_par_cnst_hi_half parallel in
;; operand 3) and add/subtract.
2998 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2999 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3000 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3001 (match_operand:VQW 1 "register_operand" "w")
3002 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3003 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3004 (match_operand:VQW 2 "register_operand" "w")
3007 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3008 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Low-half counterpart (saddl/uaddl/ssubl/usubl).
3011 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3012 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3013 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3014 (match_operand:VQW 1 "register_operand" "w")
3015 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3016 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3017 (match_operand:VQW 2 "register_operand" "w")
3020 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3021 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four *2 expanders below build the hi-half lane parallel and emit
;; the corresponding _hi_internal pattern.
3025 (define_expand "aarch64_saddl2<mode>"
3026 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3027 (match_operand:VQW 1 "register_operand" "w")
3028 (match_operand:VQW 2 "register_operand" "w")]
3031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3032 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3037 (define_expand "aarch64_uaddl2<mode>"
3038 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3039 (match_operand:VQW 1 "register_operand" "w")
3040 (match_operand:VQW 2 "register_operand" "w")]
3043 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3044 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3049 (define_expand "aarch64_ssubl2<mode>"
3050 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3051 (match_operand:VQW 1 "register_operand" "w")
3052 (match_operand:VQW 2 "register_operand" "w")]
3055 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3056 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3061 (define_expand "aarch64_usubl2<mode>"
3062 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3063 (match_operand:VQW 1 "register_operand" "w")
3064 (match_operand:VQW 2 "register_operand" "w")]
3067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3068 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-64-bit-vector widening add/sub (no half selection needed).
3073 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3074 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3075 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3076 (match_operand:VD_BHSI 1 "register_operand" "w"))
3078 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3080 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3081 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3084 ;; <su><addsub>w<q>.
;; Widening sum of a 128-bit vector: add the low half into operand 2
;; with saddw (via a temporary), then the high half with saddw2.
3086 (define_expand "widen_ssum<mode>3"
3087 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3088 (plus:<VDBLW> (sign_extend:<VDBLW>
3089 (match_operand:VQW 1 "register_operand" ""))
3090 (match_operand:<VDBLW> 2 "register_operand" "")))]
3093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3094 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3096 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3098 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit-vector case: a single saddw suffices.
3103 (define_expand "widen_ssum<mode>3"
3104 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3105 (plus:<VWIDE> (sign_extend:<VWIDE>
3106 (match_operand:VD_BHSI 1 "register_operand" ""))
3107 (match_operand:<VWIDE> 2 "register_operand" "")))]
3110 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterpart of the 128-bit widening sum (uaddw + uaddw2).
3114 (define_expand "widen_usum<mode>3"
3115 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3116 (plus:<VDBLW> (zero_extend:<VDBLW>
3117 (match_operand:VQW 1 "register_operand" ""))
3118 (match_operand:<VDBLW> 2 "register_operand" "")))]
3121 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3122 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3124 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3126 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned 64-bit-vector case: a single uaddw.
3131 (define_expand "widen_usum<mode>3"
3132 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3133 (plus:<VWIDE> (zero_extend:<VWIDE>
3134 (match_operand:VD_BHSI 1 "register_operand" ""))
3135 (match_operand:<VWIDE> 2 "register_operand" "")))]
3138 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; saddw/uaddw/ssubw/usubw: widen operand 2 and combine with the
;; already-wide operand 1.
3142 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3143 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3144 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3146 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3148 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3149 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Low-half variant used by the widen_*sum expanders above: widen the
;; lo-half lanes of 128-bit operand 2 and combine with wide operand 1.
3152 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3153 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3157 (match_operand:VQW 2 "register_operand" "w")
3158 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3160 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3161 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half counterpart (saddw2 etc.).
3164 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3165 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3166 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3169 (match_operand:VQW 2 "register_operand" "w")
3170 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3172 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3173 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; The four *w2 expanders below build the hi-half lane parallel and
;; emit the corresponding *w2_internal pattern.
3176 (define_expand "aarch64_saddw2<mode>"
3177 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (match_operand:<VWIDE> 1 "register_operand" "w")
3179 (match_operand:VQW 2 "register_operand" "w")]
3182 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3188 (define_expand "aarch64_uaddw2<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:<VWIDE> 1 "register_operand" "w")
3191 (match_operand:VQW 2 "register_operand" "w")]
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3201 (define_expand "aarch64_ssubw2<mode>"
3202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203 (match_operand:<VWIDE> 1 "register_operand" "w")
3204 (match_operand:VQW 2 "register_operand" "w")]
3207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3213 (define_expand "aarch64_usubw2<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215 (match_operand:<VWIDE> 1 "register_operand" "w")
3216 (match_operand:VQW 2 "register_operand" "w")]
3219 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3225 ;; <su><r>h<addsub>.
;; shadd/uhadd/shsub/uhsub and rounding variants: halving add/sub.
3227 (define_insn "aarch64_<sur>h<addsub><mode>"
3228 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3229 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3230 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3233 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3234 [(set_attr "type" "neon_<addsub>_halve<q>")]
3237 ;; <r><addsub>hn<q>.
;; addhn/subhn (and rounding forms): add/sub then narrow to the high
;; halves of the elements.
3239 (define_insn "aarch64_<sur><addsub>hn<mode>"
3240 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3241 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3242 (match_operand:VQN 2 "register_operand" "w")]
3245 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3246 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; *hn2: narrow into the high half of the destination; the low half is
;; carried through via operand 1, tied to the output ("0").
3249 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3250 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3251 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3252 (match_operand:VQN 2 "register_operand" "w")
3253 (match_operand:VQN 3 "register_operand" "w")]
3256 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3257 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; pmul: vector polynomial multiply (byte vectors).
3262 (define_insn "aarch64_pmul<mode>"
3263 [(set (match_operand:VB 0 "register_operand" "=w")
3264 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3265 (match_operand:VB 2 "register_operand" "w")]
3268 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3269 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; fmulx: floating-point multiply-extended, scalar and vector forms.
3274 (define_insn "aarch64_fmulx<mode>"
3275 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3277 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3278 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3281 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3282 [(set_attr "type" "neon_fp_mul_<stype>")]
3285 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a selected lane, where the lane vector is the *other* width
;; than the data vector (VSWAP_WIDTH).  The lane number is converted to
;; the architectural (endian-adjusted) numbering before printing.
3287 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3288 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3290 [(match_operand:VDQSF 1 "register_operand" "w")
3291 (vec_duplicate:VDQSF
3293 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3294 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3298 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3299 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3301 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3304 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same as above but lane vector and data vector share the mode.
3306 (define_insn "*aarch64_mulx_elt<mode>"
3307 [(set (match_operand:VDQF 0 "register_operand" "=w")
3309 [(match_operand:VDQF 1 "register_operand" "w")
3312 (match_operand:VDQF 2 "register_operand" "w")
3313 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3317 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3318 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3320 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX where the second operand is a scalar broadcast (vec_duplicate
;; of a scalar register); printed as lane [0] of the scalar's vector view.
3325 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3326 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3328 [(match_operand:VHSDF 1 "register_operand" "w")
3329 (vec_duplicate:VHSDF
3330 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3333 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3334 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3337 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3338 ;; vmulxd_lane_f64 == vmulx_lane_f64
3339 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX taking one element extracted from a vector (vget + fmulx
;; fused): result and operand 1 are scalar <VEL> values.
3341 (define_insn "*aarch64_vgetfmulx<mode>"
3342 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3344 [(match_operand:<VEL> 1 "register_operand" "w")
3346 (match_operand:VDQF 2 "register_operand" "w")
3347 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3351 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3352 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3354 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/subtract: [su]qadd / [su]qsub via the BINQOPS iterator,
;; on vector and scalar integer modes (VSDQ_I).
3358 (define_insn "aarch64_<su_optab><optab><mode>"
3359 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3360 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3361 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3363 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3364 [(set_attr "type" "neon_<optab><q>")]
3367 ;; suqadd and usqadd
;; Signed-plus-unsigned saturating accumulate; operand 1 is tied to the
;; destination ("0") because the instruction is accumulating in place.
3369 (define_insn "aarch64_<sur>qadd<mode>"
3370 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3371 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3372 (match_operand:VSDQ_I 2 "register_operand" "w")]
3375 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3376 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: saturating extract-unsigned-narrow from signed sources.
3381 (define_insn "aarch64_sqmovun<mode>"
3382 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3383 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3386 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3387 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3390 ;; sqmovn and uqmovn
;; SQXTN / UQXTN: saturating narrow to half element width.
3392 (define_insn "aarch64_<sur>qmovn<mode>"
3393 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3394 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3397 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3398 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops — presumably SQABS/SQNEG via the iterator that
;; the (elided) unspec names; only the "s<optab>" template is visible
;; here — TODO confirm against upstream.
3403 (define_insn "aarch64_s<optab><mode>"
3404 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3406 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3408 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3409 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH / SQRDMULH: saturating (rounding) doubling multiply high,
;; vector and scalar HI/SI modes.
3414 (define_insn "aarch64_sq<r>dmulh<mode>"
3415 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3417 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3418 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3421 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3422 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector-by-lane form; lane operand uses the 64-bit companion mode
;; (VCOND) and constraint <vwx> restricts it to lane-addressable regs.
3427 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3428 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3430 [(match_operand:VDQHS 1 "register_operand" "w")
3432 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3433 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
;; Adjust lane number for big-endian lane numbering before printing.
3437 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3438 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3439 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As above but the lane comes from a 128-bit vector (VCONQ).
3442 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3443 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3445 [(match_operand:VDQHS 1 "register_operand" "w")
3447 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3448 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3452 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3453 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3454 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by-lane form.  Note: prints the lane element size via
;; %2.<v>[%3] rather than %2.<Vetype>[%3] as the vector forms do —
;; equivalent for HI/SI elements, but stylistically inconsistent.
3457 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3458 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3460 [(match_operand:SD_HSI 1 "register_operand" "w")
3462 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3463 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3467 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3468 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3469 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar by-laneq (128-bit lane source) form.
3472 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3473 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3475 [(match_operand:SD_HSI 1 "register_operand" "w")
3477 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3478 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3482 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3483 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3484 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH / SQRDMLSH (ARMv8.1-A rounding doubling multiply
;; accumulate/subtract high).  Operand 1 is tied ("0"): accumulator.
3489 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3490 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3492 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3493 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3494 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3497 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3498 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3501 ;; sqrdml[as]h_lane.
;; Vector by-lane form (lane from 64-bit companion mode VCOND).
3503 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3504 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3506 [(match_operand:VDQHS 1 "register_operand" "0")
3507 (match_operand:VDQHS 2 "register_operand" "w")
3509 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3510 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3514 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3516 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3518 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by-lane form.
3521 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3522 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3524 [(match_operand:SD_HSI 1 "register_operand" "0")
3525 (match_operand:SD_HSI 2 "register_operand" "w")
3527 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3528 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3532 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3534 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3536 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3539 ;; sqrdml[as]h_laneq.
;; Vector by-laneq form (lane from 128-bit mode VCONQ).
3541 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3542 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3544 [(match_operand:VDQHS 1 "register_operand" "0")
3545 (match_operand:VDQHS 2 "register_operand" "w")
3547 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3548 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3552 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3554 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3556 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by-laneq form; uses %3.<v>[%4] (element-size letter) rather
;; than %3.<Vetype>[%4] — equivalent for HI/SI, noted for consistency.
3559 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3560 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3562 [(match_operand:SD_HSI 1 "register_operand" "0")
3563 (match_operand:SD_HSI 2 "register_operand" "w")
3565 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3566 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3570 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3572 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3574 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL / SQDMLSL: signed saturating doubling multiply-accumulate
;; (or -subtract) long.  Operand 1 is the tied wide accumulator; the two
;; narrow operands are sign-extended to the wide mode and multiplied.
3579 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3580 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3582 (match_operand:<VWIDE> 1 "register_operand" "0")
3585 (sign_extend:<VWIDE>
3586 (match_operand:VSD_HSI 2 "register_operand" "w"))
3587 (sign_extend:<VWIDE>
3588 (match_operand:VSD_HSI 3 "register_operand" "w")))
3591 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3592 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Vector by-lane form: operand 3 is a lane duplicated across the vector.
3597 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3598 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3600 (match_operand:<VWIDE> 1 "register_operand" "0")
3603 (sign_extend:<VWIDE>
3604 (match_operand:VD_HSI 2 "register_operand" "w"))
3605 (sign_extend:<VWIDE>
3606 (vec_duplicate:VD_HSI
3608 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3609 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3614 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3616 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3618 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector by-laneq form (lane from a 128-bit vector, VCONQ).
3621 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3622 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3624 (match_operand:<VWIDE> 1 "register_operand" "0")
3627 (sign_extend:<VWIDE>
3628 (match_operand:VD_HSI 2 "register_operand" "w"))
3629 (sign_extend:<VWIDE>
3630 (vec_duplicate:VD_HSI
3632 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3633 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3638 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3640 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3642 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) by-lane form — no vec_duplicate since the product is
;; a single element.
3645 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3646 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3648 (match_operand:<VWIDE> 1 "register_operand" "0")
3651 (sign_extend:<VWIDE>
3652 (match_operand:SD_HSI 2 "register_operand" "w"))
3653 (sign_extend:<VWIDE>
3655 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3656 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3661 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3663 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3665 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by-laneq form.
3668 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3669 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3671 (match_operand:<VWIDE> 1 "register_operand" "0")
3674 (sign_extend:<VWIDE>
3675 (match_operand:SD_HSI 2 "register_operand" "w"))
3676 (sign_extend:<VWIDE>
3678 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3679 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3684 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3686 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3688 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: operand 3 is a scalar broadcast (vec_duplicate), printed as
;; lane [0] of its vector view.
3693 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3696 (match_operand:<VWIDE> 1 "register_operand" "0")
3699 (sign_extend:<VWIDE>
3700 (match_operand:VD_HSI 2 "register_operand" "w"))
3701 (sign_extend:<VWIDE>
3702 (vec_duplicate:VD_HSI
3703 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3706 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3707 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2 / SQDMLSL2 internal insn: multiplies the sign-extended HIGH
;; halves (selected by the vect_par_cnst_hi_half PARALLELs) of two quad
;; vectors, accumulating into the tied wide operand 1.
3712 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3713 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3715 (match_operand:<VWIDE> 1 "register_operand" "0")
3718 (sign_extend:<VWIDE>
3720 (match_operand:VQ_HSI 2 "register_operand" "w")
3721 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3722 (sign_extend:<VWIDE>
3724 (match_operand:VQ_HSI 3 "register_operand" "w")
3728 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin-facing expander: constructs the high-half PARALLEL and emits
;; the *_internal insn above (accumulate flavour).
3732 (define_expand "aarch64_sqdmlal2<mode>"
3733 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3734 (match_operand:<VWIDE> 1 "register_operand" "w")
3735 (match_operand:VQ_HSI 2 "register_operand" "w")
3736 (match_operand:VQ_HSI 3 "register_operand" "w")]
3739 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3740 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3741 operands[2], operands[3], p));
;; Subtract flavour of the expander above.
3745 (define_expand "aarch64_sqdmlsl2<mode>"
3746 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3747 (match_operand:<VWIDE> 1 "register_operand" "w")
3748 (match_operand:VQ_HSI 2 "register_operand" "w")
3749 (match_operand:VQ_HSI 3 "register_operand" "w")]
3752 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3753 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3754 operands[2], operands[3], p));
;; SQDMLAL2/SQDMLSL2 by-lane internal: high half of operand 2 times a
;; duplicated lane of operand 3 (64-bit companion mode VCOND).
3760 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3761 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3763 (match_operand:<VWIDE> 1 "register_operand" "0")
3766 (sign_extend:<VWIDE>
3768 (match_operand:VQ_HSI 2 "register_operand" "w")
3769 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3770 (sign_extend:<VWIDE>
3771 (vec_duplicate:<VHALF>
3773 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3779 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3781 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-laneq internal variant (lane source is 128-bit, VCONQ).
3786 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3789 (match_operand:<VWIDE> 1 "register_operand" "0")
3792 (sign_extend:<VWIDE>
3794 (match_operand:VQ_HSI 2 "register_operand" "w")
3795 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3796 (sign_extend:<VWIDE>
3797 (vec_duplicate:<VHALF>
3799 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3800 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3805 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3807 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3809 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Four builtin-facing expanders (accumulate/subtract x lane/laneq):
;; each builds the high-half PARALLEL and emits its *_internal insn.
3812 (define_expand "aarch64_sqdmlal2_lane<mode>"
3813 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3814 (match_operand:<VWIDE> 1 "register_operand" "w")
3815 (match_operand:VQ_HSI 2 "register_operand" "w")
3816 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3817 (match_operand:SI 4 "immediate_operand" "i")]
3820 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3821 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3822 operands[2], operands[3],
3827 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3828 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3829 (match_operand:<VWIDE> 1 "register_operand" "w")
3830 (match_operand:VQ_HSI 2 "register_operand" "w")
3831 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3832 (match_operand:SI 4 "immediate_operand" "i")]
3835 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3836 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3837 operands[2], operands[3],
3842 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3843 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3844 (match_operand:<VWIDE> 1 "register_operand" "w")
3845 (match_operand:VQ_HSI 2 "register_operand" "w")
3846 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3847 (match_operand:SI 4 "immediate_operand" "i")]
3850 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3851 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3852 operands[2], operands[3],
3857 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3858 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3859 (match_operand:<VWIDE> 1 "register_operand" "w")
3860 (match_operand:VQ_HSI 2 "register_operand" "w")
3861 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3862 (match_operand:SI 4 "immediate_operand" "i")]
3865 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3866 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3867 operands[2], operands[3],
;; SQDMLAL2/SQDMLSL2 _n internal: high half of operand 2 times a scalar
;; broadcast (vec_duplicate of <VEL>), printed as lane [0].
3872 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3873 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3875 (match_operand:<VWIDE> 1 "register_operand" "0")
3878 (sign_extend:<VWIDE>
3880 (match_operand:VQ_HSI 2 "register_operand" "w")
3881 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3882 (sign_extend:<VWIDE>
3883 (vec_duplicate:<VHALF>
3884 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3887 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3888 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin-facing expanders for the accumulate and subtract flavours.
3891 (define_expand "aarch64_sqdmlal2_n<mode>"
3892 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3893 (match_operand:<VWIDE> 1 "register_operand" "w")
3894 (match_operand:VQ_HSI 2 "register_operand" "w")
3895 (match_operand:<VEL> 3 "register_operand" "w")]
3898 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3899 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3900 operands[2], operands[3],
3905 (define_expand "aarch64_sqdmlsl2_n<mode>"
3906 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3907 (match_operand:<VWIDE> 1 "register_operand" "w")
3908 (match_operand:VQ_HSI 2 "register_operand" "w")
3909 (match_operand:<VEL> 3 "register_operand" "w")]
3912 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3913 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3914 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long — both narrow
;; operands are sign-extended to the wide mode before multiplying.
3921 (define_insn "aarch64_sqdmull<mode>"
3922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3925 (sign_extend:<VWIDE>
3926 (match_operand:VSD_HSI 1 "register_operand" "w"))
3927 (sign_extend:<VWIDE>
3928 (match_operand:VSD_HSI 2 "register_operand" "w")))
3931 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3932 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Vector by-lane form (lane from 64-bit companion mode VCOND).
3937 (define_insn "aarch64_sqdmull_lane<mode>"
3938 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3941 (sign_extend:<VWIDE>
3942 (match_operand:VD_HSI 1 "register_operand" "w"))
3943 (sign_extend:<VWIDE>
3944 (vec_duplicate:VD_HSI
3946 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3947 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3952 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3953 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3955 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Vector by-laneq form (lane from 128-bit mode VCONQ).
3958 (define_insn "aarch64_sqdmull_laneq<mode>"
3959 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3962 (sign_extend:<VWIDE>
3963 (match_operand:VD_HSI 1 "register_operand" "w"))
3964 (sign_extend:<VWIDE>
3965 (vec_duplicate:VD_HSI
3967 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3968 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3973 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3974 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3976 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) by-lane form — single-element product, no duplicate.
3979 (define_insn "aarch64_sqdmull_lane<mode>"
3980 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3983 (sign_extend:<VWIDE>
3984 (match_operand:SD_HSI 1 "register_operand" "w"))
3985 (sign_extend:<VWIDE>
3987 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3988 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3993 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3994 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3996 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar by-laneq form.
3999 (define_insn "aarch64_sqdmull_laneq<mode>"
4000 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4003 (sign_extend:<VWIDE>
4004 (match_operand:SD_HSI 1 "register_operand" "w"))
4005 (sign_extend:<VWIDE>
4007 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4008 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4013 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4014 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4016 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: scalar broadcast operand, printed as lane [0].
4021 (define_insn "aarch64_sqdmull_n<mode>"
4022 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4025 (sign_extend:<VWIDE>
4026 (match_operand:VD_HSI 1 "register_operand" "w"))
4027 (sign_extend:<VWIDE>
4028 (vec_duplicate:VD_HSI
4029 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4033 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4034 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2 internal: multiply the sign-extended HIGH halves of two quad
;; vectors (selected by the vect_par_cnst_hi_half PARALLELs).
4041 (define_insn "aarch64_sqdmull2<mode>_internal"
4042 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4045 (sign_extend:<VWIDE>
4047 (match_operand:VQ_HSI 1 "register_operand" "w")
4048 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4049 (sign_extend:<VWIDE>
4051 (match_operand:VQ_HSI 2 "register_operand" "w")
4056 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4057 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Builtin-facing expander: builds the high-half PARALLEL and defers to
;; the *_internal insn above.
4060 (define_expand "aarch64_sqdmull2<mode>"
4061 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (match_operand:VQ_HSI 1 "register_operand" "w")
4063 (match_operand:VQ_HSI 2 "register_operand" "w")]
4066 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4067 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; By-lane internal (lane from 64-bit companion mode VCOND).
4074 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4075 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4078 (sign_extend:<VWIDE>
4080 (match_operand:VQ_HSI 1 "register_operand" "w")
4081 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4082 (sign_extend:<VWIDE>
4083 (vec_duplicate:<VHALF>
4085 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4086 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4091 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4092 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4094 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; By-laneq internal (lane from 128-bit mode VCONQ).
4097 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4098 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4101 (sign_extend:<VWIDE>
4103 (match_operand:VQ_HSI 1 "register_operand" "w")
4104 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4105 (sign_extend:<VWIDE>
4106 (vec_duplicate:<VHALF>
4108 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4109 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4114 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4115 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the lane/laneq forms.
4120 (define_expand "aarch64_sqdmull2_lane<mode>"
4121 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4122 (match_operand:VQ_HSI 1 "register_operand" "w")
4123 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4124 (match_operand:SI 3 "immediate_operand" "i")]
4127 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4128 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4129 operands[2], operands[3],
4134 (define_expand "aarch64_sqdmull2_laneq<mode>"
4135 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4136 (match_operand:VQ_HSI 1 "register_operand" "w")
4137 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4138 (match_operand:SI 3 "immediate_operand" "i")]
4141 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4142 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4143 operands[2], operands[3],
;; _n internal: high half times a scalar broadcast, printed as lane [0].
4150 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4151 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4154 (sign_extend:<VWIDE>
4156 (match_operand:VQ_HSI 1 "register_operand" "w")
4157 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4158 (sign_extend:<VWIDE>
4159 (vec_duplicate:<VHALF>
4160 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4164 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4165 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the _n form.
4168 (define_expand "aarch64_sqdmull2_n<mode>"
4169 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4170 (match_operand:VQ_HSI 1 "register_operand" "w")
4171 (match_operand:<VEL> 2 "register_operand" "w")]
4174 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4175 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Register-controlled shifts: [su]shl / [su]rshl (iterator elided in
;; this extract) on vector and DI modes.
4182 (define_insn "aarch64_<sur>shl<mode>"
4183 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4185 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4186 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4189 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4190 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (rounding) register-controlled shifts: [su]q[r]shl.
4196 (define_insn "aarch64_<sur>q<r>shl<mode>"
4197 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4199 [(match_operand:VSDQ_I 1 "register_operand" "w")
4200 (match_operand:VSDQ_I 2 "register_operand" "w")]
4203 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4204 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; [su]shll by immediate; a shift equal to the element width must use
;; the plain SHLL mnemonic (the [su]shll encodings exclude that amount).
4209 (define_insn "aarch64_<sur>shll_n<mode>"
4210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4213 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4217 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4218 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4220 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4222 [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant ([su]shll2), same element-width special case.
4227 (define_insn "aarch64_<sur>shll2_n<mode>"
4228 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4229 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4230 (match_operand:SI 2 "immediate_operand" "i")]
4234 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4235 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4237 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4239 [(set_attr "type" "neon_shift_imm_long")]
;; Immediate shifts right ([su]shr / [su]rshr family via elided iterator).
4244 (define_insn "aarch64_<sur>shr_n<mode>"
4245 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4246 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4248 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4251 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4252 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate ([su][r]sra): operand 1 tied as accumulator.
4257 (define_insn "aarch64_<sur>sra_n<mode>"
4258 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4259 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4260 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4262 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4265 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4266 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted
;; field are preserved, hence the tied operand 1.
4271 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4272 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4273 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4274 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4276 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4279 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4280 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate ([su]qshl / sqshlu).
4285 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4286 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4287 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4289 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4292 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4293 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift right narrow ([su]q[r]shr[u]n).
4299 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4300 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4301 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4303 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4306 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4307 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4311 ;; cm(eq|ge|gt|lt|le)
4312 ;; Note, we have constraints for Dz and Z as different expanders
4313 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compares; alternative 1 compares against #0 when
;; operand 2 is the zero constant (ZDz constraint).  <cmp_1>/<cmp_2>
;; swap operands where the architecture only has one direction.
4315 (define_insn "aarch64_cm<optab><mode>"
4316 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4318 (COMPARISONS:<V_INT_EQUIV>
4319 (match_operand:VDQ_I 1 "register_operand" "w,w")
4320 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4324 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4325 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4326 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI-mode compare: when the operands end up in general registers the
;; split emits a compare + conditional store (clobbering CC); otherwise
;; it re-emits the clobber-free *aarch64_cm<optab>di pattern below.
4329 (define_insn_and_split "aarch64_cm<optab>di"
4330 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4333 (match_operand:DI 1 "register_operand" "w,w,r")
4334 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4336 (clobber (reg:CC CC_REGNUM))]
4340 [(set (match_operand:DI 0 "register_operand")
4343 (match_operand:DI 1 "register_operand")
4344 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4347 /* If we are in the general purpose register file,
4348 we split to a sequence of comparison and store. */
4349 if (GP_REGNUM_P (REGNO (operands[0]))
4350 && GP_REGNUM_P (REGNO (operands[1])))
4352 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4353 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4354 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4355 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4358 /* Otherwise, we expand to a similar pattern which does not
4359 clobber CC_REGNUM. */
4361 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI compare confined to FP/SIMD registers; no CC clobber.
4364 (define_insn "*aarch64_cm<optab>di"
4365 [(set (match_operand:DI 0 "register_operand" "=w,w")
4368 (match_operand:DI 1 "register_operand" "w,w")
4369 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4371 "TARGET_SIMD && reload_completed"
4373 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4374 cm<optab>\t%d0, %d1, #0"
4375 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares (UCOMPARISONS, e.g. cmhs/cmhi); no
;; compare-against-zero alternative here.
4380 (define_insn "aarch64_cm<optab><mode>"
4381 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4383 (UCOMPARISONS:<V_INT_EQUIV>
4384 (match_operand:VDQ_I 1 "register_operand" "w")
4385 (match_operand:VDQ_I 2 "register_operand" "w")
4388 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4389 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DI-mode compare; same GP-vs-SIMD split strategy as the
;; signed version, but the CC mode is plain CCmode.
4392 (define_insn_and_split "aarch64_cm<optab>di"
4393 [(set (match_operand:DI 0 "register_operand" "=w,r")
4396 (match_operand:DI 1 "register_operand" "w,r")
4397 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4399 (clobber (reg:CC CC_REGNUM))]
4403 [(set (match_operand:DI 0 "register_operand")
4406 (match_operand:DI 1 "register_operand")
4407 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4410 /* If we are in the general purpose register file,
4411 we split to a sequence of comparison and store. */
4412 if (GP_REGNUM_P (REGNO (operands[0]))
4413 && GP_REGNUM_P (REGNO (operands[1])))
4415 machine_mode mode = CCmode;
4416 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4417 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4418 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4421 /* Otherwise, we expand to a similar pattern which does not
4422 clobber CC_REGNUM. */
4424 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload unsigned DI compare in FP/SIMD registers; no CC clobber.
4427 (define_insn "*aarch64_cm<optab>di"
4428 [(set (match_operand:DI 0 "register_operand" "=w")
4431 (match_operand:DI 1 "register_operand" "w")
4432 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4434 "TARGET_SIMD && reload_completed"
4435 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4436 [(set_attr "type" "neon_compare")]
4441 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4442 ;; we don't have any insns using ne, and aarch64_vcond outputs
4443 ;; not (neg (eq (and x y) 0))
4444 ;; which is rewritten by simplify_rtx as
4445 ;; plus (eq (and x y) 0) -1.
;; Vector "test bits" (cmtst): result lane is all-ones where (x & y) != 0.
;; The RTL shape matches the simplify_rtx canonical form described above.
4447 (define_insn "aarch64_cmtst<mode>"
4448 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4452 (match_operand:VDQ_I 1 "register_operand" "w")
4453 (match_operand:VDQ_I 2 "register_operand" "w"))
4454 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4455 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4458 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4459 [(set_attr "type" "neon_tst<q>")]
;; Scalar DImode cmtst; like the cm<optab>di patterns it clobbers CC before
;; reload and splits to (x & y) != 0 compare + cstore for general registers,
;; otherwise to the CC-free post-reload pattern below.
4462 (define_insn_and_split "aarch64_cmtstdi"
4463 [(set (match_operand:DI 0 "register_operand" "=w,r")
4467 (match_operand:DI 1 "register_operand" "w,r")
4468 (match_operand:DI 2 "register_operand" "w,r"))
4470 (clobber (reg:CC CC_REGNUM))]
4474 [(set (match_operand:DI 0 "register_operand")
4478 (match_operand:DI 1 "register_operand")
4479 (match_operand:DI 2 "register_operand"))
4482 /* If we are in the general purpose register file,
4483 we split to a sequence of comparison and store.  */
4484 if (GP_REGNUM_P (REGNO (operands[0]))
4485 && GP_REGNUM_P (REGNO (operands[1])))
4487 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4488 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4489 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4490 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4491 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4494 /* Otherwise, we expand to a similar pattern which does not
4495 clobber CC_REGNUM.  */
4497 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload SIMD-register form of scalar cmtst.
4500 (define_insn "*aarch64_cmtstdi"
4501 [(set (match_operand:DI 0 "register_operand" "=w")
4505 (match_operand:DI 1 "register_operand" "w")
4506 (match_operand:DI 2 "register_operand" "w"))
4509 "cmtst\t%d0, %d1, %d2"
4510 [(set_attr "type" "neon_tst")]
4513 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares producing an integer mask.  The second
;; alternative (YDz) folds a compare against +0.0 into the single-operand
;; "fcm<optab> ..., 0" form.
4515 (define_insn "aarch64_cm<optab><mode>"
4516 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4518 (COMPARISONS:<V_INT_EQUIV>
4519 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4520 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4524 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4525 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4526 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4530 ;; Note we can also handle what would be fac(le|lt) by
4531 ;; generating fac(ge|gt).
;; Absolute-value floating-point compares: |x| <cmp> |y|.
4533 (define_insn "aarch64_fac<optab><mode>"
4534 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4536 (FAC_COMPARISONS:<V_INT_EQUIV>
4538 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4540 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4543 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4544 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise add across two 64-bit integer vectors (addp), and the DImode
;; variant that reduces a V2DI register into a single D register.
4549 (define_insn "aarch64_addp<mode>"
4550 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4552 [(match_operand:VD_BHSI 1 "register_operand" "w")
4553 (match_operand:VD_BHSI 2 "register_operand" "w")]
4556 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4557 [(set_attr "type" "neon_reduc_add<q>")]
4560 (define_insn "aarch64_addpdi"
4561 [(set (match_operand:DI 0 "register_operand" "=w")
4563 [(match_operand:V2DI 1 "register_operand" "w")]
4567 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.  The expander first tries the approximate-sqrt
;; (Newton-series) expansion; when that declines, the plain fsqrt insn
;; below matches.
4572 (define_expand "sqrt<mode>2"
4573 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4574 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4577 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4581 (define_insn "*sqrt<mode>2"
4582 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4583 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4585 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4586 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4589 ;; Patterns for vector struct loads and stores.
;; Two-register (OImode) structure load/store family: whole-struct ld2/st2,
;; replicate-to-all-lanes ld2r, and single-lane ld2/st2 variants, plus the
;; vec_load/store_lanes expanders that insert an element reversal on
;; big-endian so RTL lane numbering stays consistent.
4591 (define_insn "aarch64_simd_ld2<mode>"
4592 [(set (match_operand:OI 0 "register_operand" "=w")
4593 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4594 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4597 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4598 [(set_attr "type" "neon_load2_2reg<q>")]
;; Load one struct element and replicate it to every lane of both registers.
4601 (define_insn "aarch64_simd_ld2r<mode>"
4602 [(set (match_operand:OI 0 "register_operand" "=w")
4603 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4604 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4607 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4608 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; Load one lane of a two-register struct; remaining lanes come from
;; operand 2 (tied to the output).  The lane index is converted to the
;; architectural (endian-corrected) numbering before printing.
4611 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4612 [(set (match_operand:OI 0 "register_operand" "=w")
4613 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4614 (match_operand:OI 2 "register_operand" "0")
4615 (match_operand:SI 3 "immediate_operand" "i")
4616 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4620 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4621 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4623 [(set_attr "type" "neon_load2_one_lane")]
;; Middle-end interface: on big-endian, load into a temp and reverse the
;; register list so lane numbering matches GCC's element order.
4626 (define_expand "vec_load_lanesoi<mode>"
4627 [(set (match_operand:OI 0 "register_operand" "=w")
4628 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4629 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4633 if (BYTES_BIG_ENDIAN)
4635 rtx tmp = gen_reg_rtx (OImode);
4636 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4637 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4638 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4641 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; Whole two-register structure store.
4645 (define_insn "aarch64_simd_st2<mode>"
4646 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4647 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4648 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4651 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4652 [(set_attr "type" "neon_store2_2reg<q>")]
4655 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4656 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4657 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4658 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4659 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4660 (match_operand:SI 2 "immediate_operand" "i")]
4664 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4665 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4667 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Middle-end store interface; mirror of vec_load_lanesoi above.
4670 (define_expand "vec_store_lanesoi<mode>"
4671 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4672 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4673 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4677 if (BYTES_BIG_ENDIAN)
4679 rtx tmp = gen_reg_rtx (OImode);
4680 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4681 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4682 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4685 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register (CImode) structure load/store family: ld3/st3, ld3r and
;; single-lane variants, with big-endian register-list reversal in the
;; vec_load/store_lanes expanders.  Structure parallels the ld2/st2 family.
4689 (define_insn "aarch64_simd_ld3<mode>"
4690 [(set (match_operand:CI 0 "register_operand" "=w")
4691 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4692 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4695 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4696 [(set_attr "type" "neon_load3_3reg<q>")]
4699 (define_insn "aarch64_simd_ld3r<mode>"
4700 [(set (match_operand:CI 0 "register_operand" "=w")
4701 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4702 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4705 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4706 [(set_attr "type" "neon_load3_all_lanes<q>")]
4709 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4710 [(set (match_operand:CI 0 "register_operand" "=w")
4711 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4712 (match_operand:CI 2 "register_operand" "0")
4713 (match_operand:SI 3 "immediate_operand" "i")
4714 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4718 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4719 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4721 [(set_attr "type" "neon_load3_one_lane")]
4724 (define_expand "vec_load_lanesci<mode>"
4725 [(set (match_operand:CI 0 "register_operand" "=w")
4726 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4727 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4731 if (BYTES_BIG_ENDIAN)
4733 rtx tmp = gen_reg_rtx (CImode);
4734 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4735 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4736 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4739 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4743 (define_insn "aarch64_simd_st3<mode>"
4744 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4745 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4746 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4749 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4750 [(set_attr "type" "neon_store3_3reg<q>")]
4753 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4754 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4755 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4756 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4757 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4758 (match_operand:SI 2 "immediate_operand" "i")]
4762 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4763 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4765 [(set_attr "type" "neon_store3_one_lane<q>")]
4768 (define_expand "vec_store_lanesci<mode>"
4769 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4770 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4775 if (BYTES_BIG_ENDIAN)
4777 rtx tmp = gen_reg_rtx (CImode);
4778 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4779 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4780 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4783 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register (XImode) structure load/store family: ld4/st4, ld4r and
;; single-lane variants, with big-endian register-list reversal in the
;; vec_load/store_lanes expanders.  Structure parallels the ld2/ld3 families.
4787 (define_insn "aarch64_simd_ld4<mode>"
4788 [(set (match_operand:XI 0 "register_operand" "=w")
4789 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4790 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4793 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4794 [(set_attr "type" "neon_load4_4reg<q>")]
4797 (define_insn "aarch64_simd_ld4r<mode>"
4798 [(set (match_operand:XI 0 "register_operand" "=w")
4799 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4800 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4803 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4804 [(set_attr "type" "neon_load4_all_lanes<q>")]
4807 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4808 [(set (match_operand:XI 0 "register_operand" "=w")
4809 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4810 (match_operand:XI 2 "register_operand" "0")
4811 (match_operand:SI 3 "immediate_operand" "i")
4812 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4816 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4817 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4819 [(set_attr "type" "neon_load4_one_lane")]
4822 (define_expand "vec_load_lanesxi<mode>"
4823 [(set (match_operand:XI 0 "register_operand" "=w")
4824 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4825 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4829 if (BYTES_BIG_ENDIAN)
4831 rtx tmp = gen_reg_rtx (XImode);
4832 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4833 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4834 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4837 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4841 (define_insn "aarch64_simd_st4<mode>"
4842 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4843 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4844 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4847 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4848 [(set_attr "type" "neon_store4_4reg<q>")]
4851 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4852 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4853 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4854 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4855 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4856 (match_operand:SI 2 "immediate_operand" "i")]
4860 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4861 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4863 [(set_attr "type" "neon_store4_one_lane<q>")]
4866 (define_expand "vec_store_lanesxi<mode>"
4867 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4868 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4869 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4873 if (BYTES_BIG_ENDIAN)
4875 rtx tmp = gen_reg_rtx (XImode);
4876 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4877 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4878 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4881 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the elements of every register in a struct register list, used
;; by the big-endian paths of the vec_load/store_lanes expanders above.
;; After reload it splits into one tbl (byte permute) per 128-bit register,
;; using operand 2 as the byte-shuffle mask.  The output is earlyclobber
;; ("=&w") so the per-register tbl writes cannot overwrite unread inputs.
4885 (define_insn_and_split "aarch64_rev_reglist<mode>"
4886 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4888 [(match_operand:VSTRUCT 1 "register_operand" "w")
4889 (match_operand:V16QI 2 "register_operand" "w")]
4890 UNSPEC_REV_REGLIST))]
4893 "&& reload_completed"
4897 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4898 for (i = 0; i < nregs; i++)
4900 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4901 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4902 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4906 [(set_attr "type" "neon_tbl1_q")
4907 (set_attr "length" "<insn_count>")]
4910 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for struct modes (OI/CI/XI): before reload, force
;; the source into a register when the destination is not one, so the insn
;; below never sees a mem-to-mem move.
4912 (define_expand "mov<mode>"
4913 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4914 (match_operand:VSTRUCT 1 "general_operand" ""))]
4917 if (can_create_pseudo_p ())
4919 if (GET_CODE (operands[0]) != REG)
4920 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian struct move: reg-reg (split elsewhere, hence "multiple"),
;; or single st1/ld1 over the whole register list.
4924 (define_insn "*aarch64_mov<mode>"
4925 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4926 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4927 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4928 && (register_operand (operands[0], <MODE>mode)
4929 || register_operand (operands[1], <MODE>mode))"
4932 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4933 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4934 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4935 neon_load<nregs>_<nregs>reg_q")
4936 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian-safe single-register load/store: ld1/st1 keep the in-register
;; element order independent of memory endianness.
4939 (define_insn "aarch64_be_ld1<mode>"
4940 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4941 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4942 "aarch64_simd_struct_operand" "Utv")]
4945 "ld1\\t{%0<Vmtype>}, %1"
4946 [(set_attr "type" "neon_load1_1reg<q>")]
4949 (define_insn "aarch64_be_st1<mode>"
4950 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4951 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4954 "st1\\t{%1<Vmtype>}, %0"
4955 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian struct moves for OI/CI/XI.  Register-register copies are
;; length "multiple" (split after reload into per-register moves); memory
;; alternatives use paired loads/stores rather than ld1/st1 so that byte
;; order in memory matches the big-endian layout.
4958 (define_insn "*aarch64_be_movoi"
4959 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4960 (match_operand:OI 1 "general_operand" " w,w,m"))]
4961 "TARGET_SIMD && BYTES_BIG_ENDIAN
4962 && (register_operand (operands[0], OImode)
4963 || register_operand (operands[1], OImode))"
4968 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4969 (set_attr "length" "8,4,4")]
4972 (define_insn "*aarch64_be_movci"
4973 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4974 (match_operand:CI 1 "general_operand" " w,w,o"))]
4975 "TARGET_SIMD && BYTES_BIG_ENDIAN
4976 && (register_operand (operands[0], CImode)
4977 || register_operand (operands[1], CImode))"
4979 [(set_attr "type" "multiple")
4980 (set_attr "length" "12,4,4")]
4983 (define_insn "*aarch64_be_movxi"
4984 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4985 (match_operand:XI 1 "general_operand" " w,w,o"))]
4986 "TARGET_SIMD && BYTES_BIG_ENDIAN
4987 && (register_operand (operands[0], XImode)
4988 || register_operand (operands[1], XImode))"
4990 [(set_attr "type" "multiple")
4991 (set_attr "length" "16,4,4")]
;; Post-reload splits for struct-mode moves.  An OImode reg-reg move becomes
;; two TImode register moves; CI and XI moves are decomposed below into
;; OI/TI submoves (register-register, or via subregs for the big-endian
;; memory paths).  NOTE(review): the define_split header lines themselves
;; are among the lines elided from this view of the file.
4995 [(set (match_operand:OI 0 "register_operand")
4996 (match_operand:OI 1 "register_operand"))]
4997 "TARGET_SIMD && reload_completed"
5000 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CImode (3 x 128-bit) move split: three TI reg-reg moves when both sides
;; are registers; otherwise, on big-endian, an OI move for the first two
;; registers plus a V16QI move for the remaining one.
5005 [(set (match_operand:CI 0 "nonimmediate_operand")
5006 (match_operand:CI 1 "general_operand"))]
5007 "TARGET_SIMD && reload_completed"
5010 if (register_operand (operands[0], CImode)
5011 && register_operand (operands[1], CImode))
5013 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5016 else if (BYTES_BIG_ENDIAN)
5018 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5019 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5020 emit_move_insn (gen_lowpart (V16QImode,
5021 simplify_gen_subreg (TImode, operands[0],
5023 gen_lowpart (V16QImode,
5024 simplify_gen_subreg (TImode, operands[1],
;; XImode (4 x 128-bit) move split: four TI reg-reg moves, or two OI
;; submoves (offsets 0 and 32 bytes) for the big-endian memory case.
5033 [(set (match_operand:XI 0 "nonimmediate_operand")
5034 (match_operand:XI 1 "general_operand"))]
5035 "TARGET_SIMD && reload_completed"
5038 if (register_operand (operands[0], XImode)
5039 && register_operand (operands[1], XImode))
5041 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5044 else if (BYTES_BIG_ENDIAN)
5046 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5047 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5048 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5049 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for ldNr (load-and-replicate): wrap the pointer in a
;; BLKmode MEM sized from the element type, then emit the matching
;; aarch64_simd_ld<N>r pattern.
5056 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5057 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5058 (match_operand:DI 1 "register_operand" "w")
5059 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5062 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5063 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5066 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register (64-bit vector) structure loads.  For the VD modes a real
;; ldN is used; for the DX (64-bit scalar-in-SIMD) modes a plain ld1 over
;; .1d register lists gives the same layout.
5071 (define_insn "aarch64_ld2<mode>_dreg"
5072 [(set (match_operand:OI 0 "register_operand" "=w")
5073 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5074 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5077 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5078 [(set_attr "type" "neon_load2_2reg<q>")]
5081 (define_insn "aarch64_ld2<mode>_dreg"
5082 [(set (match_operand:OI 0 "register_operand" "=w")
5083 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5084 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5087 "ld1\\t{%S0.1d - %T0.1d}, %1"
5088 [(set_attr "type" "neon_load1_2reg<q>")]
5091 (define_insn "aarch64_ld3<mode>_dreg"
5092 [(set (match_operand:CI 0 "register_operand" "=w")
5093 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5094 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5097 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5098 [(set_attr "type" "neon_load3_3reg<q>")]
5101 (define_insn "aarch64_ld3<mode>_dreg"
5102 [(set (match_operand:CI 0 "register_operand" "=w")
5103 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5104 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5107 "ld1\\t{%S0.1d - %U0.1d}, %1"
5108 [(set_attr "type" "neon_load1_3reg<q>")]
5111 (define_insn "aarch64_ld4<mode>_dreg"
5112 [(set (match_operand:XI 0 "register_operand" "=w")
5113 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5114 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5117 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5118 [(set_attr "type" "neon_load4_4reg<q>")]
5121 (define_insn "aarch64_ld4<mode>_dreg"
5122 [(set (match_operand:XI 0 "register_operand" "=w")
5123 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5124 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5127 "ld1\\t{%S0.1d - %V0.1d}, %1"
5128 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for ldN of 64-bit element vectors: build a BLKmode MEM
;; of nregs * 8 bytes and dispatch to the _dreg insns above.
5131 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5132 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5133 (match_operand:DI 1 "register_operand" "r")
5134 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5137 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5138 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5140 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Builtin expander for ld1: a plain move on little-endian, the
;; element-order-preserving aarch64_be_ld1 pattern on big-endian.
5144 (define_expand "aarch64_ld1<VALL_F16:mode>"
5145 [(match_operand:VALL_F16 0 "register_operand")
5146 (match_operand:DI 1 "register_operand")]
5149 machine_mode mode = <VALL_F16:MODE>mode;
5150 rtx mem = gen_rtx_MEM (mode, operands[1]);
5152 if (BYTES_BIG_ENDIAN)
5153 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5155 emit_move_insn (operands[0], mem);
;; Builtin expander for ldN of 128-bit element vectors: dispatch straight
;; to the aarch64_simd_ldN patterns.
5159 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5160 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5161 (match_operand:DI 1 "register_operand" "r")
5162 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5165 machine_mode mode = <VSTRUCT:MODE>mode;
5166 rtx mem = gen_rtx_MEM (mode, operands[1]);
5168 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; Builtin expander for single-lane ldN: size the BLKmode MEM from the
;; element type, bounds-check the lane index, then emit the lane-load insn.
5172 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5173 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5174 (match_operand:DI 1 "register_operand" "w")
5175 (match_operand:VSTRUCT 2 "register_operand" "0")
5176 (match_operand:SI 3 "immediate_operand" "i")
5177 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5180 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5181 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5184 aarch64_simd_lane_bounds (operands[3], 0,
5185 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5187 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5188 operands[0], mem, operands[2], operands[3]));
5192 ;; Expanders for builtins to extract vector registers from large
5193 ;; opaque integer modes.
;; Extract D-register number `part` from a struct-mode value: take the
;; 128-bit subreg at byte offset part * 16, then its low 64-bit half.
5197 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5198 [(match_operand:VDC 0 "register_operand" "=w")
5199 (match_operand:VSTRUCT 1 "register_operand" "w")
5200 (match_operand:SI 2 "immediate_operand" "i")]
5203 int part = INTVAL (operands[2]);
5204 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5205 int offset = part * 16;
5207 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5208 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register number `part` directly as a 16-byte-offset subreg.
5214 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5215 [(match_operand:VQ 0 "register_operand" "=w")
5216 (match_operand:VSTRUCT 1 "register_operand" "w")
5217 (match_operand:SI 2 "immediate_operand" "i")]
5220 int part = INTVAL (operands[2]);
5221 int offset = part * 16;
5223 emit_move_insn (operands[0],
5224 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5228 ;; Permuted-store expanders for neon intrinsics.
5230 ;; Permute instructions
;; Middle-end constant-permutation hook: succeed only when
;; aarch64_expand_vec_perm_const can emit a native sequence.
5234 (define_expand "vec_perm_const<mode>"
5235 [(match_operand:VALL_F16 0 "register_operand")
5236 (match_operand:VALL_F16 1 "register_operand")
5237 (match_operand:VALL_F16 2 "register_operand")
5238 (match_operand:<V_INT_EQUIV> 3)]
5241 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5242 operands[2], operands[3]))
;; Variable permutation on byte vectors; lowered by aarch64_expand_vec_perm
;; (ultimately via tbl).
5248 (define_expand "vec_perm<mode>"
5249 [(match_operand:VB 0 "register_operand")
5250 (match_operand:VB 1 "register_operand")
5251 (match_operand:VB 2 "register_operand")
5252 (match_operand:VB 3 "register_operand")]
5255 aarch64_expand_vec_perm (operands[0], operands[1],
5256 operands[2], operands[3]);
;; Table-lookup (tbl/tbx) patterns.  tbl yields zero for out-of-range
;; indices; tbx instead preserves the corresponding lane of the tied
;; destination operand.  Table sizes run from one 128-bit register (tbl1)
;; up to four (qtbl4/qtbx4).
5260 (define_insn "aarch64_tbl1<mode>"
5261 [(set (match_operand:VB 0 "register_operand" "=w")
5262 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5263 (match_operand:VB 2 "register_operand" "w")]
5266 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5267 [(set_attr "type" "neon_tbl1<q>")]
5270 ;; Two source registers.
5272 (define_insn "aarch64_tbl2v16qi"
5273 [(set (match_operand:V16QI 0 "register_operand" "=w")
5274 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5275 (match_operand:V16QI 2 "register_operand" "w")]
5278 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5279 [(set_attr "type" "neon_tbl2_q")]
5282 (define_insn "aarch64_tbl3<mode>"
5283 [(set (match_operand:VB 0 "register_operand" "=w")
5284 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5285 (match_operand:VB 2 "register_operand" "w")]
5288 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5289 [(set_attr "type" "neon_tbl3")]
;; tbx with a two-register table; operand 1 is tied to the output so
;; out-of-range lanes keep their previous contents.
5292 (define_insn "aarch64_tbx4<mode>"
5293 [(set (match_operand:VB 0 "register_operand" "=w")
5294 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5295 (match_operand:OI 2 "register_operand" "w")
5296 (match_operand:VB 3 "register_operand" "w")]
5299 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5300 [(set_attr "type" "neon_tbl4")]
5303 ;; Three source registers.
5305 (define_insn "aarch64_qtbl3<mode>"
5306 [(set (match_operand:VB 0 "register_operand" "=w")
5307 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5308 (match_operand:VB 2 "register_operand" "w")]
5311 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5312 [(set_attr "type" "neon_tbl3")]
5315 (define_insn "aarch64_qtbx3<mode>"
5316 [(set (match_operand:VB 0 "register_operand" "=w")
5317 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5318 (match_operand:CI 2 "register_operand" "w")
5319 (match_operand:VB 3 "register_operand" "w")]
5322 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5323 [(set_attr "type" "neon_tbl3")]
5326 ;; Four source registers.
5328 (define_insn "aarch64_qtbl4<mode>"
5329 [(set (match_operand:VB 0 "register_operand" "=w")
5330 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5331 (match_operand:VB 2 "register_operand" "w")]
5334 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5335 [(set_attr "type" "neon_tbl4")]
5338 (define_insn "aarch64_qtbx4<mode>"
5339 [(set (match_operand:VB 0 "register_operand" "=w")
5340 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5341 (match_operand:XI 2 "register_operand" "w")
5342 (match_operand:VB 3 "register_operand" "w")]
5345 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5346 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into one OImode register pair; split after
;; reload into plain register moves by aarch64_split_combinev16qi.
5349 (define_insn_and_split "aarch64_combinev16qi"
5350 [(set (match_operand:OI 0 "register_operand" "=w")
5351 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5352 (match_operand:V16QI 2 "register_operand" "w")]
5356 "&& reload_completed"
5359 aarch64_split_combinev16qi (operands);
5362 [(set_attr "type" "multiple")]
5365 ;; This instruction's pattern is generated directly by
5366 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5367 ;; need corresponding changes there.
;; Two-operand permutes: trn1/trn2, zip1/zip2, uzp1/uzp2 via the PERMUTE
;; iterator pair (perm_insn, perm_hilo).
5368 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5369 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5370 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5371 (match_operand:VALL_F16 2 "register_operand" "w")]
5374 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5375 [(set_attr "type" "neon_permute<q>")]
5378 ;; This instruction's pattern is generated directly by
5379 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5380 ;; need corresponding changes there.  Note that the immediate (third)
5381 ;; operand is a lane index not a byte index.
;; ext: concatenate-and-extract.  The lane index is scaled to a byte index
;; at output time, since the instruction takes a byte offset.
5382 (define_insn "aarch64_ext<mode>"
5383 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5384 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5385 (match_operand:VALL_F16 2 "register_operand" "w")
5386 (match_operand:SI 3 "immediate_operand" "i")]
5390 operands[3] = GEN_INT (INTVAL (operands[3])
5391 * GET_MODE_UNIT_SIZE (<MODE>mode));
5392 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5394 [(set_attr "type" "neon_ext<q>")]
5397 ;; This instruction's pattern is generated directly by
5398 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5399 ;; need corresponding changes there.
;; rev16/rev32/rev64 element reversal within containers.
5400 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5401 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5402 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5405 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5406 [(set_attr "type" "neon_rev<q>")]
;; D-register structure stores, mirroring the _dreg loads: real stN for the
;; VD vector modes, st1 over .1d register lists for the DX scalar modes.
5409 (define_insn "aarch64_st2<mode>_dreg"
5410 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5411 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5412 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5415 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5416 [(set_attr "type" "neon_store2_2reg")]
5419 (define_insn "aarch64_st2<mode>_dreg"
5420 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5421 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5422 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5425 "st1\\t{%S1.1d - %T1.1d}, %0"
5426 [(set_attr "type" "neon_store1_2reg")]
5429 (define_insn "aarch64_st3<mode>_dreg"
5430 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5431 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5432 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5435 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5436 [(set_attr "type" "neon_store3_3reg")]
5439 (define_insn "aarch64_st3<mode>_dreg"
5440 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5441 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5442 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5445 "st1\\t{%S1.1d - %U1.1d}, %0"
5446 [(set_attr "type" "neon_store1_3reg")]
5449 (define_insn "aarch64_st4<mode>_dreg"
5450 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5451 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5452 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5455 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5456 [(set_attr "type" "neon_store4_4reg")]
5459 (define_insn "aarch64_st4<mode>_dreg"
5460 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5461 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5462 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5465 "st1\\t{%S1.1d - %V1.1d}, %0"
5466 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expander for stN of 64-bit element vectors: build a BLKmode MEM
;; of nregs * 8 bytes and dispatch to the _dreg insns above.
5469 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5470 [(match_operand:DI 0 "register_operand" "r")
5471 (match_operand:VSTRUCT 1 "register_operand" "w")
5472 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5475 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5476 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5478 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Builtin expander for stN of Q-register tuples: builds a MEM in the
;; tuple's own struct mode at the given address and forwards to the
;; generic aarch64_simd_stN pattern.
5482 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5483  [(match_operand:DI 0 "register_operand" "r")
5484   (match_operand:VSTRUCT 1 "register_operand" "w")
5485   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488   machine_mode mode = <VSTRUCT:MODE>mode;
5489   rtx mem = gen_rtx_MEM (mode, operands[0]);
5491   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Builtin expander for single-lane structure stores (stN_lane):
;; operand 0 = base address, operand 1 = register tuple, operand 2 =
;; lane number.  Creates a BLK MEM sized from the element size (the
;; continuation of the set_mem_size expression is outside this view)
;; and emits the vec_store_lanes lane pattern.
5495 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5496  [(match_operand:DI 0 "register_operand" "r")
5497   (match_operand:VSTRUCT 1 "register_operand" "w")
5498   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5499   (match_operand:SI 2 "immediate_operand")]
5502   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5503   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5506   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5507 	mem, operands[1], operands[2]));
;; Builtin expander for st1: on big-endian targets use the dedicated
;; be_st1 pattern (which keeps the architectural lane order); on
;; little-endian a plain vector move to memory suffices.
5511 (define_expand "aarch64_st1<VALL_F16:mode>"
5512  [(match_operand:DI 0 "register_operand")
5513   (match_operand:VALL_F16 1 "register_operand")]
5516   machine_mode mode = <VALL_F16:MODE>mode;
5517   rtx mem = gen_rtx_MEM (mode, operands[0]);
5519   if (BYTES_BIG_ENDIAN)
5520     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5522     emit_move_insn (mem, operands[1]);
5526 ;; Expander for builtins to insert vector registers into large
5527 ;; opaque integer modes.
5529 ;; Q-register list. We don't need a D-reg inserter as we zero
5530 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert Q register operand 2 into slot operand 3 of tuple operand 0,
;; after first copying the old tuple value (operand 1) across.  The
;; insertion is done through a SUBREG at byte offset part * 16 (one
;; Q register = 16 bytes per slot).
5532 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5533  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5534   (match_operand:VSTRUCT 1 "register_operand" "0")
5535   (match_operand:VQ 2 "register_operand" "w")
5536   (match_operand:SI 3 "immediate_operand" "i")]
5539   int part = INTVAL (operands[3]);
5540   int offset = part * 16;
5542   emit_move_insn (operands[0], operands[1]);
5543   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5548 ;; Standard pattern name vec_init<mode><Vel>.
;; Standard-name expander: initialize vector operand 0 from the
;; PARALLEL of element values in operand 1; all the work is done in
;; aarch64_expand_vector_init.
5550 (define_expand "vec_init<mode><Vel>"
5551   [(match_operand:VALL_F16 0 "register_operand" "")
5552    (match_operand 1 "" "")]
5555   aarch64_expand_vector_init (operands[0], operands[1]);
;; Load one element from memory and replicate it to every lane of the
;; destination vector (LD1R).
5559 (define_insn "*aarch64_simd_ld1r<mode>"
5560   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5561 	(vec_duplicate:VALL_F16
5562 	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5564   "ld1r\\t{%0.<Vtype>}, %1"
5565   [(set_attr "type" "neon_load1_all_lanes")]
;; Vector floating-point reciprocal estimate (FRECPE).
5568 (define_insn "aarch64_frecpe<mode>"
5569   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5570 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5573   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5574   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar floating-point reciprocal estimate/exponent (frecp_suffix
;; selects the FRECPE / FRECPX mnemonic variant).
5577 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5578   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5579 	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5582   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5583   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS), vector and scalar forms via
;; the VHSDF_HSDF iterator; used in Newton-Raphson reciprocal refinement.
5586 (define_insn "aarch64_frecps<mode>"
5587   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5589 	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5590 	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5593   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5594   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on vectors of SI
;; elements.
5597 (define_insn "aarch64_urecpe<mode>"
5598   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5599 	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5602   "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5603   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5605 ;; Standard pattern name vec_extract<mode><Vel>.
;; Standard-name expander: extract lane operand 2 of vector operand 1
;; into scalar operand 0 by delegating to aarch64_get_lane.
5607 (define_expand "vec_extract<mode><Vel>"
5608   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5609    (match_operand:VALL_F16 1 "register_operand" "")
5610    (match_operand:SI 2 "immediate_operand" "")]
5614     (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single-round encrypt/decrypt (AESE/AESD selected by aes_op).
;; Operand 1 is the data/state and must be tied to the destination
;; ("0" constraint); operand 2 is the round key.
5620 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5621   [(set (match_operand:V16QI 0 "register_operand" "=w")
5622         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5623 		       (match_operand:V16QI 2 "register_operand" "w")]
5625   "TARGET_SIMD && TARGET_CRYPTO"
5626   "aes<aes_op>\\t%0.16b, %2.16b"
5627   [(set_attr "type" "crypto_aese")]
5630 ;; When AES/AESMC fusion is enabled we want the register allocation to
5634 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AES MixColumns / InvMixColumns (AESMC/AESIMC).  Two alternatives:
;; the first ties operand 1 to operand 0 and is enabled only when
;; AES/AESMC fusion is on (so the register allocator keeps the pair in
;; the same register, letting the core fuse them); the second is the
;; unconstrained form, always enabled.
5636 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5637   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5638 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5640   "TARGET_SIMD && TARGET_CRYPTO"
5641   "aes<aesmc_op>\\t%0.16b, %1.16b"
5642   [(set_attr "type" "crypto_aesmc")
5643    (set_attr_alternative "enabled"
5644      [(if_then_else (match_test
5645 		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5646 		     (const_string "yes" )
5647 		     (const_string "no"))
5648       (const_string "yes")])]
;; SHA1 fixed-rotate (SHA1H) on a scalar SI value held in a SIMD
;; register.
5653 (define_insn "aarch64_crypto_sha1hsi"
5654   [(set (match_operand:SI 0 "register_operand" "=w")
5655         (unspec:SI [(match_operand:SI 1
5656                        "register_operand" "w")]
5658   "TARGET_SIMD && TARGET_CRYPTO"
5660   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H applied to lane 0 of a V4SI source; lane 0 is the low element
;; only on little-endian, hence the !BYTES_BIG_ENDIAN condition (the
;; big-endian twin below selects lane 3 instead).
5663 (define_insn "aarch64_crypto_sha1hv4si"
5664   [(set (match_operand:SI 0 "register_operand" "=w")
5665 	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5666 		     (parallel [(const_int 0)]))]
5668   "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5670   [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart of the pattern above: the architecturally
;; lowest word of the V4SI source is RTL lane 3 on big-endian.
5673 (define_insn "aarch64_be_crypto_sha1hv4si"
5674   [(set (match_operand:SI 0 "register_operand" "=w")
5675 	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5676 		     (parallel [(const_int 3)]))]
5678   "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5680   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update 1 (SHA1SU1); operand 1 is the accumulated
;; state tied to the destination, operand 2 the second schedule input.
5683 (define_insn "aarch64_crypto_sha1su1v4si"
5684   [(set (match_operand:V4SI 0 "register_operand" "=w")
5685         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5686                       (match_operand:V4SI 2 "register_operand" "w")]
5688   "TARGET_SIMD && TARGET_CRYPTO"
5689   "sha1su1\\t%0.4s, %2.4s"
5690   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash-update round (SHA1C/SHA1M/SHA1P selected by sha1_op):
;; %q0 = tied 128-bit state, %s2 = scalar hash value e, operand 3 =
;; message schedule word.
5693 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5694   [(set (match_operand:V4SI 0 "register_operand" "=w")
5695         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5696                       (match_operand:SI 2 "register_operand" "w")
5697                       (match_operand:V4SI 3 "register_operand" "w")]
5699   "TARGET_SIMD && TARGET_CRYPTO"
5700   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5701   [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update 0 (SHA1SU0); three schedule inputs, the first
;; tied to the destination.
5704 (define_insn "aarch64_crypto_sha1su0v4si"
5705   [(set (match_operand:V4SI 0 "register_operand" "=w")
5706         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5707                       (match_operand:V4SI 2 "register_operand" "w")
5708                       (match_operand:V4SI 3 "register_operand" "w")]
5710   "TARGET_SIMD && TARGET_CRYPTO"
5711   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5712   [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash-update round (SHA256H/SHA256H2 selected by sha256_op);
;; operand 1 is the tied state half, operand 2 the other state half,
;; operand 3 the schedule+constant word.
5717 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5718   [(set (match_operand:V4SI 0 "register_operand" "=w")
5719         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5720                       (match_operand:V4SI 2 "register_operand" "w")
5721                       (match_operand:V4SI 3 "register_operand" "w")]
5723   "TARGET_SIMD && TARGET_CRYPTO"
5724   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5725   [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update 0 (SHA256SU0); operand 1 is tied to the
;; destination, operand 2 supplies the next schedule words.
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other pattern in this file.
5728 (define_insn "aarch64_crypto_sha256su0v4si"
5729   [(set (match_operand:V4SI 0 "register_operand" "=w")
5730         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5731                       (match_operand:V4SI 2 "register_operand" "w")]
5733   "TARGET_SIMD && TARGET_CRYPTO"
5734   "sha256su0\\t%0.4s, %2.4s"
5735   [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update 1 (SHA256SU1); operand 1 tied to destination,
;; operands 2 and 3 are the remaining schedule inputs.
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other pattern in this file.
5738 (define_insn "aarch64_crypto_sha256su1v4si"
5739   [(set (match_operand:V4SI 0 "register_operand" "=w")
5740         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5741                       (match_operand:V4SI 2 "register_operand" "w")
5742                       (match_operand:V4SI 3 "register_operand" "w")]
5744   "TARGET_SIMD && TARGET_CRYPTO"
5745   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5746   [(set_attr "type" "crypto_sha256_slow")]
;; 64x64 -> 128-bit polynomial (carry-less) multiply of the low D
;; registers (PMULL, .1q result).
5751 (define_insn "aarch64_crypto_pmulldi"
5752   [(set (match_operand:TI 0 "register_operand" "=w")
5753         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
5754 		     (match_operand:DI 2 "register_operand" "w")]
5756   "TARGET_SIMD && TARGET_CRYPTO"
5757   "pmull\\t%0.1q, %1.1d, %2.1d"
5758   [(set_attr "type" "crypto_pmull")]
;; Polynomial multiply of the HIGH halves of two V2DI vectors
;; (PMULL2), producing a 128-bit (.1q) result.
5761 (define_insn "aarch64_crypto_pmullv2di"
5762  [(set (match_operand:TI 0 "register_operand" "=w")
5763       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5764 		  (match_operand:V2DI 2 "register_operand" "w")]
5766   "TARGET_SIMD && TARGET_CRYPTO"
5767   "pmull2\\t%0.1q, %1.2d, %2.2d"
5768   [(set_attr "type" "crypto_pmull")]