1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((GET_MODE_SIZE (<MODE>mode) == 16
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || GET_MODE_SIZE (<MODE>mode) == 8)))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1],
125 default: gcc_unreachable ();
128 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
129 neon_logic<q>, neon_to_gp<q>, f_mcr,\
130 mov_reg, neon_move<q>")]
133 (define_insn "*aarch64_simd_mov<VQ:mode>"
134 [(set (match_operand:VQ 0 "nonimmediate_operand"
135 "=w, Umq, m, w, ?r, ?w, ?r, w")
136 (match_operand:VQ 1 "general_operand"
137 "m, Dz, w, w, w, r, r, Dn"))]
139 && (register_operand (operands[0], <MODE>mode)
140 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
142 switch (which_alternative)
145 return "ldr\t%q0, %1";
147 return "stp\txzr, xzr, %0";
149 return "str\t%q1, %0";
151 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
157 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
162 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
163 neon_logic<q>, multiple, multiple,\
164 multiple, neon_move<q>")
165 (set_attr "length" "4,4,4,4,8,8,8,4")]
168 ;; When storing lane zero we can use the normal STR and its more permissive
171 (define_insn "aarch64_store_lane0<mode>"
172 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
173 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
174 (parallel [(match_operand 2 "const_int_operand" "n")])))]
176 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
177 "str\\t%<Vetype>1, %0"
178 [(set_attr "type" "neon_store1_1reg<q>")]
181 (define_insn "load_pair<mode>"
182 [(set (match_operand:VD 0 "register_operand" "=w")
183 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
184 (set (match_operand:VD 2 "register_operand" "=w")
185 (match_operand:VD 3 "memory_operand" "m"))]
187 && rtx_equal_p (XEXP (operands[3], 0),
188 plus_constant (Pmode,
189 XEXP (operands[1], 0),
190 GET_MODE_SIZE (<MODE>mode)))"
192 [(set_attr "type" "neon_ldp")]
195 (define_insn "store_pair<mode>"
196 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
197 (match_operand:VD 1 "register_operand" "w"))
198 (set (match_operand:VD 2 "memory_operand" "=m")
199 (match_operand:VD 3 "register_operand" "w"))]
201 && rtx_equal_p (XEXP (operands[2], 0),
202 plus_constant (Pmode,
203 XEXP (operands[0], 0),
204 GET_MODE_SIZE (<MODE>mode)))"
206 [(set_attr "type" "neon_stp")]
210 [(set (match_operand:VQ 0 "register_operand" "")
211 (match_operand:VQ 1 "register_operand" ""))]
212 "TARGET_SIMD && reload_completed
213 && GP_REGNUM_P (REGNO (operands[0]))
214 && GP_REGNUM_P (REGNO (operands[1]))"
217 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
222 [(set (match_operand:VQ 0 "register_operand" "")
223 (match_operand:VQ 1 "register_operand" ""))]
224 "TARGET_SIMD && reload_completed
225 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
226 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
229 aarch64_split_simd_move (operands[0], operands[1]);
233 (define_expand "aarch64_split_simd_mov<mode>"
234 [(set (match_operand:VQ 0)
235 (match_operand:VQ 1))]
238 rtx dst = operands[0];
239 rtx src = operands[1];
241 if (GP_REGNUM_P (REGNO (src)))
243 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
244 rtx src_high_part = gen_highpart (<VHALF>mode, src);
247 (gen_move_lo_quad_<mode> (dst, src_low_part));
249 (gen_move_hi_quad_<mode> (dst, src_high_part));
254 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
255 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
256 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
257 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
260 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
262 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
268 (define_insn "aarch64_simd_mov_from_<mode>low"
269 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
271 (match_operand:VQ 1 "register_operand" "w")
272 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
273 "TARGET_SIMD && reload_completed"
275 [(set_attr "type" "neon_to_gp<q>")
276 (set_attr "length" "4")
279 (define_insn "aarch64_simd_mov_from_<mode>high"
280 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
282 (match_operand:VQ 1 "register_operand" "w")
283 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
284 "TARGET_SIMD && reload_completed"
286 [(set_attr "type" "neon_to_gp<q>")
287 (set_attr "length" "4")
290 (define_insn "orn<mode>3"
291 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
292 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
293 (match_operand:VDQ_I 2 "register_operand" "w")))]
295 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
296 [(set_attr "type" "neon_logic<q>")]
299 (define_insn "bic<mode>3"
300 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
301 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
302 (match_operand:VDQ_I 2 "register_operand" "w")))]
304 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
305 [(set_attr "type" "neon_logic<q>")]
308 (define_insn "add<mode>3"
309 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
310 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
311 (match_operand:VDQ_I 2 "register_operand" "w")))]
313 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
314 [(set_attr "type" "neon_add<q>")]
317 (define_insn "sub<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
323 [(set_attr "type" "neon_sub<q>")]
326 (define_insn "mul<mode>3"
327 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
328 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
329 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
331 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
332 [(set_attr "type" "neon_mul_<Vetype><q>")]
335 (define_insn "bswap<mode>2"
336 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
337 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
339 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
340 [(set_attr "type" "neon_rev<q>")]
343 (define_insn "aarch64_rbit<mode>"
344 [(set (match_operand:VB 0 "register_operand" "=w")
345 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
348 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
349 [(set_attr "type" "neon_rbit")]
352 (define_expand "ctz<mode>2"
353 [(set (match_operand:VS 0 "register_operand")
354 (ctz:VS (match_operand:VS 1 "register_operand")))]
357 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
358 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
360 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
361 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
366 (define_expand "xorsign<mode>3"
367 [(match_operand:VHSDF 0 "register_operand")
368 (match_operand:VHSDF 1 "register_operand")
369 (match_operand:VHSDF 2 "register_operand")]
373 machine_mode imode = <V_INT_EQUIV>mode;
374 rtx v_bitmask = gen_reg_rtx (imode);
375 rtx op1x = gen_reg_rtx (imode);
376 rtx op2x = gen_reg_rtx (imode);
378 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
379 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
381 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
383 emit_move_insn (v_bitmask,
384 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
385 HOST_WIDE_INT_M1U << bits));
387 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
388 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
389 emit_move_insn (operands[0],
390 lowpart_subreg (<MODE>mode, op1x, imode));
395 ;; These instructions map to the __builtins for the Dot Product operations.
396 (define_insn "aarch64_<sur>dot<vsi2qi>"
397 [(set (match_operand:VS 0 "register_operand" "=w")
398 (plus:VS (match_operand:VS 1 "register_operand" "0")
399 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
400 (match_operand:<VSI2QI> 3 "register_operand" "w")]
403 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
404 [(set_attr "type" "neon_dot")]
407 ;; These expands map to the Dot Product optab the vectorizer checks for.
408 ;; The auto-vectorizer expects a dot product builtin that also does an
409 ;; accumulation into the provided register.
410 ;; Given the following pattern
412 ;; for (i=0; i<len; i++) {
418 ;; This can be auto-vectorized to
419 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
421 ;; given enough iterations. However the vectorizer can keep unrolling the loop
422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
427 (define_expand "<sur>dot_prod<vsi2qi>"
428 [(set (match_operand:VS 0 "register_operand")
429 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
430 (match_operand:<VSI2QI> 2 "register_operand")]
432 (match_operand:VS 3 "register_operand")))]
436 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
438 emit_insn (gen_rtx_SET (operands[0], operands[3]));
442 ;; These instructions map to the __builtins for the Dot Product
443 ;; indexed operations.
444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
445 [(set (match_operand:VS 0 "register_operand" "=w")
446 (plus:VS (match_operand:VS 1 "register_operand" "0")
447 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
448 (match_operand:V8QI 3 "register_operand" "<h_con>")
449 (match_operand:SI 4 "immediate_operand" "i")]
453 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
454 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
456 [(set_attr "type" "neon_dot")]
459 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
460 [(set (match_operand:VS 0 "register_operand" "=w")
461 (plus:VS (match_operand:VS 1 "register_operand" "0")
462 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
463 (match_operand:V16QI 3 "register_operand" "<h_con>")
464 (match_operand:SI 4 "immediate_operand" "i")]
468 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
469 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
471 [(set_attr "type" "neon_dot")]
474 (define_expand "copysign<mode>3"
475 [(match_operand:VHSDF 0 "register_operand")
476 (match_operand:VHSDF 1 "register_operand")
477 (match_operand:VHSDF 2 "register_operand")]
478 "TARGET_FLOAT && TARGET_SIMD"
480 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
481 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
483 emit_move_insn (v_bitmask,
484 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
485 HOST_WIDE_INT_M1U << bits));
486 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
487 operands[2], operands[1]));
492 (define_insn "*aarch64_mul3_elt<mode>"
493 [(set (match_operand:VMUL 0 "register_operand" "=w")
497 (match_operand:VMUL 1 "register_operand" "<h_con>")
498 (parallel [(match_operand:SI 2 "immediate_operand")])))
499 (match_operand:VMUL 3 "register_operand" "w")))]
502 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
503 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
505 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
508 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
509 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
510 (mult:VMUL_CHANGE_NLANES
511 (vec_duplicate:VMUL_CHANGE_NLANES
513 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
514 (parallel [(match_operand:SI 2 "immediate_operand")])))
515 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
518 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
519 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
521 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
524 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
525 [(set (match_operand:VMUL 0 "register_operand" "=w")
528 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
529 (match_operand:VMUL 2 "register_operand" "w")))]
531 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
535 (define_insn "aarch64_rsqrte<mode>"
536 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
537 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
540 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
541 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
543 (define_insn "aarch64_rsqrts<mode>"
544 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
545 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
546 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
549 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
550 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
552 (define_expand "rsqrt<mode>2"
553 [(set (match_operand:VALLF 0 "register_operand" "=w")
554 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
558 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
562 (define_insn "*aarch64_mul3_elt_to_64v2df"
563 [(set (match_operand:DF 0 "register_operand" "=w")
566 (match_operand:V2DF 1 "register_operand" "w")
567 (parallel [(match_operand:SI 2 "immediate_operand")]))
568 (match_operand:DF 3 "register_operand" "w")))]
571 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
572 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
574 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
577 (define_insn "neg<mode>2"
578 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
579 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
581 "neg\t%0.<Vtype>, %1.<Vtype>"
582 [(set_attr "type" "neon_neg<q>")]
585 (define_insn "abs<mode>2"
586 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
589 "abs\t%0.<Vtype>, %1.<Vtype>"
590 [(set_attr "type" "neon_abs<q>")]
593 ;; The intrinsic version of integer ABS must not be allowed to
594 ;; combine with any operation with an integerated ABS step, such
596 (define_insn "aarch64_abs<mode>"
597 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
599 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
602 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
603 [(set_attr "type" "neon_abs<q>")]
606 (define_insn "abd<mode>_3"
607 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
608 (abs:VDQ_BHSI (minus:VDQ_BHSI
609 (match_operand:VDQ_BHSI 1 "register_operand" "w")
610 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
612 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
613 [(set_attr "type" "neon_abd<q>")]
616 (define_insn "aba<mode>_3"
617 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
618 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
619 (match_operand:VDQ_BHSI 1 "register_operand" "w")
620 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
621 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
623 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
624 [(set_attr "type" "neon_arith_acc<q>")]
627 (define_insn "fabd<mode>3"
628 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
631 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
632 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
634 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
635 [(set_attr "type" "neon_fp_abd_<stype><q>")]
638 ;; For AND (vector, register) and BIC (vector, immediate)
639 (define_insn "and<mode>3"
640 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
641 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
642 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
645 switch (which_alternative)
648 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
650 return aarch64_output_simd_mov_immediate (operands[2],
651 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
656 [(set_attr "type" "neon_logic<q>")]
659 ;; For ORR (vector, register) and ORR (vector, immediate)
660 (define_insn "ior<mode>3"
661 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
662 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
663 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
666 switch (which_alternative)
669 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
671 return aarch64_output_simd_mov_immediate (operands[2],
672 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
677 [(set_attr "type" "neon_logic<q>")]
680 (define_insn "xor<mode>3"
681 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
682 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
683 (match_operand:VDQ_I 2 "register_operand" "w")))]
685 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
686 [(set_attr "type" "neon_logic<q>")]
689 (define_insn "one_cmpl<mode>2"
690 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
691 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
693 "not\t%0.<Vbtype>, %1.<Vbtype>"
694 [(set_attr "type" "neon_logic<q>")]
697 (define_insn "aarch64_simd_vec_set<mode>"
698 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
700 (vec_duplicate:VDQ_BHSI
701 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
702 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
703 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
706 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
707 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
708 switch (which_alternative)
711 return "ins\\t%0.<Vetype>[%p2], %w1";
713 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
715 return "ld1\\t{%0.<Vetype>}[%p2], %1";
720 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
723 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
724 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
726 (vec_duplicate:VALL_F16
728 (match_operand:VALL_F16 3 "register_operand" "w")
730 [(match_operand:SI 4 "immediate_operand" "i")])))
731 (match_operand:VALL_F16 1 "register_operand" "0")
732 (match_operand:SI 2 "immediate_operand" "i")))]
735 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
736 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
737 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
739 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
741 [(set_attr "type" "neon_ins<q>")]
744 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
745 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
746 (vec_merge:VALL_F16_NO_V2Q
747 (vec_duplicate:VALL_F16_NO_V2Q
749 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
751 [(match_operand:SI 4 "immediate_operand" "i")])))
752 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
753 (match_operand:SI 2 "immediate_operand" "i")))]
756 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
757 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
758 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
759 INTVAL (operands[4]));
761 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
763 [(set_attr "type" "neon_ins<q>")]
766 (define_insn "aarch64_simd_lshr<mode>"
767 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
768 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
769 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
771 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
772 [(set_attr "type" "neon_shift_imm<q>")]
775 (define_insn "aarch64_simd_ashr<mode>"
776 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
777 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
778 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
780 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
781 [(set_attr "type" "neon_shift_imm<q>")]
784 (define_insn "aarch64_simd_imm_shl<mode>"
785 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
786 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
787 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
789 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
790 [(set_attr "type" "neon_shift_imm<q>")]
793 (define_insn "aarch64_simd_reg_sshl<mode>"
794 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
795 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
796 (match_operand:VDQ_I 2 "register_operand" "w")))]
798 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
799 [(set_attr "type" "neon_shift_reg<q>")]
802 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
803 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
804 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
805 (match_operand:VDQ_I 2 "register_operand" "w")]
806 UNSPEC_ASHIFT_UNSIGNED))]
808 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
809 [(set_attr "type" "neon_shift_reg<q>")]
812 (define_insn "aarch64_simd_reg_shl<mode>_signed"
813 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
814 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
815 (match_operand:VDQ_I 2 "register_operand" "w")]
816 UNSPEC_ASHIFT_SIGNED))]
818 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
819 [(set_attr "type" "neon_shift_reg<q>")]
822 (define_expand "ashl<mode>3"
823 [(match_operand:VDQ_I 0 "register_operand" "")
824 (match_operand:VDQ_I 1 "register_operand" "")
825 (match_operand:SI 2 "general_operand" "")]
828 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
831 if (CONST_INT_P (operands[2]))
833 shift_amount = INTVAL (operands[2]);
834 if (shift_amount >= 0 && shift_amount < bit_width)
836 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
838 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
845 operands[2] = force_reg (SImode, operands[2]);
848 else if (MEM_P (operands[2]))
850 operands[2] = force_reg (SImode, operands[2]);
853 if (REG_P (operands[2]))
855 rtx tmp = gen_reg_rtx (<MODE>mode);
856 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
857 convert_to_mode (<VEL>mode,
860 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
869 (define_expand "lshr<mode>3"
870 [(match_operand:VDQ_I 0 "register_operand" "")
871 (match_operand:VDQ_I 1 "register_operand" "")
872 (match_operand:SI 2 "general_operand" "")]
875 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
878 if (CONST_INT_P (operands[2]))
880 shift_amount = INTVAL (operands[2]);
881 if (shift_amount > 0 && shift_amount <= bit_width)
883 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
885 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
891 operands[2] = force_reg (SImode, operands[2]);
893 else if (MEM_P (operands[2]))
895 operands[2] = force_reg (SImode, operands[2]);
898 if (REG_P (operands[2]))
900 rtx tmp = gen_reg_rtx (SImode);
901 rtx tmp1 = gen_reg_rtx (<MODE>mode);
902 emit_insn (gen_negsi2 (tmp, operands[2]));
903 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
904 convert_to_mode (<VEL>mode,
906 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
916 (define_expand "ashr<mode>3"
917 [(match_operand:VDQ_I 0 "register_operand" "")
918 (match_operand:VDQ_I 1 "register_operand" "")
919 (match_operand:SI 2 "general_operand" "")]
922 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
925 if (CONST_INT_P (operands[2]))
927 shift_amount = INTVAL (operands[2]);
928 if (shift_amount > 0 && shift_amount <= bit_width)
930 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
932 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
938 operands[2] = force_reg (SImode, operands[2]);
940 else if (MEM_P (operands[2]))
942 operands[2] = force_reg (SImode, operands[2]);
945 if (REG_P (operands[2]))
947 rtx tmp = gen_reg_rtx (SImode);
948 rtx tmp1 = gen_reg_rtx (<MODE>mode);
949 emit_insn (gen_negsi2 (tmp, operands[2]));
950 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
951 convert_to_mode (<VEL>mode,
953 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
963 (define_expand "vashl<mode>3"
964 [(match_operand:VDQ_I 0 "register_operand" "")
965 (match_operand:VDQ_I 1 "register_operand" "")
966 (match_operand:VDQ_I 2 "register_operand" "")]
969 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
974 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
975 ;; Negating individual lanes most certainly offsets the
976 ;; gain from vectorization.
977 (define_expand "vashr<mode>3"
978 [(match_operand:VDQ_BHSI 0 "register_operand" "")
979 (match_operand:VDQ_BHSI 1 "register_operand" "")
980 (match_operand:VDQ_BHSI 2 "register_operand" "")]
983 rtx neg = gen_reg_rtx (<MODE>mode);
984 emit (gen_neg<mode>2 (neg, operands[2]));
985 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
991 (define_expand "aarch64_ashr_simddi"
992 [(match_operand:DI 0 "register_operand" "=w")
993 (match_operand:DI 1 "register_operand" "w")
994 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
997 /* An arithmetic shift right by 64 fills the result with copies of the sign
998 bit, just like asr by 63 - however the standard pattern does not handle
1000 if (INTVAL (operands[2]) == 64)
1001 operands[2] = GEN_INT (63);
1002 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1007 (define_expand "vlshr<mode>3"
1008 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1009 (match_operand:VDQ_BHSI 1 "register_operand" "")
1010 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1013 rtx neg = gen_reg_rtx (<MODE>mode);
1014 emit (gen_neg<mode>2 (neg, operands[2]));
1015 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1020 (define_expand "aarch64_lshr_simddi"
1021 [(match_operand:DI 0 "register_operand" "=w")
1022 (match_operand:DI 1 "register_operand" "w")
1023 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1026 if (INTVAL (operands[2]) == 64)
1027 emit_move_insn (operands[0], const0_rtx);
1029 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1034 (define_expand "vec_set<mode>"
1035 [(match_operand:VDQ_BHSI 0 "register_operand")
1036 (match_operand:<VEL> 1 "register_operand")
1037 (match_operand:SI 2 "immediate_operand")]
1040 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1041 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1042 GEN_INT (elem), operands[0]));
1047 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1048 (define_insn "vec_shr_<mode>"
1049 [(set (match_operand:VD 0 "register_operand" "=w")
1050 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1051 (match_operand:SI 2 "immediate_operand" "i")]
1055 if (BYTES_BIG_ENDIAN)
1056 return "shl %d0, %d1, %2";
1058 return "ushr %d0, %d1, %2";
1060 [(set_attr "type" "neon_shift_imm")]
1063 (define_insn "aarch64_simd_vec_setv2di"
1064 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1067 (match_operand:DI 1 "register_operand" "r,w"))
1068 (match_operand:V2DI 3 "register_operand" "0,0")
1069 (match_operand:SI 2 "immediate_operand" "i,i")))]
1072 int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1073 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1074 switch (which_alternative)
1077 return "ins\\t%0.d[%p2], %1";
1079 return "ins\\t%0.d[%p2], %1.d[0]";
1084 [(set_attr "type" "neon_from_gp, neon_ins_q")]
1087 (define_expand "vec_setv2di"
1088 [(match_operand:V2DI 0 "register_operand")
1089 (match_operand:DI 1 "register_operand")
1090 (match_operand:SI 2 "immediate_operand")]
1093 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1094 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1095 GEN_INT (elem), operands[0]));
1100 (define_insn "aarch64_simd_vec_set<mode>"
1101 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1103 (vec_duplicate:VDQF_F16
1104 (match_operand:<VEL> 1 "register_operand" "w"))
1105 (match_operand:VDQF_F16 3 "register_operand" "0")
1106 (match_operand:SI 2 "immediate_operand" "i")))]
1109 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1111 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1112 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1114 [(set_attr "type" "neon_ins<q>")]
1117 (define_expand "vec_set<mode>"
1118 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1119 (match_operand:<VEL> 1 "register_operand" "w")
1120 (match_operand:SI 2 "immediate_operand" "")]
1123 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1124 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1125 GEN_INT (elem), operands[0]));
1131 (define_insn "aarch64_mla<mode>"
1132 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1133 (plus:VDQ_BHSI (mult:VDQ_BHSI
1134 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1135 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1136 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1138 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1139 [(set_attr "type" "neon_mla_<Vetype><q>")]
1142 (define_insn "*aarch64_mla_elt<mode>"
1143 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1146 (vec_duplicate:VDQHS
1148 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1149 (parallel [(match_operand:SI 2 "immediate_operand")])))
1150 (match_operand:VDQHS 3 "register_operand" "w"))
1151 (match_operand:VDQHS 4 "register_operand" "0")))]
1154 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1155 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1157 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1160 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1161 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1164 (vec_duplicate:VDQHS
1166 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1167 (parallel [(match_operand:SI 2 "immediate_operand")])))
1168 (match_operand:VDQHS 3 "register_operand" "w"))
1169 (match_operand:VDQHS 4 "register_operand" "0")))]
1172 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1173 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1175 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1178 (define_insn "*aarch64_mla_elt_merge<mode>"
1179 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1181 (mult:VDQHS (vec_duplicate:VDQHS
1182 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1183 (match_operand:VDQHS 2 "register_operand" "w"))
1184 (match_operand:VDQHS 3 "register_operand" "0")))]
1186 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1187 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1190 (define_insn "aarch64_mls<mode>"
1191 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1192 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1193 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1194 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1196 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1197 [(set_attr "type" "neon_mla_<Vetype><q>")]
1200 (define_insn "*aarch64_mls_elt<mode>"
1201 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1203 (match_operand:VDQHS 4 "register_operand" "0")
1205 (vec_duplicate:VDQHS
1207 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1208 (parallel [(match_operand:SI 2 "immediate_operand")])))
1209 (match_operand:VDQHS 3 "register_operand" "w"))))]
1212 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1213 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1215 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1218 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1219 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1221 (match_operand:VDQHS 4 "register_operand" "0")
1223 (vec_duplicate:VDQHS
1225 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1226 (parallel [(match_operand:SI 2 "immediate_operand")])))
1227 (match_operand:VDQHS 3 "register_operand" "w"))))]
1230 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1231 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1233 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1236 (define_insn "*aarch64_mls_elt_merge<mode>"
1237 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1239 (match_operand:VDQHS 1 "register_operand" "0")
1240 (mult:VDQHS (vec_duplicate:VDQHS
1241 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1242 (match_operand:VDQHS 3 "register_operand" "w"))))]
1244 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1245 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1248 ;; Max/Min operations.
1249 (define_insn "<su><maxmin><mode>3"
1250 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1251 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1252 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1254 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1255 [(set_attr "type" "neon_minmax<q>")]
1258 (define_expand "<su><maxmin>v2di3"
1259 [(set (match_operand:V2DI 0 "register_operand" "")
1260 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1261 (match_operand:V2DI 2 "register_operand" "")))]
1264 enum rtx_code cmp_operator;
1285 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1286 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1287 operands[2], cmp_fmt, operands[1], operands[2]));
1291 ;; Pairwise Integer Max/Min operations.
1292 (define_insn "aarch64_<maxmin_uns>p<mode>"
1293 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1294 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1295 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1298 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1299 [(set_attr "type" "neon_minmax<q>")]
1302 ;; Pairwise FP Max/Min operations.
1303 (define_insn "aarch64_<maxmin_uns>p<mode>"
1304 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1305 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1306 (match_operand:VHSDF 2 "register_operand" "w")]
1309 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1310 [(set_attr "type" "neon_minmax<q>")]
1313 ;; vec_concat gives a new vector with the low elements from operand 1, and
1314 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1315 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1316 ;; What that means, is that the RTL descriptions of the below patterns
1317 ;; need to change depending on endianness.
1319 ;; Move to the low architectural bits of the register.
1320 ;; On little-endian this is { operand, zeroes }
1321 ;; On big-endian this is { zeroes, operand }
1323 (define_insn "move_lo_quad_internal_<mode>"
1324 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1326 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1327 (vec_duplicate:<VHALF> (const_int 0))))]
1328 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1333 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1334 (set_attr "simd" "yes,*,yes")
1335 (set_attr "fp" "*,yes,*")
1336 (set_attr "length" "4")]
1339 (define_insn "move_lo_quad_internal_<mode>"
1340 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1342 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1344 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1349 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1350 (set_attr "simd" "yes,*,yes")
1351 (set_attr "fp" "*,yes,*")
1352 (set_attr "length" "4")]
1355 (define_insn "move_lo_quad_internal_be_<mode>"
1356 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1358 (vec_duplicate:<VHALF> (const_int 0))
1359 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1360 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1365 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1366 (set_attr "simd" "yes,*,yes")
1367 (set_attr "fp" "*,yes,*")
1368 (set_attr "length" "4")]
1371 (define_insn "move_lo_quad_internal_be_<mode>"
1372 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1375 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1381 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1382 (set_attr "simd" "yes,*,yes")
1383 (set_attr "fp" "*,yes,*")
1384 (set_attr "length" "4")]
1387 (define_expand "move_lo_quad_<mode>"
1388 [(match_operand:VQ 0 "register_operand")
1389 (match_operand:VQ 1 "register_operand")]
1392 if (BYTES_BIG_ENDIAN)
1393 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1395 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1400 ;; Move operand1 to the high architectural bits of the register, keeping
1401 ;; the low architectural bits of operand2.
1402 ;; For little-endian this is { operand2, operand1 }
1403 ;; For big-endian this is { operand1, operand2 }
1405 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1406 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1410 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1411 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1412 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1414 ins\\t%0.d[1], %1.d[0]
1416 [(set_attr "type" "neon_ins")]
1419 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1420 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1422 (match_operand:<VHALF> 1 "register_operand" "w,r")
1425 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1426 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1428 ins\\t%0.d[1], %1.d[0]
1430 [(set_attr "type" "neon_ins")]
1433 (define_expand "move_hi_quad_<mode>"
1434 [(match_operand:VQ 0 "register_operand" "")
1435 (match_operand:<VHALF> 1 "register_operand" "")]
1438 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1439 if (BYTES_BIG_ENDIAN)
1440 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1443 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1448 ;; Narrowing operations.
1451 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1452 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1453 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1455 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1456 [(set_attr "type" "neon_shift_imm_narrow_q")]
1459 (define_expand "vec_pack_trunc_<mode>"
1460 [(match_operand:<VNARROWD> 0 "register_operand" "")
1461 (match_operand:VDN 1 "register_operand" "")
1462 (match_operand:VDN 2 "register_operand" "")]
1465 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1466 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1467 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1469 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1470 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1471 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1477 (define_insn "vec_pack_trunc_<mode>"
1478 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1479 (vec_concat:<VNARROWQ2>
1480 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1481 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1484 if (BYTES_BIG_ENDIAN)
1485 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1487 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1489 [(set_attr "type" "multiple")
1490 (set_attr "length" "8")]
1493 ;; Widening operations.
1495 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1496 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1497 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1498 (match_operand:VQW 1 "register_operand" "w")
1499 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1502 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1503 [(set_attr "type" "neon_shift_imm_long")]
1506 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1507 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1508 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1509 (match_operand:VQW 1 "register_operand" "w")
1510 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1513 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1514 [(set_attr "type" "neon_shift_imm_long")]
1517 (define_expand "vec_unpack<su>_hi_<mode>"
1518 [(match_operand:<VWIDE> 0 "register_operand" "")
1519 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1522 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1523 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1529 (define_expand "vec_unpack<su>_lo_<mode>"
1530 [(match_operand:<VWIDE> 0 "register_operand" "")
1531 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1534 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1535 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1541 ;; Widening arithmetic.
1543 (define_insn "*aarch64_<su>mlal_lo<mode>"
1544 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1547 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1548 (match_operand:VQW 2 "register_operand" "w")
1549 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1550 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1551 (match_operand:VQW 4 "register_operand" "w")
1553 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1555 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1556 [(set_attr "type" "neon_mla_<Vetype>_long")]
1559 (define_insn "*aarch64_<su>mlal_hi<mode>"
1560 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1563 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1564 (match_operand:VQW 2 "register_operand" "w")
1565 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1566 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1567 (match_operand:VQW 4 "register_operand" "w")
1569 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1572 [(set_attr "type" "neon_mla_<Vetype>_long")]
1575 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1576 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578 (match_operand:<VWIDE> 1 "register_operand" "0")
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1587 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1588 [(set_attr "type" "neon_mla_<Vetype>_long")]
1591 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1592 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594 (match_operand:<VWIDE> 1 "register_operand" "0")
1596 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1597 (match_operand:VQW 2 "register_operand" "w")
1598 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1599 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1600 (match_operand:VQW 4 "register_operand" "w")
1603 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1604 [(set_attr "type" "neon_mla_<Vetype>_long")]
1607 (define_insn "*aarch64_<su>mlal<mode>"
1608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1612 (match_operand:VD_BHSI 1 "register_operand" "w"))
1614 (match_operand:VD_BHSI 2 "register_operand" "w")))
1615 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1617 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1618 [(set_attr "type" "neon_mla_<Vetype>_long")]
1621 (define_insn "*aarch64_<su>mlsl<mode>"
1622 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1624 (match_operand:<VWIDE> 1 "register_operand" "0")
1627 (match_operand:VD_BHSI 2 "register_operand" "w"))
1629 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1631 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1632 [(set_attr "type" "neon_mla_<Vetype>_long")]
1635 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1637 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1638 (match_operand:VQW 1 "register_operand" "w")
1639 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1640 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1641 (match_operand:VQW 2 "register_operand" "w")
1644 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1645 [(set_attr "type" "neon_mul_<Vetype>_long")]
1648 (define_expand "vec_widen_<su>mult_lo_<mode>"
1649 [(match_operand:<VWIDE> 0 "register_operand" "")
1650 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1651 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1654 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1655 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1662 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1663 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665 (match_operand:VQW 1 "register_operand" "w")
1666 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668 (match_operand:VQW 2 "register_operand" "w")
1671 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1672 [(set_attr "type" "neon_mul_<Vetype>_long")]
1675 (define_expand "vec_widen_<su>mult_hi_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand" "")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1678 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1681 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1682 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1696 ;; Floating-point operations can raise an exception. Vectorizing such
1697 ;; operations are safe because of reasons explained below.
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling, however this is an optional feature. In the
1701 ;; event of a floating-point exception being raised by vectorised
1703 ;; 1. If trapped floating-point exceptions are available, then a trap
1704 ;; will be taken when any lane raises an enabled exception. A trap
1705 ;; handler may determine which lane raised the exception.
1706 ;; 2. Alternatively a sticky exception flag is set in the
1707 ;; floating-point status register (FPSR). Software may explicitly
1708 ;; test the exception flags, in which case the tests will either
1709 ;; prevent vectorisation, allowing precise identification of the
1710 ;; failing operation, or if tested outside of vectorisable regions
1711 ;; then the specific operation and lane are not of interest.
1713 ;; FP arithmetic operations.
1715 (define_insn "add<mode>3"
1716 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718 (match_operand:VHSDF 2 "register_operand" "w")))]
1720 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 (define_insn "sub<mode>3"
1725 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727 (match_operand:VHSDF 2 "register_operand" "w")))]
1729 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1733 (define_insn "mul<mode>3"
1734 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736 (match_operand:VHSDF 2 "register_operand" "w")))]
1738 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1742 (define_expand "div<mode>3"
1743 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1745 (match_operand:VHSDF 2 "register_operand" "w")))]
1748 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1751 operands[1] = force_reg (<MODE>mode, operands[1]);
1754 (define_insn "*div<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1759 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_div_<stype><q>")]
1763 (define_insn "neg<mode>2"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1771 (define_insn "abs<mode>2"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1775 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1779 (define_insn "fma<mode>4"
1780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782 (match_operand:VHSDF 2 "register_operand" "w")
1783 (match_operand:VHSDF 3 "register_operand" "0")))]
1785 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790 [(set (match_operand:VDQF 0 "register_operand" "=w")
1794 (match_operand:VDQF 1 "register_operand" "<h_con>")
1795 (parallel [(match_operand:SI 2 "immediate_operand")])))
1796 (match_operand:VDQF 3 "register_operand" "w")
1797 (match_operand:VDQF 4 "register_operand" "0")))]
1800 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1803 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1809 (vec_duplicate:VDQSF
1811 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQSF 3 "register_operand" "w")
1814 (match_operand:VDQSF 4 "register_operand" "0")))]
1817 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824 [(set (match_operand:VMUL 0 "register_operand" "=w")
1827 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828 (match_operand:VMUL 2 "register_operand" "w")
1829 (match_operand:VMUL 3 "register_operand" "0")))]
1831 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836 [(set (match_operand:DF 0 "register_operand" "=w")
1839 (match_operand:V2DF 1 "register_operand" "w")
1840 (parallel [(match_operand:SI 2 "immediate_operand")]))
1841 (match_operand:DF 3 "register_operand" "w")
1842 (match_operand:DF 4 "register_operand" "0")))]
1845 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1848 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1851 (define_insn "fnma<mode>4"
1852 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1854 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855 (match_operand:VHSDF 2 "register_operand" "w")
1856 (match_operand:VHSDF 3 "register_operand" "0")))]
1858 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863 [(set (match_operand:VDQF 0 "register_operand" "=w")
1866 (match_operand:VDQF 3 "register_operand" "w"))
1869 (match_operand:VDQF 1 "register_operand" "<h_con>")
1870 (parallel [(match_operand:SI 2 "immediate_operand")])))
1871 (match_operand:VDQF 4 "register_operand" "0")))]
1874 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1877 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1884 (match_operand:VDQSF 3 "register_operand" "w"))
1885 (vec_duplicate:VDQSF
1887 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888 (parallel [(match_operand:SI 2 "immediate_operand")])))
1889 (match_operand:VDQSF 4 "register_operand" "0")))]
1892 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1895 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899 [(set (match_operand:VMUL 0 "register_operand" "=w")
1902 (match_operand:VMUL 2 "register_operand" "w"))
1904 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905 (match_operand:VMUL 3 "register_operand" "0")))]
1907 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912 [(set (match_operand:DF 0 "register_operand" "=w")
1915 (match_operand:V2DF 1 "register_operand" "w")
1916 (parallel [(match_operand:SI 2 "immediate_operand")]))
1918 (match_operand:DF 3 "register_operand" "w"))
1919 (match_operand:DF 4 "register_operand" "0")))]
1922 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1925 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1930 (define_insn "<frint_pattern><mode>2"
1931 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1935 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936 [(set_attr "type" "neon_fp_round_<stype><q>")]
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944 [(match_operand:VHSDF 1 "register_operand" "w")]
1947 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1951 ;; HF Scalar variants of related SIMD instructions.
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953 [(set (match_operand:HI 0 "register_operand" "=w")
1954 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1956 "TARGET_SIMD_F16INST"
1957 "fcvt<frint_suffix><su>\t%h0, %h1"
1958 [(set_attr "type" "neon_fp_to_int_s")]
1961 (define_insn "<optab>_trunchfhi2"
1962 [(set (match_operand:HI 0 "register_operand" "=w")
1963 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964 "TARGET_SIMD_F16INST"
1965 "fcvtz<su>\t%h0, %h1"
1966 [(set_attr "type" "neon_fp_to_int_s")]
1969 (define_insn "<optab>hihf2"
1970 [(set (match_operand:HF 0 "register_operand" "=w")
1971 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972 "TARGET_SIMD_F16INST"
1973 "<su_optab>cvtf\t%h0, %h1"
1974 [(set_attr "type" "neon_int_to_fp_s")]
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1981 (match_operand:VDQF 1 "register_operand" "w")
1982 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1985 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1988 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1990 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991 output_asm_insn (buf, operands);
1994 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000 [(match_operand:VHSDF 1 "register_operand")]
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008 [(match_operand:VHSDF 1 "register_operand")]
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand")
2015 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2023 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2025 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2033 ;; Float widening operations.
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037 (float_extend:<VWIDE> (vec_select:<VHALF>
2038 (match_operand:VQ_HSF 1 "register_operand" "w")
2039 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2042 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043 [(set_attr "type" "neon_fp_cvt_widen_s")]
2046 ;; Convert between fixed-point and floating-point (vector modes)
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050 (unspec:<VHSDF:FCVT_TARGET>
2051 [(match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:SI 2 "immediate_operand" "i")]
2055 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061 (unspec:<VDQ_HSDI:FCVT_TARGET>
2062 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063 (match_operand:SI 2 "immediate_operand" "i")]
2066 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns their behavior is as required.
2079 (define_expand "vec_unpacks_lo_<mode>"
2080 [(match_operand:<VWIDE> 0 "register_operand" "")
2081 (match_operand:VQ_HSF 1 "register_operand" "")]
2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (float_extend:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQ_HSF 1 "register_operand" "w")
2095 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2098 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099 [(set_attr "type" "neon_fp_cvt_widen_s")]
2102 (define_expand "vec_unpacks_hi_<mode>"
2103 [(match_operand:<VWIDE> 0 "register_operand" "")
2104 (match_operand:VQ_HSF 1 "register_operand" "")]
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115 (float_extend:<VWIDE>
2116 (match_operand:VDF 1 "register_operand" "w")))]
2118 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119 [(set_attr "type" "neon_fp_cvt_widen_s")]
2122 ;; Float narrowing operations.
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125 [(set (match_operand:VDF 0 "register_operand" "=w")
2127 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2129 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2136 (match_operand:VDF 1 "register_operand" "0")
2138 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2148 (match_operand:<VWIDE> 2 "register_operand" "w"))
2149 (match_operand:VDF 1 "register_operand" "0")))]
2150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156 [(match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2158 (match_operand:<VWIDE> 2 "register_operand" "w")]
2161 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164 emit_insn (gen (operands[0], operands[1], operands[2]));
2169 (define_expand "vec_pack_trunc_v2df"
2170 [(set (match_operand:V4SF 0 "register_operand")
2172 (float_truncate:V2SF
2173 (match_operand:V2DF 1 "register_operand"))
2174 (float_truncate:V2SF
2175 (match_operand:V2DF 2 "register_operand"))
2179 rtx tmp = gen_reg_rtx (V2SFmode);
2180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2183 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185 tmp, operands[hi]));
2190 (define_expand "vec_pack_trunc_df"
2191 [(set (match_operand:V2SF 0 "register_operand")
2194 (match_operand:DF 1 "register_operand"))
2196 (match_operand:DF 2 "register_operand"))
2200 rtx tmp = gen_reg_rtx (V2SFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2204 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2212 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2214 ;; a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2225 (define_insn "<su><maxmin><mode>3"
2226 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228 (match_operand:VHSDF 2 "register_operand" "w")))]
2230 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
2237 (define_insn "<maxmin_uns><mode>3"
2238 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240 (match_operand:VHSDF 2 "register_operand" "w")]
2243 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2247 ;; 'across lanes' add.
2249 (define_expand "reduc_plus_scal_<mode>"
2250 [(match_operand:<VEL> 0 "register_operand" "=w")
2251 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2255 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256 rtx scratch = gen_reg_rtx (<MODE>mode);
2257 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2263 (define_insn "aarch64_faddp<mode>"
2264 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266 (match_operand:VHSDF 2 "register_operand" "w")]
2269 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274 [(set (match_operand:VDQV 0 "register_operand" "=w")
2275 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2278 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279 [(set_attr "type" "neon_reduc_add<q>")]
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283 [(set (match_operand:V2SI 0 "register_operand" "=w")
2284 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2287 "addp\\t%0.2s, %1.2s, %1.2s"
2288 [(set_attr "type" "neon_reduc_add")]
2291 (define_insn "reduc_plus_scal_<mode>"
2292 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2296 "faddp\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2300 (define_expand "reduc_plus_scal_v4sf"
2301 [(set (match_operand:SF 0 "register_operand")
2302 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2306 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307 rtx scratch = gen_reg_rtx (V4SFmode);
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2314 (define_insn "clrsb<mode>2"
2315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318 "cls\\t%0.<Vtype>, %1.<Vtype>"
2319 [(set_attr "type" "neon_cls<q>")]
2322 (define_insn "clz<mode>2"
2323 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2326 "clz\\t%0.<Vtype>, %1.<Vtype>"
2327 [(set_attr "type" "neon_cls<q>")]
2330 (define_insn "popcount<mode>2"
2331 [(set (match_operand:VB 0 "register_operand" "=w")
2332 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2334 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335 [(set_attr "type" "neon_cnt<q>")]
2338 ;; 'across lanes' max and min ops.
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343 [(match_operand:<VEL> 0 "register_operand")
2344 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2348 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349 rtx scratch = gen_reg_rtx (<MODE>mode);
2350 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2352 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359 [(match_operand:<VEL> 0 "register_operand")
2360 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2364 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365 rtx scratch = gen_reg_rtx (<MODE>mode);
2366 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2368 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2378 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379 [(set_attr "type" "neon_reduc_minmax<q>")]
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383 [(set (match_operand:V2SI 0 "register_operand" "=w")
2384 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2387 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388 [(set_attr "type" "neon_reduc_minmax")]
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2396 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2405 ;; Thus our BSL is of the form:
2406 ;; op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2410 ;; bsl mask, op1, op2
2411 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2412 ;; bit op0, op2, mask
2413 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2414 ;; bif op0, op1, mask
2416 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2425 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428 (match_dup:<V_INT_EQUIV> 3)
2432 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435 [(set_attr "type" "neon_bsl<q>")]
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first. The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2449 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452 (match_dup:<V_INT_EQUIV> 2)))]
2455 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458 [(set_attr "type" "neon_bsl<q>")]
2461 ;; DImode is special, we want to avoid computing operations which are
2462 ;; more naturally computed in general purpose registers in the vector
2463 ;; registers. If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again. However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2476 (match_operand:DI 3 "register_operand" "w,0,w,r")
2477 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2483 bsl\\t%0.8b, %2.8b, %3.8b
2484 bit\\t%0.8b, %2.8b, %1.8b
2485 bif\\t%0.8b, %3.8b, %1.8b
2487 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2488 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2490 /* Split back to individual operations. If we're before reload, and
2491 able to create a temporary register, do so. If we're after reload,
2492 we've got an early-clobber destination register, so use that.
2493 Otherwise, we can't create pseudos and we can't yet guarantee that
2494 operands[0] is safe to write, so FAIL to split. */
2497 if (reload_completed)
2498 scratch = operands[0];
2499 else if (can_create_pseudo_p ())
2500 scratch = gen_reg_rtx (DImode);
2504 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2509 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510 (set_attr "length" "4,4,4,12")]
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2518 (match_operand:DI 3 "register_operand" "w,w,0,r")
2519 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2525 bsl\\t%0.8b, %3.8b, %2.8b
2526 bit\\t%0.8b, %3.8b, %1.8b
2527 bif\\t%0.8b, %2.8b, %1.8b
2529 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2530 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2532 /* Split back to individual operations. If we're before reload, and
2533 able to create a temporary register, do so. If we're after reload,
2534 we've got an early-clobber destination register, so use that.
2535 Otherwise, we can't create pseudos and we can't yet guarantee that
2536 operands[0] is safe to write, so FAIL to split. */
2539 if (reload_completed)
2540 scratch = operands[0];
2541 else if (can_create_pseudo_p ())
2542 scratch = gen_reg_rtx (DImode);
2546 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2551 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552 (set_attr "length" "4,4,4,12")]
2555 (define_expand "aarch64_simd_bsl<mode>"
2556 [(match_operand:VALLDIF 0 "register_operand")
2557 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558 (match_operand:VALLDIF 2 "register_operand")
2559 (match_operand:VALLDIF 3 "register_operand")]
2562 /* We can't alias operands together if they have different modes. */
2563 rtx tmp = operands[0];
2564 if (FLOAT_MODE_P (<MODE>mode))
2566 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2570 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2575 if (tmp != operands[0])
2576 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582 [(match_operand:VALLDI 0 "register_operand")
2583 (match_operand:VALLDI 1 "nonmemory_operand")
2584 (match_operand:VALLDI 2 "nonmemory_operand")
2585 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2588 /* If we have (a = (P) ? -1 : 0);
2589 Then we can simply move the generated mask (result must be int). */
2590 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591 && operands[2] == CONST0_RTX (<MODE>mode))
2592 emit_move_insn (operands[0], operands[3]);
2593 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2594 else if (operands[1] == CONST0_RTX (<MODE>mode)
2595 && operands[2] == CONSTM1_RTX (<MODE>mode))
2596 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2599 if (!REG_P (operands[1]))
2600 operands[1] = force_reg (<MODE>mode, operands[1]);
2601 if (!REG_P (operands[2]))
2602 operands[2] = force_reg (<MODE>mode, operands[2]);
2603 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604 operands[1], operands[2]));
2610 ;; Patterns comparing two vectors to produce a mask.
2612 (define_expand "vec_cmp<mode><mode>"
2613 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614 (match_operator 1 "comparison_operator"
2615 [(match_operand:VSDQ_I_DI 2 "register_operand")
2616 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2619 rtx mask = operands[0];
2620 enum rtx_code code = GET_CODE (operands[1]);
2630 if (operands[3] == CONST0_RTX (<MODE>mode))
2635 if (!REG_P (operands[3]))
2636 operands[3] = force_reg (<MODE>mode, operands[3]);
2644 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2648 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2652 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2656 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2660 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2664 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2668 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2672 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2676 /* Handle NE as !EQ. */
2677 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2682 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694 (match_operator 1 "comparison_operator"
2695 [(match_operand:VDQF 2 "register_operand")
2696 (match_operand:VDQF 3 "nonmemory_operand")]))]
2699 int use_zero_form = 0;
2700 enum rtx_code code = GET_CODE (operands[1]);
2701 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2703 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2712 if (operands[3] == CONST0_RTX (<MODE>mode))
2719 if (!REG_P (operands[3]))
2720 operands[3] = force_reg (<MODE>mode, operands[3]);
2730 comparison = gen_aarch64_cmlt<mode>;
2735 std::swap (operands[2], operands[3]);
2739 comparison = gen_aarch64_cmgt<mode>;
2744 comparison = gen_aarch64_cmle<mode>;
2749 std::swap (operands[2], operands[3]);
2753 comparison = gen_aarch64_cmge<mode>;
2757 comparison = gen_aarch64_cmeq<mode>;
2775 /* FCM returns false for lanes which are unordered, so if we use
2776 the inverse of the comparison we actually want to emit, then
2777 invert the result, we will end up with the correct result.
2778 Note that a NE NaN and NaN NE b are true for all a, b.
2780 Our transformations are:
2781 a UNGE b -> !(b GT a)
2782 a UNGT b -> !(b GE a)
2783 a UNLE b -> !(a GT b)
2784 a UNLT b -> !(a GE b)
2785 a NE b -> !(a EQ b) */
2786 gcc_assert (comparison != NULL);
2787 emit_insn (comparison (operands[0], operands[2], operands[3]));
2788 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2796 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2797 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2803 gcc_assert (comparison != NULL);
2804 emit_insn (comparison (operands[0], operands[2], operands[3]));
2808 /* We first check (a > b || b > a) which is !UNEQ, inverting
2809 this result will then give us (a == b || a UNORDERED b). */
2810 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2811 operands[2], operands[3]));
2812 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2813 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2814 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2818 /* LTGT is not guranteed to not generate a FP exception. So let's
2819 go the faster way : ((a > b) || (b > a)). */
2820 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2821 operands[2], operands[3]));
2822 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2823 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2827 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2828 UNORDERED as !ORDERED. */
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2830 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2831 operands[3], operands[2]));
2832 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2833 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2837 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2838 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2839 operands[3], operands[2]));
2840 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2850 (define_expand "vec_cmpu<mode><mode>"
2851 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2852 (match_operator 1 "comparison_operator"
2853 [(match_operand:VSDQ_I_DI 2 "register_operand")
2854 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2857 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2858 operands[2], operands[3]));
2862 (define_expand "vcond<mode><mode>"
2863 [(set (match_operand:VALLDI 0 "register_operand")
2864 (if_then_else:VALLDI
2865 (match_operator 3 "comparison_operator"
2866 [(match_operand:VALLDI 4 "register_operand")
2867 (match_operand:VALLDI 5 "nonmemory_operand")])
2868 (match_operand:VALLDI 1 "nonmemory_operand")
2869 (match_operand:VALLDI 2 "nonmemory_operand")))]
2872 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2873 enum rtx_code code = GET_CODE (operands[3]);
2875 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2876 it as well as switch operands 1/2 in order to avoid the additional
2880 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2881 operands[4], operands[5]);
2882 std::swap (operands[1], operands[2]);
2884 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2885 operands[4], operands[5]));
2886 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2887 operands[2], mask));
2892 (define_expand "vcond<v_cmp_mixed><mode>"
2893 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2894 (if_then_else:<V_cmp_mixed>
2895 (match_operator 3 "comparison_operator"
2896 [(match_operand:VDQF_COND 4 "register_operand")
2897 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2898 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2899 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2902 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2903 enum rtx_code code = GET_CODE (operands[3]);
2905 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2906 it as well as switch operands 1/2 in order to avoid the additional
2910 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2911 operands[4], operands[5]);
2912 std::swap (operands[1], operands[2]);
2914 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2915 operands[4], operands[5]));
2916 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2917 operands[0], operands[1],
2918 operands[2], mask));
2923 (define_expand "vcondu<mode><mode>"
2924 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2925 (if_then_else:VSDQ_I_DI
2926 (match_operator 3 "comparison_operator"
2927 [(match_operand:VSDQ_I_DI 4 "register_operand")
2928 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2929 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2930 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2933 rtx mask = gen_reg_rtx (<MODE>mode);
2934 enum rtx_code code = GET_CODE (operands[3]);
2936 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2937 it as well as switch operands 1/2 in order to avoid the additional
2941 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2942 operands[4], operands[5]);
2943 std::swap (operands[1], operands[2]);
2945 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2946 operands[4], operands[5]));
2947 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2948 operands[2], mask));
2952 (define_expand "vcondu<mode><v_cmp_mixed>"
2953 [(set (match_operand:VDQF 0 "register_operand")
2955 (match_operator 3 "comparison_operator"
2956 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2957 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2958 (match_operand:VDQF 1 "nonmemory_operand")
2959 (match_operand:VDQF 2 "nonmemory_operand")))]
2962 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2963 enum rtx_code code = GET_CODE (operands[3]);
2965 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2966 it as well as switch operands 1/2 in order to avoid the additional
2970 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2971 operands[4], operands[5]);
2972 std::swap (operands[1], operands[2]);
2974 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2976 operands[4], operands[5]));
2977 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2978 operands[2], mask));
2982 ;; Patterns for AArch64 SIMD Intrinsics.
2984 ;; Lane extraction with sign extension to general purpose register.
2985 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2986 [(set (match_operand:GPI 0 "register_operand" "=r")
2989 (match_operand:VDQQH 1 "register_operand" "w")
2990 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2993 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2994 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2996 [(set_attr "type" "neon_to_gp<q>")]
2999 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3000 [(set (match_operand:SI 0 "register_operand" "=r")
3003 (match_operand:VDQQH 1 "register_operand" "w")
3004 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3007 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3008 return "umov\\t%w0, %1.<Vetype>[%2]";
3010 [(set_attr "type" "neon_to_gp<q>")]
3013 ;; Lane extraction of a value, neither sign nor zero extension
3014 ;; is guaranteed so upper bits should be considered undefined.
3015 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3016 (define_insn "aarch64_get_lane<mode>"
3017 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3019 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3020 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3023 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3024 switch (which_alternative)
3027 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3029 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3031 return "st1\\t{%1.<Vetype>}[%2], %0";
3036 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3039 (define_insn "load_pair_lanes<mode>"
3040 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3042 (match_operand:VDC 1 "memory_operand" "Utq")
3043 (match_operand:VDC 2 "memory_operand" "m")))]
3044 "TARGET_SIMD && !STRICT_ALIGNMENT
3045 && rtx_equal_p (XEXP (operands[2], 0),
3046 plus_constant (Pmode,
3047 XEXP (operands[1], 0),
3048 GET_MODE_SIZE (<MODE>mode)))"
3050 [(set_attr "type" "neon_load1_1reg_q")]
3053 (define_insn "store_pair_lanes<mode>"
3054 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3056 (match_operand:VDC 1 "register_operand" "w, r")
3057 (match_operand:VDC 2 "register_operand" "w, r")))]
3061 stp\\t%x1, %x2, %y0"
3062 [(set_attr "type" "neon_stp, store_16")]
3065 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3068 (define_insn "*aarch64_combinez<mode>"
3069 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3071 (match_operand:VDC 1 "general_operand" "w,?r,m")
3072 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3073 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3078 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3079 (set_attr "simd" "yes,*,yes")
3080 (set_attr "fp" "*,yes,*")]
3083 (define_insn "*aarch64_combinez_be<mode>"
3084 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3086 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3087 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3088 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3093 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3094 (set_attr "simd" "yes,*,yes")
3095 (set_attr "fp" "*,yes,*")]
3098 (define_expand "aarch64_combine<mode>"
3099 [(match_operand:<VDBL> 0 "register_operand")
3100 (match_operand:VDC 1 "register_operand")
3101 (match_operand:VDC 2 "register_operand")]
3104 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3110 (define_expand "aarch64_simd_combine<mode>"
3111 [(match_operand:<VDBL> 0 "register_operand")
3112 (match_operand:VDC 1 "register_operand")
3113 (match_operand:VDC 2 "register_operand")]
3116 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3117 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3120 [(set_attr "type" "multiple")]
3123 ;; <su><addsub>l<q>.
3125 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3126 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3127 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3128 (match_operand:VQW 1 "register_operand" "w")
3129 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3130 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131 (match_operand:VQW 2 "register_operand" "w")
3134 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3135 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3138 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3140 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3141 (match_operand:VQW 1 "register_operand" "w")
3142 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3143 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3144 (match_operand:VQW 2 "register_operand" "w")
3147 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3148 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3152 (define_expand "aarch64_saddl2<mode>"
3153 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (match_operand:VQW 1 "register_operand" "w")
3155 (match_operand:VQW 2 "register_operand" "w")]
3158 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3159 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3164 (define_expand "aarch64_uaddl2<mode>"
3165 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3166 (match_operand:VQW 1 "register_operand" "w")
3167 (match_operand:VQW 2 "register_operand" "w")]
3170 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3171 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3176 (define_expand "aarch64_ssubl2<mode>"
3177 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (match_operand:VQW 1 "register_operand" "w")
3179 (match_operand:VQW 2 "register_operand" "w")]
3182 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3188 (define_expand "aarch64_usubl2<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:VQW 1 "register_operand" "w")
3191 (match_operand:VQW 2 "register_operand" "w")]
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3200 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3201 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3202 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3203 (match_operand:VD_BHSI 1 "register_operand" "w"))
3205 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3207 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3208 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3211 ;; <su><addsub>w<q>.
3213 (define_expand "widen_ssum<mode>3"
3214 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3215 (plus:<VDBLW> (sign_extend:<VDBLW>
3216 (match_operand:VQW 1 "register_operand" ""))
3217 (match_operand:<VDBLW> 2 "register_operand" "")))]
3220 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3221 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3223 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3225 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3230 (define_expand "widen_ssum<mode>3"
3231 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3232 (plus:<VWIDE> (sign_extend:<VWIDE>
3233 (match_operand:VD_BHSI 1 "register_operand" ""))
3234 (match_operand:<VWIDE> 2 "register_operand" "")))]
3237 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3241 (define_expand "widen_usum<mode>3"
3242 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3243 (plus:<VDBLW> (zero_extend:<VDBLW>
3244 (match_operand:VQW 1 "register_operand" ""))
3245 (match_operand:<VDBLW> 2 "register_operand" "")))]
3248 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3249 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3251 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3253 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3258 (define_expand "widen_usum<mode>3"
3259 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3260 (plus:<VWIDE> (zero_extend:<VWIDE>
3261 (match_operand:VD_BHSI 1 "register_operand" ""))
3262 (match_operand:<VWIDE> 2 "register_operand" "")))]
3265 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3273 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3275 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3276 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3279 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3280 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3281 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3284 (match_operand:VQW 2 "register_operand" "w")
3285 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3287 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3288 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3291 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3293 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3296 (match_operand:VQW 2 "register_operand" "w")
3297 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3299 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3300 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3303 (define_expand "aarch64_saddw2<mode>"
3304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3305 (match_operand:<VWIDE> 1 "register_operand" "w")
3306 (match_operand:VQW 2 "register_operand" "w")]
3309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3310 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3315 (define_expand "aarch64_uaddw2<mode>"
3316 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3317 (match_operand:<VWIDE> 1 "register_operand" "w")
3318 (match_operand:VQW 2 "register_operand" "w")]
3321 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3322 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3328 (define_expand "aarch64_ssubw2<mode>"
3329 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3330 (match_operand:<VWIDE> 1 "register_operand" "w")
3331 (match_operand:VQW 2 "register_operand" "w")]
3334 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3335 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3340 (define_expand "aarch64_usubw2<mode>"
3341 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (match_operand:<VWIDE> 1 "register_operand" "w")
3343 (match_operand:VQW 2 "register_operand" "w")]
3346 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3352 ;; <su><r>h<addsub>.
3354 (define_insn "aarch64_<sur>h<addsub><mode>"
3355 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3356 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3357 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3360 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3361 [(set_attr "type" "neon_<addsub>_halve<q>")]
3364 ;; <r><addsub>hn<q>.
3366 (define_insn "aarch64_<sur><addsub>hn<mode>"
3367 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3368 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3369 (match_operand:VQN 2 "register_operand" "w")]
3372 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3373 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3376 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3377 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3378 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3379 (match_operand:VQN 2 "register_operand" "w")
3380 (match_operand:VQN 3 "register_operand" "w")]
3383 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3384 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3389 (define_insn "aarch64_pmul<mode>"
3390 [(set (match_operand:VB 0 "register_operand" "=w")
3391 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3392 (match_operand:VB 2 "register_operand" "w")]
3395 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3396 [(set_attr "type" "neon_mul_<Vetype><q>")]
3401 (define_insn "aarch64_fmulx<mode>"
3402 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3404 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3405 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3408 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3409 [(set_attr "type" "neon_fp_mul_<stype>")]
3412 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3414 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3415 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3417 [(match_operand:VDQSF 1 "register_operand" "w")
3418 (vec_duplicate:VDQSF
3420 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3421 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3425 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3426 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3428 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3431 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3433 (define_insn "*aarch64_mulx_elt<mode>"
3434 [(set (match_operand:VDQF 0 "register_operand" "=w")
3436 [(match_operand:VDQF 1 "register_operand" "w")
3439 (match_operand:VDQF 2 "register_operand" "w")
3440 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3444 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3445 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3447 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3452 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3453 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3455 [(match_operand:VHSDF 1 "register_operand" "w")
3456 (vec_duplicate:VHSDF
3457 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3460 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3461 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3464 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3465 ;; vmulxd_lane_f64 == vmulx_lane_f64
3466 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3468 (define_insn "*aarch64_vgetfmulx<mode>"
3469 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3471 [(match_operand:<VEL> 1 "register_operand" "w")
3473 (match_operand:VDQF 2 "register_operand" "w")
3474 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3478 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3479 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3481 [(set_attr "type" "fmul<Vetype>")]
3485 (define_insn "aarch64_<su_optab><optab><mode>"
3486 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3487 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3488 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3490 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3491 [(set_attr "type" "neon_<optab><q>")]
3494 ;; suqadd and usqadd
3496 (define_insn "aarch64_<sur>qadd<mode>"
3497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3499 (match_operand:VSDQ_I 2 "register_operand" "w")]
3502 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3503 [(set_attr "type" "neon_qadd<q>")]
3508 (define_insn "aarch64_sqmovun<mode>"
3509 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3510 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3513 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3514 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3517 ;; sqmovn and uqmovn
3519 (define_insn "aarch64_<sur>qmovn<mode>"
3520 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3521 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3524 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3525 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3530 (define_insn "aarch64_s<optab><mode>"
3531 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3533 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3535 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3536 [(set_attr "type" "neon_<optab><q>")]
3541 (define_insn "aarch64_sq<r>dmulh<mode>"
3542 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3544 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3545 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3548 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3549 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3554 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3555 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3557 [(match_operand:VDQHS 1 "register_operand" "w")
3559 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3560 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3564 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3565 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3566 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3569 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3570 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3572 [(match_operand:VDQHS 1 "register_operand" "w")
3574 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3575 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3579 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3580 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3581 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3584 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3585 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3587 [(match_operand:SD_HSI 1 "register_operand" "w")
3589 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3590 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3594 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3595 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3596 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3599 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3600 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3602 [(match_operand:SD_HSI 1 "register_operand" "w")
3604 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3605 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3609 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3610 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3611 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3616 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3617 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3619 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3620 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3621 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3624 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3625 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3628 ;; sqrdml[as]h_lane.
3630 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3631 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3633 [(match_operand:VDQHS 1 "register_operand" "0")
3634 (match_operand:VDQHS 2 "register_operand" "w")
3636 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3637 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3641 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3643 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3648 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3649 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3651 [(match_operand:SD_HSI 1 "register_operand" "0")
3652 (match_operand:SD_HSI 2 "register_operand" "w")
3654 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3655 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3659 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3661 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3663 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3666 ;; sqrdml[as]h_laneq.
3668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3669 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3671 [(match_operand:VDQHS 1 "register_operand" "0")
3672 (match_operand:VDQHS 2 "register_operand" "w")
3674 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3675 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3679 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3681 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3683 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3686 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3687 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3689 [(match_operand:SD_HSI 1 "register_operand" "0")
3690 (match_operand:SD_HSI 2 "register_operand" "w")
3692 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3693 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3697 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3699 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3701 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3706 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3707 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3709 (match_operand:<VWIDE> 1 "register_operand" "0")
3712 (sign_extend:<VWIDE>
3713 (match_operand:VSD_HSI 2 "register_operand" "w"))
3714 (sign_extend:<VWIDE>
3715 (match_operand:VSD_HSI 3 "register_operand" "w")))
3718 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3719 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3724 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3725 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3727 (match_operand:<VWIDE> 1 "register_operand" "0")
3730 (sign_extend:<VWIDE>
3731 (match_operand:VD_HSI 2 "register_operand" "w"))
3732 (sign_extend:<VWIDE>
3733 (vec_duplicate:VD_HSI
3735 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3736 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3741 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3743 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3745 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3748 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3751 (match_operand:<VWIDE> 1 "register_operand" "0")
3754 (sign_extend:<VWIDE>
3755 (match_operand:VD_HSI 2 "register_operand" "w"))
3756 (sign_extend:<VWIDE>
3757 (vec_duplicate:VD_HSI
3759 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3760 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3765 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3767 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3769 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3772 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3775 (match_operand:<VWIDE> 1 "register_operand" "0")
3778 (sign_extend:<VWIDE>
3779 (match_operand:SD_HSI 2 "register_operand" "w"))
3780 (sign_extend:<VWIDE>
3782 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3783 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3788 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3790 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3792 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3795 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3796 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3798 (match_operand:<VWIDE> 1 "register_operand" "0")
3801 (sign_extend:<VWIDE>
3802 (match_operand:SD_HSI 2 "register_operand" "w"))
3803 (sign_extend:<VWIDE>
3805 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3806 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3811 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3813 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3815 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3820 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3821 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3823 (match_operand:<VWIDE> 1 "register_operand" "0")
3826 (sign_extend:<VWIDE>
3827 (match_operand:VD_HSI 2 "register_operand" "w"))
3828 (sign_extend:<VWIDE>
3829 (vec_duplicate:VD_HSI
3830 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3833 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3834 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3839 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3840 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3842 (match_operand:<VWIDE> 1 "register_operand" "0")
3845 (sign_extend:<VWIDE>
3847 (match_operand:VQ_HSI 2 "register_operand" "w")
3848 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3849 (sign_extend:<VWIDE>
3851 (match_operand:VQ_HSI 3 "register_operand" "w")
3855 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3856 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3859 (define_expand "aarch64_sqdmlal2<mode>"
3860 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3861 (match_operand:<VWIDE> 1 "register_operand" "w")
3862 (match_operand:VQ_HSI 2 "register_operand" "w")
3863 (match_operand:VQ_HSI 3 "register_operand" "w")]
3866 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3867 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3868 operands[2], operands[3], p));
3872 (define_expand "aarch64_sqdmlsl2<mode>"
3873 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3874 (match_operand:<VWIDE> 1 "register_operand" "w")
3875 (match_operand:VQ_HSI 2 "register_operand" "w")
3876 (match_operand:VQ_HSI 3 "register_operand" "w")]
3879 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3880 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3881 operands[2], operands[3], p));
3887 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3888 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3890 (match_operand:<VWIDE> 1 "register_operand" "0")
3893 (sign_extend:<VWIDE>
3895 (match_operand:VQ_HSI 2 "register_operand" "w")
3896 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3897 (sign_extend:<VWIDE>
3898 (vec_duplicate:<VHALF>
3900 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3901 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3906 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3908 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3910 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3913 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3914 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3916 (match_operand:<VWIDE> 1 "register_operand" "0")
3919 (sign_extend:<VWIDE>
3921 (match_operand:VQ_HSI 2 "register_operand" "w")
3922 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3923 (sign_extend:<VWIDE>
3924 (vec_duplicate:<VHALF>
3926 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3927 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3932 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3934 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3936 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3939 (define_expand "aarch64_sqdmlal2_lane<mode>"
3940 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3941 (match_operand:<VWIDE> 1 "register_operand" "w")
3942 (match_operand:VQ_HSI 2 "register_operand" "w")
3943 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3944 (match_operand:SI 4 "immediate_operand" "i")]
3947 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3948 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3949 operands[2], operands[3],
3954 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3955 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3956 (match_operand:<VWIDE> 1 "register_operand" "w")
3957 (match_operand:VQ_HSI 2 "register_operand" "w")
3958 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3959 (match_operand:SI 4 "immediate_operand" "i")]
3962 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3963 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3964 operands[2], operands[3],
3969 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3970 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3971 (match_operand:<VWIDE> 1 "register_operand" "w")
3972 (match_operand:VQ_HSI 2 "register_operand" "w")
3973 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3974 (match_operand:SI 4 "immediate_operand" "i")]
3977 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3978 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3979 operands[2], operands[3],
3984 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3985 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3986 (match_operand:<VWIDE> 1 "register_operand" "w")
3987 (match_operand:VQ_HSI 2 "register_operand" "w")
3988 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3989 (match_operand:SI 4 "immediate_operand" "i")]
3992 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3993 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3994 operands[2], operands[3],
3999 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4000 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4002 (match_operand:<VWIDE> 1 "register_operand" "0")
4005 (sign_extend:<VWIDE>
4007 (match_operand:VQ_HSI 2 "register_operand" "w")
4008 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4009 (sign_extend:<VWIDE>
4010 (vec_duplicate:<VHALF>
4011 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4014 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4015 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4018 (define_expand "aarch64_sqdmlal2_n<mode>"
4019 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4020 (match_operand:<VWIDE> 1 "register_operand" "w")
4021 (match_operand:VQ_HSI 2 "register_operand" "w")
4022 (match_operand:<VEL> 3 "register_operand" "w")]
4025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4026 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4027 operands[2], operands[3],
4032 (define_expand "aarch64_sqdmlsl2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:<VWIDE> 1 "register_operand" "w")
4035 (match_operand:VQ_HSI 2 "register_operand" "w")
4036 (match_operand:<VEL> 3 "register_operand" "w")]
4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4040 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4041 operands[2], operands[3],
4048 (define_insn "aarch64_sqdmull<mode>"
4049 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4052 (sign_extend:<VWIDE>
4053 (match_operand:VSD_HSI 1 "register_operand" "w"))
4054 (sign_extend:<VWIDE>
4055 (match_operand:VSD_HSI 2 "register_operand" "w")))
4058 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4059 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4064 (define_insn "aarch64_sqdmull_lane<mode>"
4065 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4068 (sign_extend:<VWIDE>
4069 (match_operand:VD_HSI 1 "register_operand" "w"))
4070 (sign_extend:<VWIDE>
4071 (vec_duplicate:VD_HSI
4073 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4074 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4079 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4080 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4082 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4085 (define_insn "aarch64_sqdmull_laneq<mode>"
4086 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4089 (sign_extend:<VWIDE>
4090 (match_operand:VD_HSI 1 "register_operand" "w"))
4091 (sign_extend:<VWIDE>
4092 (vec_duplicate:VD_HSI
4094 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4095 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4100 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4101 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4103 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4106 (define_insn "aarch64_sqdmull_lane<mode>"
4107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4110 (sign_extend:<VWIDE>
4111 (match_operand:SD_HSI 1 "register_operand" "w"))
4112 (sign_extend:<VWIDE>
4114 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4120 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4121 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4123 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4126 (define_insn "aarch64_sqdmull_laneq<mode>"
4127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4130 (sign_extend:<VWIDE>
4131 (match_operand:SD_HSI 1 "register_operand" "w"))
4132 (sign_extend:<VWIDE>
4134 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4135 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4140 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4141 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4143 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4148 (define_insn "aarch64_sqdmull_n<mode>"
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4156 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4160 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4168 (define_insn "aarch64_sqdmull2<mode>_internal"
4169 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4172 (sign_extend:<VWIDE>
4174 (match_operand:VQ_HSI 1 "register_operand" "w")
4175 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4176 (sign_extend:<VWIDE>
4178 (match_operand:VQ_HSI 2 "register_operand" "w")
4183 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4184 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4187 (define_expand "aarch64_sqdmull2<mode>"
4188 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4189 (match_operand:VQ_HSI 1 "register_operand" "w")
4190 (match_operand:VQ_HSI 2 "register_operand" "w")]
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4201 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4205 (sign_extend:<VWIDE>
4207 (match_operand:VQ_HSI 1 "register_operand" "w")
4208 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4209 (sign_extend:<VWIDE>
4210 (vec_duplicate:<VHALF>
4212 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4213 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4218 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4219 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4221 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4224 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4228 (sign_extend:<VWIDE>
4230 (match_operand:VQ_HSI 1 "register_operand" "w")
4231 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4232 (sign_extend:<VWIDE>
4233 (vec_duplicate:<VHALF>
4235 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4236 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4241 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4242 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4244 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4247 (define_expand "aarch64_sqdmull2_lane<mode>"
4248 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4249 (match_operand:VQ_HSI 1 "register_operand" "w")
4250 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4251 (match_operand:SI 3 "immediate_operand" "i")]
4254 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4255 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4256 operands[2], operands[3],
4261 (define_expand "aarch64_sqdmull2_laneq<mode>"
4262 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4263 (match_operand:VQ_HSI 1 "register_operand" "w")
4264 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4269 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4270 operands[2], operands[3],
4277 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4281 (sign_extend:<VWIDE>
4283 (match_operand:VQ_HSI 1 "register_operand" "w")
4284 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4285 (sign_extend:<VWIDE>
4286 (vec_duplicate:<VHALF>
4287 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4291 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4292 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4295 (define_expand "aarch64_sqdmull2_n<mode>"
4296 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:<VEL> 2 "register_operand" "w")]
4301 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4309 (define_insn "aarch64_<sur>shl<mode>"
4310 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4312 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4313 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4316 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4317 [(set_attr "type" "neon_shift_reg<q>")]
4323 (define_insn "aarch64_<sur>q<r>shl<mode>"
4324 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4326 [(match_operand:VSDQ_I 1 "register_operand" "w")
4327 (match_operand:VSDQ_I 2 "register_operand" "w")]
4330 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4331 [(set_attr "type" "neon_sat_shift_reg<q>")]
4336 (define_insn "aarch64_<sur>shll_n<mode>"
4337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4338 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4340 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4344 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4345 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4347 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4349 [(set_attr "type" "neon_shift_imm_long")]
4354 (define_insn "aarch64_<sur>shll2_n<mode>"
4355 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4356 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4357 (match_operand:SI 2 "immediate_operand" "i")]
4361 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4362 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4364 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4366 [(set_attr "type" "neon_shift_imm_long")]
4371 (define_insn "aarch64_<sur>shr_n<mode>"
4372 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4373 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4375 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4378 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4379 [(set_attr "type" "neon_sat_shift_imm<q>")]
4384 (define_insn "aarch64_<sur>sra_n<mode>"
4385 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4386 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4387 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4392 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4393 [(set_attr "type" "neon_shift_acc<q>")]
4398 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4403 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4406 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4407 [(set_attr "type" "neon_shift_imm<q>")]
4412 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4413 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4414 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4416 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4419 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4420 [(set_attr "type" "neon_sat_shift_imm<q>")]
4426 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4427 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4428 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4430 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4433 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4434 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4438 ;; cm(eq|ge|gt|lt|le)
4439 ;; Note, we have constraints for Dz and Z as different expanders
4440 ;; have different ideas of what should be passed to this pattern.
4442 (define_insn "aarch64_cm<optab><mode>"
4443 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4445 (COMPARISONS:<V_INT_EQUIV>
4446 (match_operand:VDQ_I 1 "register_operand" "w,w")
4447 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4451 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4452 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4453 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4456 (define_insn_and_split "aarch64_cm<optab>di"
4457 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4460 (match_operand:DI 1 "register_operand" "w,w,r")
4461 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4463 (clobber (reg:CC CC_REGNUM))]
4466 "&& reload_completed"
4467 [(set (match_operand:DI 0 "register_operand")
4470 (match_operand:DI 1 "register_operand")
4471 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4474 /* If we are in the general purpose register file,
4475 we split to a sequence of comparison and store. */
4476 if (GP_REGNUM_P (REGNO (operands[0]))
4477 && GP_REGNUM_P (REGNO (operands[1])))
4479 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4480 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4481 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4482 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4485 /* Otherwise, we expand to a similar pattern which does not
4486 clobber CC_REGNUM. */
4488 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4491 (define_insn "*aarch64_cm<optab>di"
4492 [(set (match_operand:DI 0 "register_operand" "=w,w")
4495 (match_operand:DI 1 "register_operand" "w,w")
4496 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4498 "TARGET_SIMD && reload_completed"
4500 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4501 cm<optab>\t%d0, %d1, #0"
4502 [(set_attr "type" "neon_compare, neon_compare_zero")]
4507 (define_insn "aarch64_cm<optab><mode>"
4508 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4510 (UCOMPARISONS:<V_INT_EQUIV>
4511 (match_operand:VDQ_I 1 "register_operand" "w")
4512 (match_operand:VDQ_I 2 "register_operand" "w")
4515 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4516 [(set_attr "type" "neon_compare<q>")]
4519 (define_insn_and_split "aarch64_cm<optab>di"
4520 [(set (match_operand:DI 0 "register_operand" "=w,r")
4523 (match_operand:DI 1 "register_operand" "w,r")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4526 (clobber (reg:CC CC_REGNUM))]
4529 "&& reload_completed"
4530 [(set (match_operand:DI 0 "register_operand")
4533 (match_operand:DI 1 "register_operand")
4534 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4537 /* If we are in the general purpose register file,
4538 we split to a sequence of comparison and store. */
4539 if (GP_REGNUM_P (REGNO (operands[0]))
4540 && GP_REGNUM_P (REGNO (operands[1])))
4542 machine_mode mode = CCmode;
4543 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4544 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4545 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4548 /* Otherwise, we expand to a similar pattern which does not
4549 clobber CC_REGNUM. */
4551 [(set_attr "type" "neon_compare,multiple")]
4554 (define_insn "*aarch64_cm<optab>di"
4555 [(set (match_operand:DI 0 "register_operand" "=w")
4558 (match_operand:DI 1 "register_operand" "w")
4559 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4561 "TARGET_SIMD && reload_completed"
4562 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4563 [(set_attr "type" "neon_compare")]
4568 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4569 ;; we don't have any insns using ne, and aarch64_vcond outputs
4570 ;; not (neg (eq (and x y) 0))
4571 ;; which is rewritten by simplify_rtx as
4572 ;; plus (eq (and x y) 0) -1.
4574 (define_insn "aarch64_cmtst<mode>"
4575 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4579 (match_operand:VDQ_I 1 "register_operand" "w")
4580 (match_operand:VDQ_I 2 "register_operand" "w"))
4581 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4582 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4585 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4586 [(set_attr "type" "neon_tst<q>")]
4589 (define_insn_and_split "aarch64_cmtstdi"
4590 [(set (match_operand:DI 0 "register_operand" "=w,r")
4594 (match_operand:DI 1 "register_operand" "w,r")
4595 (match_operand:DI 2 "register_operand" "w,r"))
4597 (clobber (reg:CC CC_REGNUM))]
4600 "&& reload_completed"
4601 [(set (match_operand:DI 0 "register_operand")
4605 (match_operand:DI 1 "register_operand")
4606 (match_operand:DI 2 "register_operand"))
4609 /* If we are in the general purpose register file,
4610 we split to a sequence of comparison and store. */
4611 if (GP_REGNUM_P (REGNO (operands[0]))
4612 && GP_REGNUM_P (REGNO (operands[1])))
4614 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4615 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4616 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4617 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4618 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4621 /* Otherwise, we expand to a similar pattern which does not
4622 clobber CC_REGNUM. */
4624 [(set_attr "type" "neon_tst,multiple")]
4627 (define_insn "*aarch64_cmtstdi"
4628 [(set (match_operand:DI 0 "register_operand" "=w")
4632 (match_operand:DI 1 "register_operand" "w")
4633 (match_operand:DI 2 "register_operand" "w"))
4636 "cmtst\t%d0, %d1, %d2"
4637 [(set_attr "type" "neon_tst")]
4640 ;; fcm(eq|ge|gt|le|lt)
4642 (define_insn "aarch64_cm<optab><mode>"
4643 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4645 (COMPARISONS:<V_INT_EQUIV>
4646 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4647 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4651 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4652 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4653 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4657 ;; Note we can also handle what would be fac(le|lt) by
4658 ;; generating fac(ge|gt).
4660 (define_insn "aarch64_fac<optab><mode>"
4661 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4663 (FAC_COMPARISONS:<V_INT_EQUIV>
4665 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4667 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4670 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4671 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4676 (define_insn "aarch64_addp<mode>"
4677 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4679 [(match_operand:VD_BHSI 1 "register_operand" "w")
4680 (match_operand:VD_BHSI 2 "register_operand" "w")]
4683 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4684 [(set_attr "type" "neon_reduc_add<q>")]
4687 (define_insn "aarch64_addpdi"
4688 [(set (match_operand:DI 0 "register_operand" "=w")
4690 [(match_operand:V2DI 1 "register_operand" "w")]
4694 [(set_attr "type" "neon_reduc_add")]
4699 (define_expand "sqrt<mode>2"
4700 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4701 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4704 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4708 (define_insn "*sqrt<mode>2"
4709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4710 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4712 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4713 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4716 ;; Patterns for vector struct loads and stores.
4718 (define_insn "aarch64_simd_ld2<mode>"
4719 [(set (match_operand:OI 0 "register_operand" "=w")
4720 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4721 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4724 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725 [(set_attr "type" "neon_load2_2reg<q>")]
4728 (define_insn "aarch64_simd_ld2r<mode>"
4729 [(set (match_operand:OI 0 "register_operand" "=w")
4730 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4734 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4735 [(set_attr "type" "neon_load2_all_lanes<q>")]
4738 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4739 [(set (match_operand:OI 0 "register_operand" "=w")
4740 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4741 (match_operand:OI 2 "register_operand" "0")
4742 (match_operand:SI 3 "immediate_operand" "i")
4743 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4747 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4748 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4750 [(set_attr "type" "neon_load2_one_lane")]
4753 (define_expand "vec_load_lanesoi<mode>"
4754 [(set (match_operand:OI 0 "register_operand" "=w")
4755 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4756 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4760 if (BYTES_BIG_ENDIAN)
4762 rtx tmp = gen_reg_rtx (OImode);
4763 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4764 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4765 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4768 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4772 (define_insn "aarch64_simd_st2<mode>"
4773 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4774 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4775 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4778 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4779 [(set_attr "type" "neon_store2_2reg<q>")]
4782 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4783 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4784 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4785 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4786 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4787 (match_operand:SI 2 "immediate_operand" "i")]
4791 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4792 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4794 [(set_attr "type" "neon_store2_one_lane<q>")]
4797 (define_expand "vec_store_lanesoi<mode>"
4798 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4800 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4804 if (BYTES_BIG_ENDIAN)
4806 rtx tmp = gen_reg_rtx (OImode);
4807 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4808 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4809 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4812 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4816 (define_insn "aarch64_simd_ld3<mode>"
4817 [(set (match_operand:CI 0 "register_operand" "=w")
4818 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4819 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4822 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823 [(set_attr "type" "neon_load3_3reg<q>")]
4826 (define_insn "aarch64_simd_ld3r<mode>"
4827 [(set (match_operand:CI 0 "register_operand" "=w")
4828 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4832 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4833 [(set_attr "type" "neon_load3_all_lanes<q>")]
4836 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4837 [(set (match_operand:CI 0 "register_operand" "=w")
4838 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4839 (match_operand:CI 2 "register_operand" "0")
4840 (match_operand:SI 3 "immediate_operand" "i")
4841 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4846 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4848 [(set_attr "type" "neon_load3_one_lane")]
4851 (define_expand "vec_load_lanesci<mode>"
4852 [(set (match_operand:CI 0 "register_operand" "=w")
4853 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4854 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4858 if (BYTES_BIG_ENDIAN)
4860 rtx tmp = gen_reg_rtx (CImode);
4861 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4862 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4863 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4866 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4870 (define_insn "aarch64_simd_st3<mode>"
4871 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4872 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4873 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4876 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4877 [(set_attr "type" "neon_store3_3reg<q>")]
4880 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4881 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4882 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4884 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4885 (match_operand:SI 2 "immediate_operand" "i")]
4889 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4890 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4892 [(set_attr "type" "neon_store3_one_lane<q>")]
4895 (define_expand "vec_store_lanesci<mode>"
4896 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4898 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4902 if (BYTES_BIG_ENDIAN)
4904 rtx tmp = gen_reg_rtx (CImode);
4905 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4906 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4907 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4910 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4914 (define_insn "aarch64_simd_ld4<mode>"
4915 [(set (match_operand:XI 0 "register_operand" "=w")
4916 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4917 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4920 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921 [(set_attr "type" "neon_load4_4reg<q>")]
4924 (define_insn "aarch64_simd_ld4r<mode>"
4925 [(set (match_operand:XI 0 "register_operand" "=w")
4926 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4930 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4931 [(set_attr "type" "neon_load4_all_lanes<q>")]
4934 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4935 [(set (match_operand:XI 0 "register_operand" "=w")
4936 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4937 (match_operand:XI 2 "register_operand" "0")
4938 (match_operand:SI 3 "immediate_operand" "i")
4939 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4943 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4944 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4946 [(set_attr "type" "neon_load4_one_lane")]
4949 (define_expand "vec_load_lanesxi<mode>"
4950 [(set (match_operand:XI 0 "register_operand" "=w")
4951 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4952 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4956 if (BYTES_BIG_ENDIAN)
4958 rtx tmp = gen_reg_rtx (XImode);
4959 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4960 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4961 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4964 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4968 (define_insn "aarch64_simd_st4<mode>"
4969 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4970 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4971 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4974 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4975 [(set_attr "type" "neon_store4_4reg<q>")]
4978 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4979 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4980 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4982 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4983 (match_operand:SI 2 "immediate_operand" "i")]
4987 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4988 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4990 [(set_attr "type" "neon_store4_one_lane<q>")]
4993 (define_expand "vec_store_lanesxi<mode>"
4994 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4996 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5000 if (BYTES_BIG_ENDIAN)
5002 rtx tmp = gen_reg_rtx (XImode);
5003 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5004 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5005 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5008 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5012 (define_insn_and_split "aarch64_rev_reglist<mode>"
5013 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5015 [(match_operand:VSTRUCT 1 "register_operand" "w")
5016 (match_operand:V16QI 2 "register_operand" "w")]
5017 UNSPEC_REV_REGLIST))]
5020 "&& reload_completed"
5024 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5025 for (i = 0; i < nregs; i++)
5027 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5028 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5029 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5033 [(set_attr "type" "neon_tbl1_q")
5034 (set_attr "length" "<insn_count>")]
5037 ;; Reload patterns for AdvSIMD register list operands.
5039 (define_expand "mov<mode>"
5040 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5041 (match_operand:VSTRUCT 1 "general_operand" ""))]
5044 if (can_create_pseudo_p ())
5046 if (GET_CODE (operands[0]) != REG)
5047 operands[1] = force_reg (<MODE>mode, operands[1]);
5051 (define_insn "*aarch64_mov<mode>"
5052 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5053 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5054 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5055 && (register_operand (operands[0], <MODE>mode)
5056 || register_operand (operands[1], <MODE>mode))"
5059 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5060 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5061 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5062 neon_load<nregs>_<nregs>reg_q")
5063 (set_attr "length" "<insn_count>,4,4")]
5066 (define_insn "aarch64_be_ld1<mode>"
5067 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5068 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5069 "aarch64_simd_struct_operand" "Utv")]
5072 "ld1\\t{%0<Vmtype>}, %1"
5073 [(set_attr "type" "neon_load1_1reg<q>")]
5076 (define_insn "aarch64_be_st1<mode>"
5077 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5078 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5081 "st1\\t{%1<Vmtype>}, %0"
5082 [(set_attr "type" "neon_store1_1reg<q>")]
5085 (define_insn "*aarch64_be_movoi"
5086 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5087 (match_operand:OI 1 "general_operand" " w,w,m"))]
5088 "TARGET_SIMD && BYTES_BIG_ENDIAN
5089 && (register_operand (operands[0], OImode)
5090 || register_operand (operands[1], OImode))"
5095 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5096 (set_attr "length" "8,4,4")]
5099 (define_insn "*aarch64_be_movci"
5100 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5101 (match_operand:CI 1 "general_operand" " w,w,o"))]
5102 "TARGET_SIMD && BYTES_BIG_ENDIAN
5103 && (register_operand (operands[0], CImode)
5104 || register_operand (operands[1], CImode))"
5106 [(set_attr "type" "multiple")
5107 (set_attr "length" "12,4,4")]
5110 (define_insn "*aarch64_be_movxi"
5111 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5112 (match_operand:XI 1 "general_operand" " w,w,o"))]
5113 "TARGET_SIMD && BYTES_BIG_ENDIAN
5114 && (register_operand (operands[0], XImode)
5115 || register_operand (operands[1], XImode))"
5117 [(set_attr "type" "multiple")
5118 (set_attr "length" "16,4,4")]
5122 [(set (match_operand:OI 0 "register_operand")
5123 (match_operand:OI 1 "register_operand"))]
5124 "TARGET_SIMD && reload_completed"
5127 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5132 [(set (match_operand:CI 0 "nonimmediate_operand")
5133 (match_operand:CI 1 "general_operand"))]
5134 "TARGET_SIMD && reload_completed"
5137 if (register_operand (operands[0], CImode)
5138 && register_operand (operands[1], CImode))
5140 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5143 else if (BYTES_BIG_ENDIAN)
5145 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5146 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5147 emit_move_insn (gen_lowpart (V16QImode,
5148 simplify_gen_subreg (TImode, operands[0],
5150 gen_lowpart (V16QImode,
5151 simplify_gen_subreg (TImode, operands[1],
5160 [(set (match_operand:XI 0 "nonimmediate_operand")
5161 (match_operand:XI 1 "general_operand"))]
5162 "TARGET_SIMD && reload_completed"
5165 if (register_operand (operands[0], XImode)
5166 && register_operand (operands[1], XImode))
5168 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5171 else if (BYTES_BIG_ENDIAN)
5173 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5174 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5175 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5176 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5183 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5184 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5185 (match_operand:DI 1 "register_operand" "w")
5186 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5189 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5190 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5193 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199 [(set (match_operand:OI 0 "register_operand" "=w")
5200 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5204 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5205 [(set_attr "type" "neon_load2_2reg<q>")]
5208 (define_insn "aarch64_ld2<mode>_dreg"
5209 [(set (match_operand:OI 0 "register_operand" "=w")
5210 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5214 "ld1\\t{%S0.1d - %T0.1d}, %1"
5215 [(set_attr "type" "neon_load1_2reg<q>")]
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219 [(set (match_operand:CI 0 "register_operand" "=w")
5220 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5224 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5225 [(set_attr "type" "neon_load3_3reg<q>")]
5228 (define_insn "aarch64_ld3<mode>_dreg"
5229 [(set (match_operand:CI 0 "register_operand" "=w")
5230 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5234 "ld1\\t{%S0.1d - %U0.1d}, %1"
5235 [(set_attr "type" "neon_load1_3reg<q>")]
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239 [(set (match_operand:XI 0 "register_operand" "=w")
5240 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5244 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245 [(set_attr "type" "neon_load4_4reg<q>")]
5248 (define_insn "aarch64_ld4<mode>_dreg"
5249 [(set (match_operand:XI 0 "register_operand" "=w")
5250 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254 "ld1\\t{%S0.1d - %V0.1d}, %1"
5255 [(set_attr "type" "neon_load1_4reg<q>")]
5258 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5259 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5260 (match_operand:DI 1 "register_operand" "r")
5261 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5264 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5265 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5267 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5271 (define_expand "aarch64_ld1<VALL_F16:mode>"
5272 [(match_operand:VALL_F16 0 "register_operand")
5273 (match_operand:DI 1 "register_operand")]
5276 machine_mode mode = <VALL_F16:MODE>mode;
5277 rtx mem = gen_rtx_MEM (mode, operands[1]);
5279 if (BYTES_BIG_ENDIAN)
5280 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5282 emit_move_insn (operands[0], mem);
5286 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5287 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5288 (match_operand:DI 1 "register_operand" "r")
5289 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5292 machine_mode mode = <VSTRUCT:MODE>mode;
5293 rtx mem = gen_rtx_MEM (mode, operands[1]);
5295 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5299 (define_expand "aarch64_ld1x2<VQ:mode>"
5300 [(match_operand:OI 0 "register_operand" "=w")
5301 (match_operand:DI 1 "register_operand" "r")
5302 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5305 machine_mode mode = OImode;
5306 rtx mem = gen_rtx_MEM (mode, operands[1]);
5308 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5312 (define_expand "aarch64_ld1x2<VDC:mode>"
5313 [(match_operand:OI 0 "register_operand" "=w")
5314 (match_operand:DI 1 "register_operand" "r")
5315 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5318 machine_mode mode = OImode;
5319 rtx mem = gen_rtx_MEM (mode, operands[1]);
5321 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5326 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5327 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5328 (match_operand:DI 1 "register_operand" "w")
5329 (match_operand:VSTRUCT 2 "register_operand" "0")
5330 (match_operand:SI 3 "immediate_operand" "i")
5331 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5334 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5335 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5338 aarch64_simd_lane_bounds (operands[3], 0,
5339 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5341 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5342 operands[0], mem, operands[2], operands[3]));
5346 ;; Expanders for builtins to extract vector registers from large
5347 ;; opaque integer modes.
5351 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5352 [(match_operand:VDC 0 "register_operand" "=w")
5353 (match_operand:VSTRUCT 1 "register_operand" "w")
5354 (match_operand:SI 2 "immediate_operand" "i")]
5357 int part = INTVAL (operands[2]);
5358 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5359 int offset = part * 16;
5361 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5362 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5368 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5369 [(match_operand:VQ 0 "register_operand" "=w")
5370 (match_operand:VSTRUCT 1 "register_operand" "w")
5371 (match_operand:SI 2 "immediate_operand" "i")]
5374 int part = INTVAL (operands[2]);
5375 int offset = part * 16;
5377 emit_move_insn (operands[0],
5378 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5382 ;; Permuted-store expanders for neon intrinsics.
5384 ;; Permute instructions
5388 (define_expand "vec_perm_const<mode>"
5389 [(match_operand:VALL_F16 0 "register_operand")
5390 (match_operand:VALL_F16 1 "register_operand")
5391 (match_operand:VALL_F16 2 "register_operand")
5392 (match_operand:<V_INT_EQUIV> 3)]
5395 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5396 operands[2], operands[3], <nunits>))
5402 (define_expand "vec_perm<mode>"
5403 [(match_operand:VB 0 "register_operand")
5404 (match_operand:VB 1 "register_operand")
5405 (match_operand:VB 2 "register_operand")
5406 (match_operand:VB 3 "register_operand")]
5409 aarch64_expand_vec_perm (operands[0], operands[1],
5410 operands[2], operands[3], <nunits>);
5414 (define_insn "aarch64_tbl1<mode>"
5415 [(set (match_operand:VB 0 "register_operand" "=w")
5416 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5417 (match_operand:VB 2 "register_operand" "w")]
5420 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5421 [(set_attr "type" "neon_tbl1<q>")]
5424 ;; Two source registers.
5426 (define_insn "aarch64_tbl2v16qi"
5427 [(set (match_operand:V16QI 0 "register_operand" "=w")
5428 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5429 (match_operand:V16QI 2 "register_operand" "w")]
5432 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5433 [(set_attr "type" "neon_tbl2_q")]
5436 (define_insn "aarch64_tbl3<mode>"
5437 [(set (match_operand:VB 0 "register_operand" "=w")
5438 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5439 (match_operand:VB 2 "register_operand" "w")]
5442 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5443 [(set_attr "type" "neon_tbl3")]
5446 (define_insn "aarch64_tbx4<mode>"
5447 [(set (match_operand:VB 0 "register_operand" "=w")
5448 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5449 (match_operand:OI 2 "register_operand" "w")
5450 (match_operand:VB 3 "register_operand" "w")]
5453 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5454 [(set_attr "type" "neon_tbl4")]
5457 ;; Three source registers.
5459 (define_insn "aarch64_qtbl3<mode>"
5460 [(set (match_operand:VB 0 "register_operand" "=w")
5461 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5462 (match_operand:VB 2 "register_operand" "w")]
5465 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5466 [(set_attr "type" "neon_tbl3")]
5469 (define_insn "aarch64_qtbx3<mode>"
5470 [(set (match_operand:VB 0 "register_operand" "=w")
5471 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5472 (match_operand:CI 2 "register_operand" "w")
5473 (match_operand:VB 3 "register_operand" "w")]
5476 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5477 [(set_attr "type" "neon_tbl3")]
5480 ;; Four source registers.
5482 (define_insn "aarch64_qtbl4<mode>"
5483 [(set (match_operand:VB 0 "register_operand" "=w")
5484 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5485 (match_operand:VB 2 "register_operand" "w")]
5488 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5489 [(set_attr "type" "neon_tbl4")]
5492 (define_insn "aarch64_qtbx4<mode>"
5493 [(set (match_operand:VB 0 "register_operand" "=w")
5494 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5495 (match_operand:XI 2 "register_operand" "w")
5496 (match_operand:VB 3 "register_operand" "w")]
5499 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5500 [(set_attr "type" "neon_tbl4")]
5503 (define_insn_and_split "aarch64_combinev16qi"
5504 [(set (match_operand:OI 0 "register_operand" "=w")
5505 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5506 (match_operand:V16QI 2 "register_operand" "w")]
5510 "&& reload_completed"
5513 aarch64_split_combinev16qi (operands);
5516 [(set_attr "type" "multiple")]
5519 ;; This instruction's pattern is generated directly by
5520 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5521 ;; need corresponding changes there.
5522 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5523 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5524 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5525 (match_operand:VALL_F16 2 "register_operand" "w")]
5528 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5529 [(set_attr "type" "neon_permute<q>")]
5532 ;; This instruction's pattern is generated directly by
5533 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5534 ;; need corresponding changes there. Note that the immediate (third)
5535 ;; operand is a lane index not a byte index.
5536 (define_insn "aarch64_ext<mode>"
5537 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5538 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5539 (match_operand:VALL_F16 2 "register_operand" "w")
5540 (match_operand:SI 3 "immediate_operand" "i")]
5544 operands[3] = GEN_INT (INTVAL (operands[3])
5545 * GET_MODE_UNIT_SIZE (<MODE>mode));
5546 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5548 [(set_attr "type" "neon_ext<q>")]
5551 ;; This instruction's pattern is generated directly by
5552 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5553 ;; need corresponding changes there.
5554 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5555 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5556 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5559 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5560 [(set_attr "type" "neon_rev<q>")]
5563 (define_insn "aarch64_st2<mode>_dreg"
5564 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5565 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5566 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5570 [(set_attr "type" "neon_store2_2reg")]
5573 (define_insn "aarch64_st2<mode>_dreg"
5574 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5575 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5576 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579 "st1\\t{%S1.1d - %T1.1d}, %0"
5580 [(set_attr "type" "neon_store1_2reg")]
5583 (define_insn "aarch64_st3<mode>_dreg"
5584 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5585 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5586 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5589 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5590 [(set_attr "type" "neon_store3_3reg")]
5593 (define_insn "aarch64_st3<mode>_dreg"
5594 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5595 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5596 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599 "st1\\t{%S1.1d - %U1.1d}, %0"
5600 [(set_attr "type" "neon_store1_3reg")]
5603 (define_insn "aarch64_st4<mode>_dreg"
5604 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5605 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5606 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5609 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5610 [(set_attr "type" "neon_store4_4reg")]
5613 (define_insn "aarch64_st4<mode>_dreg"
5614 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5615 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5616 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619 "st1\\t{%S1.1d - %V1.1d}, %0"
5620 [(set_attr "type" "neon_store1_4reg")]
5623 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5624 [(match_operand:DI 0 "register_operand" "r")
5625 (match_operand:VSTRUCT 1 "register_operand" "w")
5626 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5629 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5630 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5632 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5636 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5637 [(match_operand:DI 0 "register_operand" "r")
5638 (match_operand:VSTRUCT 1 "register_operand" "w")
5639 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5642 machine_mode mode = <VSTRUCT:MODE>mode;
5643 rtx mem = gen_rtx_MEM (mode, operands[0]);
5645 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5649 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5650 [(match_operand:DI 0 "register_operand" "r")
5651 (match_operand:VSTRUCT 1 "register_operand" "w")
5652 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5653 (match_operand:SI 2 "immediate_operand")]
5656 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5657 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5660 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5661 mem, operands[1], operands[2]));
5665 (define_expand "aarch64_st1<VALL_F16:mode>"
5666 [(match_operand:DI 0 "register_operand")
5667 (match_operand:VALL_F16 1 "register_operand")]
5670 machine_mode mode = <VALL_F16:MODE>mode;
5671 rtx mem = gen_rtx_MEM (mode, operands[0]);
5673 if (BYTES_BIG_ENDIAN)
5674 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5676 emit_move_insn (mem, operands[1]);
5680 ;; Expander for builtins to insert vector registers into large
5681 ;; opaque integer modes.
5683 ;; Q-register list. We don't need a D-reg inserter as we zero
5684 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5686 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5687 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5688 (match_operand:VSTRUCT 1 "register_operand" "0")
5689 (match_operand:VQ 2 "register_operand" "w")
5690 (match_operand:SI 3 "immediate_operand" "i")]
5693 int part = INTVAL (operands[3]);
5694 int offset = part * 16;
5696 emit_move_insn (operands[0], operands[1]);
5697 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5702 ;; Standard pattern name vec_init<mode><Vel>.
5704 (define_expand "vec_init<mode><Vel>"
5705 [(match_operand:VALL_F16 0 "register_operand" "")
5706 (match_operand 1 "" "")]
5709 aarch64_expand_vector_init (operands[0], operands[1]);
5713 (define_insn "*aarch64_simd_ld1r<mode>"
5714 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5715 (vec_duplicate:VALL_F16
5716 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5718 "ld1r\\t{%0.<Vtype>}, %1"
5719 [(set_attr "type" "neon_load1_all_lanes")]
5722 (define_insn "aarch64_simd_ld1<mode>_x2"
5723 [(set (match_operand:OI 0 "register_operand" "=w")
5724 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5725 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5729 [(set_attr "type" "neon_load1_2reg<q>")]
5732 (define_insn "aarch64_simd_ld1<mode>_x2"
5733 [(set (match_operand:OI 0 "register_operand" "=w")
5734 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5735 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5738 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5739 [(set_attr "type" "neon_load1_2reg<q>")]
5743 (define_insn "aarch64_frecpe<mode>"
5744 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5745 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5748 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5749 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5752 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5753 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5754 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5757 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5758 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5761 (define_insn "aarch64_frecps<mode>"
5762 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5764 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5765 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5768 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5769 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5772 (define_insn "aarch64_urecpe<mode>"
5773 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5774 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5777 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5778 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5780 ;; Standard pattern name vec_extract<mode><Vel>.
5782 (define_expand "vec_extract<mode><Vel>"
5783 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5784 (match_operand:VALL_F16 1 "register_operand" "")
5785 (match_operand:SI 2 "immediate_operand" "")]
5789 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5795 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5796 [(set (match_operand:V16QI 0 "register_operand" "=w")
5797 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5798 (match_operand:V16QI 2 "register_operand" "w")]
5800 "TARGET_SIMD && TARGET_CRYPTO"
5801 "aes<aes_op>\\t%0.16b, %2.16b"
5802 [(set_attr "type" "crypto_aese")]
5805 ;; When AES/AESMC fusion is enabled we want the register allocation to
5809 ;; So prefer to tie operand 1 to operand 0 when fusing.
5811 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5812 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5813 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5815 "TARGET_SIMD && TARGET_CRYPTO"
5816 "aes<aesmc_op>\\t%0.16b, %1.16b"
5817 [(set_attr "type" "crypto_aesmc")
5818 (set_attr_alternative "enabled"
5819 [(if_then_else (match_test
5820 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5821 (const_string "yes" )
5822 (const_string "no"))
5823 (const_string "yes")])]
5828 (define_insn "aarch64_crypto_sha1hsi"
5829 [(set (match_operand:SI 0 "register_operand" "=w")
5830 (unspec:SI [(match_operand:SI 1
5831 "register_operand" "w")]
5833 "TARGET_SIMD && TARGET_CRYPTO"
5835 [(set_attr "type" "crypto_sha1_fast")]
5838 (define_insn "aarch64_crypto_sha1hv4si"
5839 [(set (match_operand:SI 0 "register_operand" "=w")
5840 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5841 (parallel [(const_int 0)]))]
5843 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5845 [(set_attr "type" "crypto_sha1_fast")]
5848 (define_insn "aarch64_be_crypto_sha1hv4si"
5849 [(set (match_operand:SI 0 "register_operand" "=w")
5850 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5851 (parallel [(const_int 3)]))]
5853 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5855 [(set_attr "type" "crypto_sha1_fast")]
5858 (define_insn "aarch64_crypto_sha1su1v4si"
5859 [(set (match_operand:V4SI 0 "register_operand" "=w")
5860 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5861 (match_operand:V4SI 2 "register_operand" "w")]
5863 "TARGET_SIMD && TARGET_CRYPTO"
5864 "sha1su1\\t%0.4s, %2.4s"
5865 [(set_attr "type" "crypto_sha1_fast")]
5868 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5869 [(set (match_operand:V4SI 0 "register_operand" "=w")
5870 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5871 (match_operand:SI 2 "register_operand" "w")
5872 (match_operand:V4SI 3 "register_operand" "w")]
5874 "TARGET_SIMD && TARGET_CRYPTO"
5875 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5876 [(set_attr "type" "crypto_sha1_slow")]
5879 (define_insn "aarch64_crypto_sha1su0v4si"
5880 [(set (match_operand:V4SI 0 "register_operand" "=w")
5881 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5882 (match_operand:V4SI 2 "register_operand" "w")
5883 (match_operand:V4SI 3 "register_operand" "w")]
5885 "TARGET_SIMD && TARGET_CRYPTO"
5886 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5887 [(set_attr "type" "crypto_sha1_xor")]
5892 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5893 [(set (match_operand:V4SI 0 "register_operand" "=w")
5894 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5895 (match_operand:V4SI 2 "register_operand" "w")
5896 (match_operand:V4SI 3 "register_operand" "w")]
5898 "TARGET_SIMD && TARGET_CRYPTO"
5899 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5900 [(set_attr "type" "crypto_sha256_slow")]
5903 (define_insn "aarch64_crypto_sha256su0v4si"
5904 [(set (match_operand:V4SI 0 "register_operand" "=w")
5905 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5906 (match_operand:V4SI 2 "register_operand" "w")]
5908 "TARGET_SIMD &&TARGET_CRYPTO"
5909 "sha256su0\\t%0.4s, %2.4s"
5910 [(set_attr "type" "crypto_sha256_fast")]
5913 (define_insn "aarch64_crypto_sha256su1v4si"
5914 [(set (match_operand:V4SI 0 "register_operand" "=w")
5915 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5916 (match_operand:V4SI 2 "register_operand" "w")
5917 (match_operand:V4SI 3 "register_operand" "w")]
5919 "TARGET_SIMD &&TARGET_CRYPTO"
5920 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5921 [(set_attr "type" "crypto_sha256_slow")]
5926 (define_insn "aarch64_crypto_pmulldi"
5927 [(set (match_operand:TI 0 "register_operand" "=w")
5928 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5929 (match_operand:DI 2 "register_operand" "w")]
5931 "TARGET_SIMD && TARGET_CRYPTO"
5932 "pmull\\t%0.1q, %1.1d, %2.1d"
5933 [(set_attr "type" "crypto_pmull")]
5936 (define_insn "aarch64_crypto_pmullv2di"
5937 [(set (match_operand:TI 0 "register_operand" "=w")
5938 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5939 (match_operand:V2DI 2 "register_operand" "w")]
5941 "TARGET_SIMD && TARGET_CRYPTO"
5942 "pmull2\\t%0.1q, %1.2d, %2.2d"
5943 [(set_attr "type" "crypto_pmull")]