1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard move expander for all vector modes (VALL_F16 includes the
;; FP16 vector modes).  NOTE(review): this sample appears to have lines
;; elided (the insn condition / brace lines are not visible here).
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply.  */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((GET_MODE_SIZE (<MODE>mode) == 16
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || GET_MODE_SIZE (<MODE>mode) == 8)))
37 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Misaligned vector move: may not FAIL, so if neither operand is a
;; register, operand 1 is forced into one (memory := constant can be
;; created by the auto-vectorizer).
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: source in a SIMD register (DUP from element 0) or in a
;; general register (DUP from Wn/Xn).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]

;; Floating-point variant: the scalar always lives in a SIMD register,
;; so only the element-0 DUP form is needed.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]

;; Duplicate a selected lane of a same-width vector into all lanes;
;; operand 2 is remapped for big-endian lane numbering before output.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]

;; As above, but the source vector has the opposite width (64 <-> 128
;; bit), hence the lane index is mapped in <VSWAP_WIDTH>mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move.  Alternatives: load, store-zero (STR XZR),
;; store, reg-reg SIMD move, SIMD->GP (UMOV), GP->SIMD (FMOV),
;; GP-GP move, and immediate materialisation.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1],
125 default: gcc_unreachable ();
128 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
129 neon_logic<q>, neon_to_gp<q>, f_mcr,\
130 mov_reg, neon_move<q>")]

;; 128-bit vector move.  Storing zero uses STP XZR, XZR (hence the Umq
;; pair-capable memory constraint on that alternative); GP<->SIMD and
;; GP-GP moves take two instructions (length 8, split later).
133 (define_insn "*aarch64_simd_mov<VQ:mode>"
134 [(set (match_operand:VQ 0 "nonimmediate_operand"
135 "=w, Umq, m, w, ?r, ?w, ?r, w")
136 (match_operand:VQ 1 "general_operand"
137 "m, Dz, w, w, w, r, r, Dn"))]
139 && (register_operand (operands[0], <MODE>mode)
140 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
142 switch (which_alternative)
145 return "ldr\t%q0, %1";
147 return "stp\txzr, xzr, %0";
149 return "str\t%q1, %0";
151 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
157 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
162 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
163 neon_logic<q>, multiple, multiple,\
164 multiple, neon_move<q>")
165 (set_attr "length" "4,4,4,4,8,8,8,4")]

168 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store lane 0 of a vector directly from the scalar view of the SIMD
;; register; only valid when the (endian-adjusted) lane number is 0.
171 (define_insn "aarch64_store_lane0<mode>"
172 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
173 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
174 (parallel [(match_operand 2 "const_int_operand" "n")])))]
176 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
177 "str\\t%<Vetype>1, %0"
178 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers.  The condition requires the
;; second address to be exactly first-address + size, so the pair is
;; truly contiguous.
181 (define_insn "load_pair<mode>"
182 [(set (match_operand:VD 0 "register_operand" "=w")
183 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
184 (set (match_operand:VD 2 "register_operand" "=w")
185 (match_operand:VD 3 "memory_operand" "m"))]
187 && rtx_equal_p (XEXP (operands[3], 0),
188 plus_constant (Pmode,
189 XEXP (operands[1], 0),
190 GET_MODE_SIZE (<MODE>mode)))"
192 [(set_attr "type" "neon_ldp")]

;; STP counterpart of load_pair<mode>, with the same contiguity check
;; on the second store address.
195 (define_insn "store_pair<mode>"
196 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
197 (match_operand:VD 1 "register_operand" "w"))
198 (set (match_operand:VD 2 "memory_operand" "=m")
199 (match_operand:VD 3 "register_operand" "w"))]
201 && rtx_equal_p (XEXP (operands[2], 0),
202 plus_constant (Pmode,
203 XEXP (operands[0], 0),
204 GET_MODE_SIZE (<MODE>mode)))"
206 [(set_attr "type" "neon_stp")]

;; Post-reload split: a 128-bit move where both sides landed in general
;; registers is emitted as two DImode reg-reg moves.
210 [(set (match_operand:VQ 0 "register_operand" "")
211 (match_operand:VQ 1 "register_operand" ""))]
212 "TARGET_SIMD && reload_completed
213 && GP_REGNUM_P (REGNO (operands[0]))
214 && GP_REGNUM_P (REGNO (operands[1]))"
217 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; Post-reload split: a 128-bit move crossing the FP/GP register files
;; (either direction) is handed to aarch64_split_simd_move.
222 [(set (match_operand:VQ 0 "register_operand" "")
223 (match_operand:VQ 1 "register_operand" ""))]
224 "TARGET_SIMD && reload_completed
225 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
226 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
229 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit vector move into two 64-bit halves.  GP source:
;; insert the low/high parts into the vector with move_lo/hi_quad.
;; SIMD source: extract each half into the GP destination with the
;; mov_from_<mode>low/high patterns below.
233 (define_expand "aarch64_split_simd_mov<mode>"
234 [(set (match_operand:VQ 0)
235 (match_operand:VQ 1))]
238 rtx dst = operands[0];
239 rtx src = operands[1];
241 if (GP_REGNUM_P (REGNO (src)))
243 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
244 rtx src_high_part = gen_highpart (<VHALF>mode, src);
247 (gen_move_lo_quad_<mode> (dst, src_low_part));
249 (gen_move_hi_quad_<mode> (dst, src_high_part));
254 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
255 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
256 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
257 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
260 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
262 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

;; Move the low 64-bit half of a vector into a general register;
;; operand 2 is the parallel selecting the low-half lanes.
268 (define_insn "aarch64_simd_mov_from_<mode>low"
269 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
271 (match_operand:VQ 1 "register_operand" "w")
272 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
273 "TARGET_SIMD && reload_completed"
275 [(set_attr "type" "neon_to_gp<q>")
276 (set_attr "length" "4")

;; As above for the high 64-bit half.
279 (define_insn "aarch64_simd_mov_from_<mode>high"
280 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
282 (match_operand:VQ 1 "register_operand" "w")
283 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
284 "TARGET_SIMD && reload_completed"
286 [(set_attr "type" "neon_to_gp<q>")
287 (set_attr "length" "4")
;; ORN: dst = op2 | ~op1.  Note the operand swap in the template — the
;; inverted operand (1) is the last assembly operand.
290 (define_insn "orn<mode>3"
291 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
292 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
293 (match_operand:VDQ_I 2 "register_operand" "w")))]
295 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
296 [(set_attr "type" "neon_logic<q>")]

;; BIC: dst = op2 & ~op1, with the same operand swap as ORN.
299 (define_insn "bic<mode>3"
300 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
301 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
302 (match_operand:VDQ_I 2 "register_operand" "w")))]
304 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
305 [(set_attr "type" "neon_logic<q>")]

;; Vector integer addition.
308 (define_insn "add<mode>3"
309 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
310 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
311 (match_operand:VDQ_I 2 "register_operand" "w")))]
313 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
314 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtraction.
317 (define_insn "sub<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
323 [(set_attr "type" "neon_sub<q>")]

;; Vector multiply; VDQ_BHSI because there is no DImode-lane MUL.
326 (define_insn "mul<mode>3"
327 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
328 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
329 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
331 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
332 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; Byte-swap each element using the REV family.
335 (define_insn "bswap<mode>2"
336 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
337 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
339 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
340 [(set_attr "type" "neon_rev<q>")]

;; Bit-reverse each byte (RBIT), used e.g. by the ctz expander below.
343 (define_insn "aarch64_rbit<mode>"
344 [(set (match_operand:VB 0 "register_operand" "=w")
345 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
348 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
349 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: byte-swap, bit-reverse each byte (via a QI
;; subreg view), then count leading zeros — together reversing the bit
;; order of each element so CLZ computes CTZ.
352 (define_expand "ctz<mode>2"
353 [(set (match_operand:VS 0 "register_operand")
354 (ctz:VS (match_operand:VS 1 "register_operand")))]
357 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
358 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
360 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
361 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));

;; xorsign: op0 = op1 with the sign flipped by op2's sign bit.
;; Implemented in the integer domain: mask op2 down to its sign bits
;; and XOR them into op1.
366 (define_expand "xorsign<mode>3"
367 [(match_operand:VHSDF 0 "register_operand")
368 (match_operand:VHSDF 1 "register_operand")
369 (match_operand:VHSDF 2 "register_operand")]
373 machine_mode imode = <V_INT_EQUIV>mode;
374 rtx v_bitmask = gen_reg_rtx (imode);
375 rtx op1x = gen_reg_rtx (imode);
376 rtx op2x = gen_reg_rtx (imode);
378 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
379 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
381 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
383 emit_move_insn (v_bitmask,
384 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
385 HOST_WIDE_INT_M1U << bits));
387 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
388 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
389 emit_move_insn (operands[0],
390 lowpart_subreg (<MODE>mode, op1x, imode));
395 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: accumulate 4-way byte dot products into each 32-bit lane
;; of operand 0 (tied to operand 1 via the "0" constraint).
396 (define_insn "aarch64_<sur>dot<vsi2qi>"
397 [(set (match_operand:VS 0 "register_operand" "=w")
398 (plus:VS (match_operand:VS 1 "register_operand" "0")
399 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
400 (match_operand:<VSI2QI> 3 "register_operand" "w")]
403 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
404 [(set_attr "type" "neon_dot")]

407 ;; These expands map to the Dot Product optab the vectorizer checks for.
408 ;; The auto-vectorizer expects a dot product builtin that also does an
409 ;; accumulation into the provided register.
410 ;; Given the following pattern
412 ;; for (i=0; i<len; i++) {
418 ;; This can be auto-vectorized to
419 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
421 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
427 (define_expand "<sur>dot_prod<vsi2qi>"
428 [(set (match_operand:VS 0 "register_operand")
429 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
430 (match_operand:<VSI2QI> 2 "register_operand")]
432 (match_operand:VS 3 "register_operand")))]
436 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
438 emit_insn (gen_rtx_SET (operands[0], operands[3]));

442 ;; These instructions map to the __builtins for the Dot Product
443 ;; indexed operations.
;; Lane form: operand 3 is a 64-bit vector, so lane selection is
;; endian-adjusted in V8QImode.
444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
445 [(set (match_operand:VS 0 "register_operand" "=w")
446 (plus:VS (match_operand:VS 1 "register_operand" "0")
447 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
448 (match_operand:V8QI 3 "register_operand" "<h_con>")
449 (match_operand:SI 4 "immediate_operand" "i")]
453 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
454 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
456 [(set_attr "type" "neon_dot")]

;; laneq form: as above but the lane comes from a 128-bit (V16QI)
;; vector.
459 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
460 [(set (match_operand:VS 0 "register_operand" "=w")
461 (plus:VS (match_operand:VS 1 "register_operand" "0")
462 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
463 (match_operand:V16QI 3 "register_operand" "<h_con>")
464 (match_operand:SI 4 "immediate_operand" "i")]
468 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
469 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
471 [(set_attr "type" "neon_dot")]
;; copysign: select the sign bit from operand 2 and everything else
;; from operand 1, via a BSL with a sign-bit-only mask.
474 (define_expand "copysign<mode>3"
475 [(match_operand:VHSDF 0 "register_operand")
476 (match_operand:VHSDF 1 "register_operand")
477 (match_operand:VHSDF 2 "register_operand")]
478 "TARGET_FLOAT && TARGET_SIMD"
480 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
481 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
483 emit_move_insn (v_bitmask,
484 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
485 HOST_WIDE_INT_M1U << bits));
486 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
487 operands[2], operands[1]));

;; Combine pattern: multiply by a duplicated lane -> MUL/FMUL by
;; element, with the usual big-endian lane remap.
492 (define_insn "*aarch64_mul3_elt<mode>"
493 [(set (match_operand:VMUL 0 "register_operand" "=w")
497 (match_operand:VMUL 1 "register_operand" "<h_con>")
498 (parallel [(match_operand:SI 2 "immediate_operand")])))
499 (match_operand:VMUL 3 "register_operand" "w")))]
502 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
503 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
505 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; As above, with the lane taken from the opposite-width vector mode.
508 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
509 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
510 (mult:VMUL_CHANGE_NLANES
511 (vec_duplicate:VMUL_CHANGE_NLANES
513 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
514 (parallel [(match_operand:SI 2 "immediate_operand")])))
515 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
518 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
519 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
521 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; Multiply by a scalar duplicated from a register: element-0 form.
524 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
525 [(set (match_operand:VMUL 0 "register_operand" "=w")
528 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
529 (match_operand:VMUL 2 "register_operand" "w")))]
531 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; FRSQRTE: reciprocal square-root estimate (vector and scalar FP
;; modes).
535 (define_insn "aarch64_rsqrte<mode>"
536 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
537 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
540 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
541 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; FRSQRTS: Newton-Raphson step used to refine the FRSQRTE estimate.
543 (define_insn "aarch64_rsqrts<mode>"
544 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
545 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
546 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
549 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
550 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; rsqrt optab: expand to the approximate reciprocal sqrt sequence.
552 (define_expand "rsqrt<mode>2"
553 [(set (match_operand:VALLF 0 "register_operand" "=w")
554 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
558 aarch64_emit_approx_sqrt (operands[0], operands[1], true);

;; Combine pattern: DF multiply by a V2DF lane -> FMUL by element.
562 (define_insn "*aarch64_mul3_elt_to_64v2df"
563 [(set (match_operand:DF 0 "register_operand" "=w")
566 (match_operand:V2DF 1 "register_operand" "w")
567 (parallel [(match_operand:SI 2 "immediate_operand")]))
568 (match_operand:DF 3 "register_operand" "w")))]
571 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
572 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
574 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
577 (define_insn "neg<mode>2"
578 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
579 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
581 "neg\t%0.<Vtype>, %1.<Vtype>"
582 [(set_attr "type" "neon_neg<q>")]

;; Vector integer absolute value (rtl abs code; combinable).
585 (define_insn "abs<mode>2"
586 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
589 "abs\t%0.<Vtype>, %1.<Vtype>"
590 [(set_attr "type" "neon_abs<q>")]

593 ;; The intrinsic version of integer ABS must not be allowed to
594 ;; combine with any operation with an integrated ABS step, such
;; (kept as an unspec so combine cannot fold it).
596 (define_insn "aarch64_abs<mode>"
597 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
599 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
602 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
603 [(set_attr "type" "neon_abs<q>")]

;; SABD: signed absolute difference, abs (op1 - op2).
606 (define_insn "abd<mode>_3"
607 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
608 (abs:VDQ_BHSI (minus:VDQ_BHSI
609 (match_operand:VDQ_BHSI 1 "register_operand" "w")
610 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
612 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
613 [(set_attr "type" "neon_abd<q>")]

;; SABA: absolute difference accumulated into operand 3 (tied to dst).
616 (define_insn "aba<mode>_3"
617 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
618 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
619 (match_operand:VDQ_BHSI 1 "register_operand" "w")
620 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
621 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
623 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
624 [(set_attr "type" "neon_arith_acc<q>")]

;; FABD: floating-point absolute difference.
627 (define_insn "fabd<mode>3"
628 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
631 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
632 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
634 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
635 [(set_attr "type" "neon_fp_abd_<stype><q>")]
638 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 1 materialises the inverted immediate via the Db
;; constraint and emits BIC (AARCH64_CHECK_BIC).
639 (define_insn "and<mode>3"
640 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
641 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
642 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
645 switch (which_alternative)
648 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
650 return aarch64_output_simd_mov_immediate (operands[2],
651 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
656 [(set_attr "type" "neon_logic<q>")]

659 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Mirror of and<mode>3 using the Do constraint / AARCH64_CHECK_ORR.
660 (define_insn "ior<mode>3"
661 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
662 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
663 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
666 switch (which_alternative)
669 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
671 return aarch64_output_simd_mov_immediate (operands[2],
672 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
677 [(set_attr "type" "neon_logic<q>")]

;; Vector exclusive-or (EOR).
680 (define_insn "xor<mode>3"
681 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
682 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
683 (match_operand:VDQ_I 2 "register_operand" "w")))]
685 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
686 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise NOT.
689 (define_insn "one_cmpl<mode>2"
690 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
691 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
693 "not\t%0.<Vbtype>, %1.<Vbtype>"
694 [(set_attr "type" "neon_logic<q>")]
;; Set one lane of a vector.  Operand 2 is a one-hot lane mask; it is
;; converted to an (endian-adjusted) lane number for the INS/LD1
;; templates.  Alternatives: from GP reg, from SIMD reg, from memory.
697 (define_insn "aarch64_simd_vec_set<mode>"
698 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
700 (vec_duplicate:VDQ_BHSI
701 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
702 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
703 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
706 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
707 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
708 switch (which_alternative)
711 return "ins\\t%0.<Vetype>[%p2], %w1";
713 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
715 return "ld1\\t{%0.<Vetype>}[%p2], %1";
720 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]

;; Copy one lane of operand 3 into one lane of operand 1 (tied to the
;; destination): INS element-to-element.  Both lane numbers are
;; endian-adjusted.
723 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
724 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
726 (vec_duplicate:VALL_F16
728 (match_operand:VALL_F16 3 "register_operand" "w")
730 [(match_operand:SI 4 "immediate_operand" "i")])))
731 (match_operand:VALL_F16 1 "register_operand" "0")
732 (match_operand:SI 2 "immediate_operand" "i")))]
735 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
736 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
737 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
739 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
741 [(set_attr "type" "neon_ins<q>")]

;; As above, but the source lane comes from the opposite-width vector
;; mode, so its index is adjusted in <VSWAP_WIDTH>mode.
744 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
745 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
746 (vec_merge:VALL_F16_NO_V2Q
747 (vec_duplicate:VALL_F16_NO_V2Q
749 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
751 [(match_operand:SI 4 "immediate_operand" "i")])))
752 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
753 (match_operand:SI 2 "immediate_operand" "i")))]
756 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
757 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
758 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
759 INTVAL (operands[4]));
761 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
763 [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by an immediate vector (all lanes equal): USHR.
766 (define_insn "aarch64_simd_lshr<mode>"
767 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
768 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
769 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
771 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
772 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by an immediate vector: SSHR.
775 (define_insn "aarch64_simd_ashr<mode>"
776 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
777 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
778 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
780 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
781 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by an immediate vector: SHL.
784 (define_insn "aarch64_simd_imm_shl<mode>"
785 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
786 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
787 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
789 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
790 [(set_attr "type" "neon_shift_imm<q>")]

;; Register-controlled left shift: SSHL with a per-lane shift count.
793 (define_insn "aarch64_simd_reg_sshl<mode>"
794 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
795 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
796 (match_operand:VDQ_I 2 "register_operand" "w")))]
798 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
799 [(set_attr "type" "neon_shift_reg<q>")]

;; Register-controlled shift, unsigned semantics (USHL; negative
;; counts shift right), kept as an unspec.
802 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
803 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
804 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
805 (match_operand:VDQ_I 2 "register_operand" "w")]
806 UNSPEC_ASHIFT_UNSIGNED))]
808 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
809 [(set_attr "type" "neon_shift_reg<q>")]

;; Register-controlled shift, signed semantics (SSHL), as an unspec.
812 (define_insn "aarch64_simd_reg_shl<mode>_signed"
813 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
814 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
815 (match_operand:VDQ_I 2 "register_operand" "w")]
816 UNSPEC_ASHIFT_SIGNED))]
818 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
819 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left optab with a scalar SI count.  Constant counts in
;; [0, width) use the immediate SHL pattern; otherwise the count is
;; duplicated across a vector and SSHL is used.
822 (define_expand "ashl<mode>3"
823 [(match_operand:VDQ_I 0 "register_operand" "")
824 (match_operand:VDQ_I 1 "register_operand" "")
825 (match_operand:SI 2 "general_operand" "")]
828 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
831 if (CONST_INT_P (operands[2]))
833 shift_amount = INTVAL (operands[2]);
834 if (shift_amount >= 0 && shift_amount < bit_width)
836 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
838 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
845 operands[2] = force_reg (SImode, operands[2]);
848 else if (MEM_P (operands[2]))
850 operands[2] = force_reg (SImode, operands[2]);
853 if (REG_P (operands[2]))
855 rtx tmp = gen_reg_rtx (<MODE>mode);
856 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
857 convert_to_mode (<VEL>mode,
860 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
869 (define_expand "lshr<mode>3"
;; Logical-shift-right optab.  Immediate counts in (0, width] use
;; USHR; otherwise the count is negated (USHL shifts right for
;; negative counts), duplicated, and the unsigned USHL pattern used.
870 [(match_operand:VDQ_I 0 "register_operand" "")
871 (match_operand:VDQ_I 1 "register_operand" "")
872 (match_operand:SI 2 "general_operand" "")]
875 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
878 if (CONST_INT_P (operands[2]))
880 shift_amount = INTVAL (operands[2]);
881 if (shift_amount > 0 && shift_amount <= bit_width)
883 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
885 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
891 operands[2] = force_reg (SImode, operands[2]);
893 else if (MEM_P (operands[2]))
895 operands[2] = force_reg (SImode, operands[2]);
898 if (REG_P (operands[2]))
900 rtx tmp = gen_reg_rtx (SImode);
901 rtx tmp1 = gen_reg_rtx (<MODE>mode);
902 emit_insn (gen_negsi2 (tmp, operands[2]));
903 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
904 convert_to_mode (<VEL>mode,
906 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
916 (define_expand "ashr<mode>3"
;; Arithmetic-shift-right optab: mirror of lshr<mode>3 using SSHR and
;; the signed SSHL pattern.
917 [(match_operand:VDQ_I 0 "register_operand" "")
918 (match_operand:VDQ_I 1 "register_operand" "")
919 (match_operand:SI 2 "general_operand" "")]
922 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
925 if (CONST_INT_P (operands[2]))
927 shift_amount = INTVAL (operands[2]);
928 if (shift_amount > 0 && shift_amount <= bit_width)
930 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
932 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
938 operands[2] = force_reg (SImode, operands[2]);
940 else if (MEM_P (operands[2]))
942 operands[2] = force_reg (SImode, operands[2]);
945 if (REG_P (operands[2]))
947 rtx tmp = gen_reg_rtx (SImode);
948 rtx tmp1 = gen_reg_rtx (<MODE>mode);
949 emit_insn (gen_negsi2 (tmp, operands[2]));
950 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
951 convert_to_mode (<VEL>mode,
953 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: maps directly onto SSHL.
963 (define_expand "vashl<mode>3"
964 [(match_operand:VDQ_I 0 "register_operand" "")
965 (match_operand:VDQ_I 1 "register_operand" "")
966 (match_operand:VDQ_I 2 "register_operand" "")]
969 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

974 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
975 ;; Negating individual lanes most certainly offsets the
976 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the counts and use
;; the signed SSHL pattern (negative count shifts right).
977 (define_expand "vashr<mode>3"
978 [(match_operand:VDQ_BHSI 0 "register_operand" "")
979 (match_operand:VDQ_BHSI 1 "register_operand" "")
980 (match_operand:VDQ_BHSI 2 "register_operand" "")]
983 rtx neg = gen_reg_rtx (<MODE>mode);
984 emit (gen_neg<mode>2 (neg, operands[2]));
985 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; DImode intrinsic ASR allowing a count of 64: asr by 64 and by 63
;; give the same result (all sign bits), so clamp to 63 for the
;; standard pattern.
991 (define_expand "aarch64_ashr_simddi"
992 [(match_operand:DI 0 "register_operand" "=w")
993 (match_operand:DI 1 "register_operand" "w")
994 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
997 /* An arithmetic shift right by 64 fills the result with copies of the sign
998 bit, just like asr by 63 - however the standard pattern does not handle
1000 if (INTVAL (operands[2]) == 64)
1001 operands[2] = GEN_INT (63);
1002 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Vector-by-vector logical right shift via negated counts + USHL.
1007 (define_expand "vlshr<mode>3"
1008 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1009 (match_operand:VDQ_BHSI 1 "register_operand" "")
1010 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1013 rtx neg = gen_reg_rtx (<MODE>mode);
1014 emit (gen_neg<mode>2 (neg, operands[2]));
1015 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; DImode intrinsic LSR allowing a count of 64: the result is zero.
1020 (define_expand "aarch64_lshr_simddi"
1021 [(match_operand:DI 0 "register_operand" "=w")
1022 (match_operand:DI 1 "register_operand" "w")
1023 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1026 if (INTVAL (operands[2]) == 64)
1027 emit_move_insn (operands[0], const0_rtx)
1029 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));

;; vec_set optab: convert the lane number into the one-hot mask the
;; aarch64_simd_vec_set insn expects.
1034 (define_expand "vec_set<mode>"
1035 [(match_operand:VDQ_BHSI 0 "register_operand")
1036 (match_operand:<VEL> 1 "register_operand")
1037 (match_operand:SI 2 "immediate_operand")]
1040 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1041 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1042 GEN_INT (elem), operands[0]));

1047 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (vec_shr); direction flips on big-endian
;; because lane order is reversed in the register.
1048 (define_insn "vec_shr_<mode>"
1049 [(set (match_operand:VD 0 "register_operand" "=w")
1050 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1051 (match_operand:SI 2 "immediate_operand" "i")]
1055 if (BYTES_BIG_ENDIAN)
1056 return "shl %d0, %d1, %2";
1058 return "ushr %d0, %d1, %2";
1060 [(set_attr "type" "neon_shift_imm")]
;; Set one lane of a V2DI vector: INS from a general register or from
;; lane 0 of another SIMD register.  Operand 2 is a one-hot lane mask.
1063 (define_insn "aarch64_simd_vec_setv2di"
1064 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1067 (match_operand:DI 1 "register_operand" "r,w"))
1068 (match_operand:V2DI 3 "register_operand" "0,0")
1069 (match_operand:SI 2 "immediate_operand" "i,i")))]
1072 int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1073 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1074 switch (which_alternative)
1077 return "ins\\t%0.d[%p2], %1";
1079 return "ins\\t%0.d[%p2], %1.d[0]";
1084 [(set_attr "type" "neon_from_gp, neon_ins_q")]

;; vec_set optab for V2DI: build the one-hot lane mask.
1087 (define_expand "vec_setv2di"
1088 [(match_operand:V2DI 0 "register_operand")
1089 (match_operand:DI 1 "register_operand")
1090 (match_operand:SI 2 "immediate_operand")]
1093 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1094 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1095 GEN_INT (elem), operands[0]));

;; Set one lane of a floating-point vector: INS element-to-element.
1100 (define_insn "aarch64_simd_vec_set<mode>"
1101 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1103 (vec_duplicate:VDQF_F16
1104 (match_operand:<VEL> 1 "register_operand" "w"))
1105 (match_operand:VDQF_F16 3 "register_operand" "0")
1106 (match_operand:SI 2 "immediate_operand" "i")))]
1109 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1111 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1112 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1114 [(set_attr "type" "neon_ins<q>")]

;; vec_set optab for floating-point vectors.
1117 (define_expand "vec_set<mode>"
1118 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1119 (match_operand:<VEL> 1 "register_operand" "w")
1120 (match_operand:SI 2 "immediate_operand" "")]
1123 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1124 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1125 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; The accumulator (operand 1) is tied to the destination ("0" constraint).
1131 (define_insn "aarch64_mla<mode>"
1132 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1133 (plus:VDQ_BHSI (mult:VDQ_BHSI
1134 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1135 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1136 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1138 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1139 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by a selected lane: the multiplier is a vec_duplicate of one lane
;; of operand 1.  operands[2] (the lane index) is remapped for
;; endianness via aarch64_endian_lane_rtx before printing.
1142 (define_insn "*aarch64_mla_elt<mode>"
1143 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1146 (vec_duplicate:VDQHS
1148 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1149 (parallel [(match_operand:SI 2 "immediate_operand")])))
1150 (match_operand:VDQHS 3 "register_operand" "w"))
1151 (match_operand:VDQHS 4 "register_operand" "0")))]
1154 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1155 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1157 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Same as above but the lane is taken from a vector of the swapped
;; width (64-bit <-> 128-bit), hence <VSWAP_WIDTH> on operand 1.
1160 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1161 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1164 (vec_duplicate:VDQHS
1166 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1167 (parallel [(match_operand:SI 2 "immediate_operand")])))
1168 (match_operand:VDQHS 3 "register_operand" "w"))
1169 (match_operand:VDQHS 4 "register_operand" "0")))]
1172 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1173 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1175 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where the multiplier is a dup of a scalar element register;
;; emitted as a multiply by lane 0.
1178 (define_insn "*aarch64_mla_elt_merge<mode>"
1179 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1181 (mult:VDQHS (vec_duplicate:VDQHS
1182 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1183 (match_operand:VDQHS 2 "register_operand" "w"))
1184 (match_operand:VDQHS 3 "register_operand" "0")))]
1186 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1187 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; Mirrors the MLA family above; the accumulator is tied to the output.
1190 (define_insn "aarch64_mls<mode>"
1191 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1192 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1193 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1194 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1196 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1197 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by a selected lane; lane index is endian-remapped before output.
1200 (define_insn "*aarch64_mls_elt<mode>"
1201 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1203 (match_operand:VDQHS 4 "register_operand" "0")
1205 (vec_duplicate:VDQHS
1207 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1208 (parallel [(match_operand:SI 2 "immediate_operand")])))
1209 (match_operand:VDQHS 3 "register_operand" "w"))))]
1212 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1213 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1215 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Lane variant taking the element from the swapped-width vector mode.
1218 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1219 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1221 (match_operand:VDQHS 4 "register_operand" "0")
1223 (vec_duplicate:VDQHS
1225 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1226 (parallel [(match_operand:SI 2 "immediate_operand")])))
1227 (match_operand:VDQHS 3 "register_operand" "w"))))]
1230 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1231 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1233 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS where the multiplier is a dup of a scalar element register;
;; emitted as a multiply by lane 0.
1236 (define_insn "*aarch64_mls_elt_merge<mode>"
1237 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1239 (match_operand:VDQHS 1 "register_operand" "0")
1240 (mult:VDQHS (vec_duplicate:VDQHS
1241 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1242 (match_operand:VDQHS 3 "register_operand" "w"))))]
1244 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1245 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1248 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min for byte/half/word vectors,
;; mapping directly to smax/smin/umax/umin.
1249 (define_insn "<su><maxmin><mode>3"
1250 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1251 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1252 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1254 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1255 [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no direct max/min instruction; this expander lowers it via
;; a vector conditional (vcond) built from an element-wise comparison.
;; The lines selecting cmp_operator from the MAXMIN code are elided in
;; this extract.
1258 (define_expand "<su><maxmin>v2di3"
1259 [(set (match_operand:V2DI 0 "register_operand" "")
1260 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1261 (match_operand:V2DI 2 "register_operand" "")))]
1264 enum rtx_code cmp_operator;
1285 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1286 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1287 operands[2], cmp_fmt, operands[1], operands[2]));
1291 ;; Pairwise Integer Max/Min operations.
;; Pairwise forms reduce adjacent element pairs (smaxp/sminp/umaxp/uminp).
1292 (define_insn "aarch64_<maxmin_uns>p<mode>"
1293 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1294 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1295 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1298 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1299 [(set_attr "type" "neon_minmax<q>")]
1302 ;; Pairwise FP Max/Min operations.
1303 (define_insn "aarch64_<maxmin_uns>p<mode>"
1304 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1305 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1306 (match_operand:VHSDF 2 "register_operand" "w")]
1309 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1310 [(set_attr "type" "neon_minmax<q>")]
1313 ;; vec_concat gives a new vector with the low elements from operand 1, and
1314 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1315 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1316 ;; What that means is that the RTL descriptions of the below patterns
1317 ;; need to change depending on endianness.
1319 ;; Move to the low architectural bits of the register.
1320 ;; On little-endian this is { operand, zeroes }
1321 ;; On big-endian this is { zeroes, operand }
;; Little-endian move-to-low-half: the result is the vec_concat of the
;; source half-vector with zeroes.  Three alternatives cover SIMD-reg,
;; GP-reg (fmov), and dup sources (assembly templates elided here).
1323 (define_insn "move_lo_quad_internal_<mode>"
1324 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1326 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1327 (vec_duplicate:<VHALF> (const_int 0))))]
1328 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1333 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1334 (set_attr "simd" "yes,*,yes")
1335 (set_attr "fp" "*,yes,*")
1336 (set_attr "length" "4")]
;; Same, for modes with two elements (VQ_2E); the zero half's RTL form
;; differs (line elided in this extract).
1339 (define_insn "move_lo_quad_internal_<mode>"
1340 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1342 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1344 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1349 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1350 (set_attr "simd" "yes,*,yes")
1351 (set_attr "fp" "*,yes,*")
1352 (set_attr "length" "4")]
;; Big-endian counterparts: the concat order is reversed ({ zeroes,
;; operand }) so that the operand still lands in the architecturally
;; low bits of the register.
1355 (define_insn "move_lo_quad_internal_be_<mode>"
1356 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1358 (vec_duplicate:<VHALF> (const_int 0))
1359 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1360 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1365 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1366 (set_attr "simd" "yes,*,yes")
1367 (set_attr "fp" "*,yes,*")
1368 (set_attr "length" "4")]
1371 (define_insn "move_lo_quad_internal_be_<mode>"
1372 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1375 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1381 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1382 (set_attr "simd" "yes,*,yes")
1383 (set_attr "fp" "*,yes,*")
1384 (set_attr "length" "4")]
;; Dispatcher: picks the _be_ or plain internal pattern by endianness.
1387 (define_expand "move_lo_quad_<mode>"
1388 [(match_operand:VQ 0 "register_operand")
1389 (match_operand:VQ 1 "register_operand")]
1392 if (BYTES_BIG_ENDIAN)
1393 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1395 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1400 ;; Move operand1 to the high architectural bits of the register, keeping
1401 ;; the low architectural bits of operand2.
1402 ;; For little-endian this is { operand2, operand1 }
1403 ;; For big-endian this is { operand1, operand2 }
;; Insert operand 1 into the high architectural half of operand 0,
;; preserving the low half (selected via a vect_par_cnst_lo_half mask).
;; Two alternatives: ins from a SIMD reg, or (template elided) from a
;; GP reg.
1405 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1406 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1410 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1411 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1412 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1414 ins\\t%0.d[1], %1.d[0]
1416 [(set_attr "type" "neon_ins")]
;; Big-endian counterpart: concat order reversed in the RTL, but the
;; emitted instruction is the same architectural ins.
1419 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1420 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1422 (match_operand:<VHALF> 1 "register_operand" "w,r")
1425 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1426 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1428 ins\\t%0.d[1], %1.d[0]
1430 [(set_attr "type" "neon_ins")]
;; Dispatcher: builds the lo-half lane-selection parallel and emits the
;; endianness-appropriate insn above.
1433 (define_expand "move_hi_quad_<mode>"
1434 [(match_operand:VQ 0 "register_operand" "")
1435 (match_operand:<VHALF> 1 "register_operand" "")]
1438 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1439 if (BYTES_BIG_ENDIAN)
1440 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1443 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1448 ;; Narrowing operations.
;; Truncate each element of a 128-bit vector to the next-narrower
;; element type, producing a 64-bit result (xtn).
1451 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1452 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1453 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1455 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1456 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate two 64-bit vectors: assemble them into a temporary
;; 128-bit register via move_lo/hi_quad (operand order swapped on
;; big-endian so the architectural halves end up correct), then xtn.
1459 (define_expand "vec_pack_trunc_<mode>"
1460 [(match_operand:<VNARROWD> 0 "register_operand" "")
1461 (match_operand:VDN 1 "register_operand" "")
1462 (match_operand:VDN 2 "register_operand" "")]
1465 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1466 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1467 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1469 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1470 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1471 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-truncate two 128-bit vectors into one via xtn + xtn2; the
;; earlyclobber (=&w) keeps the destination distinct from the inputs
;; since the first xtn writes it before operand 2 (or 1) is read.
1477 (define_insn "vec_pack_trunc_<mode>"
1478 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1479 (vec_concat:<VNARROWQ2>
1480 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1481 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1484 if (BYTES_BIG_ENDIAN)
1485 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1487 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1489 [(set_attr "type" "multiple")
1490 (set_attr "length" "8")]
1493 ;; Widening operations.
;; Widen (sign- or zero-extend) the low half of a 128-bit vector via a
;; shift-left-long by 0 (sshll/ushll).
1495 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1496 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1497 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1498 (match_operand:VQW 1 "register_operand" "w")
1499 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1502 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1503 [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half, using the "2" (high-half) instruction form.
1506 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1507 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1508 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1509 (match_operand:VQW 1 "register_operand" "w")
1510 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1513 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1514 [(set_attr "type" "neon_shift_imm_long")]
;; Expanders build the hi/lo lane-selection parallel (third argument
;; true = high half) and emit the matching insn above.
1517 (define_expand "vec_unpack<su>_hi_<mode>"
1518 [(match_operand:<VWIDE> 0 "register_operand" "")
1519 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1522 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1523 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1529 (define_expand "vec_unpack<su>_lo_<mode>"
1530 [(match_operand:<VWIDE> 0 "register_operand" "")
1531 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1534 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1535 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1541 ;; Widening arithmetic.
;; Widening multiply-accumulate: extend both halves' elements, multiply,
;; and add to / subtract from the wide accumulator (operand 1, tied to
;; the destination).  _lo/_hi variants pick the half via the
;; vect_par_cnst_{lo,hi}_half lane masks; the "2" mnemonic suffix is the
;; architectural high-half form.
1543 (define_insn "*aarch64_<su>mlal_lo<mode>"
1544 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1547 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1548 (match_operand:VQW 2 "register_operand" "w")
1549 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1550 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1551 (match_operand:VQW 4 "register_operand" "w")
1553 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1555 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1556 [(set_attr "type" "neon_mla_<Vetype>_long")]
1559 (define_insn "*aarch64_<su>mlal_hi<mode>"
1560 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1563 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1564 (match_operand:VQW 2 "register_operand" "w")
1565 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1566 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1567 (match_operand:VQW 4 "register_operand" "w")
1569 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1572 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract variants (mlsl/mlsl2): accumulator minus
;; the widened product.
1575 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1576 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578 (match_operand:<VWIDE> 1 "register_operand" "0")
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1587 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1588 [(set_attr "type" "neon_mla_<Vetype>_long")]
1591 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1592 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594 (match_operand:<VWIDE> 1 "register_operand" "0")
1596 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1597 (match_operand:VQW 2 "register_operand" "w")
1598 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1599 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1600 (match_operand:VQW 4 "register_operand" "w")
1603 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1604 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Whole-64-bit-vector (VD_BHSI) widening MAC forms: no half selection
;; needed since the source vectors are already 64-bit.
1607 (define_insn "*aarch64_<su>mlal<mode>"
1608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1612 (match_operand:VD_BHSI 1 "register_operand" "w"))
1614 (match_operand:VD_BHSI 2 "register_operand" "w")))
1615 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1617 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1618 [(set_attr "type" "neon_mla_<Vetype>_long")]
1621 (define_insn "*aarch64_<su>mlsl<mode>"
1622 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1624 (match_operand:<VWIDE> 1 "register_operand" "0")
1627 (match_operand:VD_BHSI 2 "register_operand" "w"))
1629 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1631 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1632 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves of two 128-bit vectors
;; (smull/umull), and the matching standard-pattern expander which
;; builds the lo-half lane mask (third argument false = low half).
1635 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1637 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1638 (match_operand:VQW 1 "register_operand" "w")
1639 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1640 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1641 (match_operand:VQW 2 "register_operand" "w")
1644 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1645 [(set_attr "type" "neon_mul_<Vetype>_long")]
1648 (define_expand "vec_widen_<su>mult_lo_<mode>"
1649 [(match_operand:<VWIDE> 0 "register_operand" "")
1650 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1651 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1654 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1655 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; High-half counterparts (smull2/umull2); the expander passes true to
;; request the hi-half lane mask.
1662 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1663 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665 (match_operand:VQW 1 "register_operand" "w")
1666 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668 (match_operand:VQW 2 "register_operand" "w")
1671 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1672 [(set_attr "type" "neon_mul_<Vetype>_long")]
1675 (define_expand "vec_widen_<su>mult_hi_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand" "")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1678 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1681 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1682 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1696 ;; Floating-point operations can raise an exception. Vectorizing such
1697 ;; operations is safe for the reasons explained below.
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling, however this is an optional feature. In the
1701 ;; event of a floating-point exception being raised by vectorised
1703 ;; 1. If trapped floating-point exceptions are available, then a trap
1704 ;; will be taken when any lane raises an enabled exception. A trap
1705 ;; handler may determine which lane raised the exception.
1706 ;; 2. Alternatively a sticky exception flag is set in the
1707 ;; floating-point status register (FPSR). Software may explicitly
1708 ;; test the exception flags, in which case the tests will either
1709 ;; prevent vectorisation, allowing precise identification of the
1710 ;; failing operation, or if tested outside of vectorisable regions
1711 ;; then the specific operation and lane are not of interest.
1713 ;; FP arithmetic operations.
;; Element-wise FP add/sub/mul for half/single/double vector modes.
1715 (define_insn "add<mode>3"
1716 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718 (match_operand:VHSDF 2 "register_operand" "w")))]
1720 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 (define_insn "sub<mode>3"
1725 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727 (match_operand:VHSDF 2 "register_operand" "w")))]
1729 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1733 (define_insn "mul<mode>3"
1734 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736 (match_operand:VHSDF 2 "register_operand" "w")))]
1738 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: first tries the approximate-division sequence
;; (aarch64_emit_approx_div); if that declines, operand 1 is forced to a
;; register and expansion falls through to the plain fdiv insn below.
1742 (define_expand "div<mode>3"
1743 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1745 (match_operand:VHSDF 2 "register_operand" "w")))]
1748 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1751 operands[1] = force_reg (<MODE>mode, operands[1]);
1754 (define_insn "*div<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1759 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FP negate and absolute value.
1763 (define_insn "neg<mode>2"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1771 (define_insn "abs<mode>2"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1775 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add (standard fma pattern): operand 3 is the addend,
;; tied to the destination, matching fmla's accumulate-in-place form.
1779 (define_insn "fma<mode>4"
1780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782 (match_operand:VHSDF 2 "register_operand" "w")
1783 (match_operand:VHSDF 3 "register_operand" "0")))]
1785 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA by a selected lane of operand 1; the lane index (operands[2])
;; is endian-remapped before printing.
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790 [(set (match_operand:VDQF 0 "register_operand" "=w")
1794 (match_operand:VDQF 1 "register_operand" "<h_con>")
1795 (parallel [(match_operand:SI 2 "immediate_operand")])))
1796 (match_operand:VDQF 3 "register_operand" "w")
1797 (match_operand:VDQF 4 "register_operand" "0")))]
1800 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1803 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Lane variant taking the element from the swapped-width vector mode.
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1809 (vec_duplicate:VDQSF
1811 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQSF 3 "register_operand" "w")
1814 (match_operand:VDQSF 4 "register_operand" "0")))]
1817 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA where one multiplicand is a dup of a scalar register; emitted
;; as a multiply by lane 0.
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824 [(set (match_operand:VMUL 0 "register_operand" "=w")
1827 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828 (match_operand:VMUL 2 "register_operand" "w")
1829 (match_operand:VMUL 3 "register_operand" "0")))]
1831 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar-DF result computed with the V2DF lane form of fmla.
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836 [(set (match_operand:DF 0 "register_operand" "=w")
1839 (match_operand:V2DF 1 "register_operand" "w")
1840 (parallel [(match_operand:SI 2 "immediate_operand")]))
1841 (match_operand:DF 3 "register_operand" "w")
1842 (match_operand:DF 4 "register_operand" "0")))]
1845 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1848 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (standard fnma pattern): fma with one
;; multiplicand negated, matching fmls' accumulate-in-place form.
1851 (define_insn "fnma<mode>4"
1852 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1854 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855 (match_operand:VHSDF 2 "register_operand" "w")
1856 (match_operand:VHSDF 3 "register_operand" "0")))]
1858 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS lane variants, mirroring the fma4_elt family above; the lane
;; index is endian-remapped before printing.
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863 [(set (match_operand:VDQF 0 "register_operand" "=w")
1866 (match_operand:VDQF 3 "register_operand" "w"))
1869 (match_operand:VDQF 1 "register_operand" "<h_con>")
1870 (parallel [(match_operand:SI 2 "immediate_operand")])))
1871 (match_operand:VDQF 4 "register_operand" "0")))]
1874 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1877 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1884 (match_operand:VDQSF 3 "register_operand" "w"))
1885 (vec_duplicate:VDQSF
1887 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888 (parallel [(match_operand:SI 2 "immediate_operand")])))
1889 (match_operand:VDQSF 4 "register_operand" "0")))]
1892 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1895 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS where one multiplicand is a dup of a scalar register (lane 0).
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899 [(set (match_operand:VMUL 0 "register_operand" "=w")
1902 (match_operand:VMUL 2 "register_operand" "w"))
1904 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905 (match_operand:VMUL 3 "register_operand" "0")))]
1907 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar-DF result computed with the V2DF lane form of fmls.
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912 [(set (match_operand:DF 0 "register_operand" "=w")
1915 (match_operand:V2DF 1 "register_operand" "w")
1916 (parallel [(match_operand:SI 2 "immediate_operand")]))
1918 (match_operand:DF 3 "register_operand" "w"))
1919 (match_operand:DF 4 "register_operand" "0")))]
1922 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1925 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1930 (define_insn "<frint_pattern><mode>2"
1931 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1935 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936 [(set_attr "type" "neon_fp_round_<stype><q>")]
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per the frint unspec) then convert to signed/unsigned integer.
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944 [(match_operand:VHSDF 1 "register_operand" "w")]
1947 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1951 ;; HF Scalar variants of related SIMD instructions.
;; Gated on TARGET_SIMD_F16INST (scalar FP16 instructions).
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953 [(set (match_operand:HI 0 "register_operand" "=w")
1954 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1956 "TARGET_SIMD_F16INST"
1957 "fcvt<frint_suffix><su>\t%h0, %h1"
1958 [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion (fcvtzs/fcvtzu).
1961 (define_insn "<optab>_trunchfhi2"
1962 [(set (match_operand:HI 0 "register_operand" "=w")
1963 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964 "TARGET_SIMD_F16INST"
1965 "fcvtz<su>\t%h0, %h1"
1966 [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF integer-to-float conversion (scvtf/ucvtf).
1969 (define_insn "<optab>hihf2"
1970 [(set (match_operand:HF 0 "register_operand" "=w")
1971 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972 "TARGET_SIMD_F16INST"
1973 "<su_optab>cvtf\t%h0, %h1"
1974 [(set_attr "type" "neon_int_to_fp_s")]
;; Recognize (fix (mult x 2^n)) and fold the power-of-two scale into the
;; conversion as a fixed-point fcvtz with #fbits.  The condition checks
;; (via aarch64_vec_fpconst_pow_of_2) that the multiplier is a valid
;; power of two within the element's bit width.
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1981 (match_operand:VDQF 1 "register_operand" "w")
1982 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1985 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1988 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1990 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991 output_asm_insn (buf, operands);
1994 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-pattern expanders mapping fix/fixuns and ftrunc onto the
;; unspec-based insns above (bodies elided in this extract).
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000 [(match_operand:VHSDF 1 "register_operand")]
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008 [(match_operand:VHSDF 1 "register_operand")]
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand")
2015 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer -> float conversion (scvtf/ucvtf) for vector modes.
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2023 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2025 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2033 ;; Float widening operations.
;; Float-widen the low half of a 128-bit HF/SF vector (fcvtl).
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037 (float_extend:<VWIDE> (vec_select:<VHALF>
2038 (match_operand:VQ_HSF 1 "register_operand" "w")
2039 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2042 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043 [(set_attr "type" "neon_fp_cvt_widen_s")]
2046 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point: operand 2 is an immediate, printed as the
;; #fbits operand of the fcvtz-family instruction.
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050 (unspec:<VHSDF:FCVT_TARGET>
2051 [(match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:SI 2 "immediate_operand" "i")]
2055 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float: same shape, scvtf/ucvtf-family with #fbits.
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061 (unspec:<VDQ_HSDI:FCVT_TARGET>
2062 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063 (match_operand:SI 2 "immediate_operand" "i")]
2066 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns their behavior is as required.
;; Standard vec_unpacks_lo expander: builds the lo-half lane mask
;; (third argument false) and emits the fcvtl insn above.  Per the
;; comment block preceding this pattern, "lo"/"hi" here refer to
;; architectural lanes regardless of endianness.
2079 (define_expand "vec_unpacks_lo_<mode>"
2080 [(match_operand:<VWIDE> 0 "register_operand" "")
2081 (match_operand:VQ_HSF 1 "register_operand" "")]
2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Float-widen the high half of a 128-bit HF/SF vector (fcvtl2).
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (float_extend:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQ_HSF 1 "register_operand" "w")
2095 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2098 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard vec_unpacks_hi expander: float-widen the architecturally
;; high half of operand 1 into operand 0.  The lane-selection parallel
;; is built with the hi-half lanes (third argument 'true'), so it must
;; be paired with the _hi_ insn (fcvtl2); pairing it with the _lo_
;; generator — as the previous text did — would hand a hi-half lane
;; mask to the lo-half pattern.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a 64-bit float vector to the double-width element type (fcvtl).
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115 (float_extend:<VWIDE>
2116 (match_operand:VDF 1 "register_operand" "w")))]
2118 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119 [(set_attr "type" "neon_fp_cvt_widen_s")]
2122 ;; Float narrowing operations.
;; Narrow a wide vector into a 64-bit result (fcvtn).
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125 [(set (match_operand:VDF 0 "register_operand" "=w")
2127 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2129 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half of the destination while keeping the
;; existing low half (operand 1, tied to the output).  The _le/_be
;; variants differ only in the vec_concat operand order required by the
;; target endianness; both emit fcvtn2.
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2136 (match_operand:VDF 1 "register_operand" "0")
2138 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2148 (match_operand:<VWIDE> 2 "register_operand" "w"))
2149 (match_operand:VDF 1 "register_operand" "0")))]
2150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be FCVTN2 pattern depending on target endianness.
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156 [(match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2158 (match_operand:<VWIDE> 2 "register_operand" "w")]
2161 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN narrows one source into a V2SF
;; temporary (low half), then FCVTN2 narrows the other into the high half.
;; lo/hi source selection is swapped on big-endian to preserve RTL lane order.
2169 (define_expand "vec_pack_trunc_v2df"
2170 [(set (match_operand:V4SF 0 "register_operand")
2172 (float_truncate:V2SF
2173 (match_operand:V2DF 1 "register_operand"))
2174 (float_truncate:V2SF
2175 (match_operand:V2DF 2 "register_operand"))
2179 rtx tmp = gen_reg_rtx (V2SFmode);
2180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2183 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185 tmp, operands[hi]));
;; Pack two DF scalars into a V2SF: assemble both scalars into a V2DF
;; temporary (move_lo_quad / move_hi_quad), then narrow with FCVTN.
;; The temporary must be V2DFmode: gen_move_{lo,hi}_quad_v2df write DF
;; halves into a V2DF destination, and aarch64_float_truncate_lo_v2sf reads
;; a <VWIDE> (V2DF) source.  A V2SF temporary is mode-mismatched for all
;; three emitted insns.
2190 (define_expand "vec_pack_trunc_df"
2191 [(set (match_operand:V2SF 0 "register_operand")
2194 (match_operand:DF 1 "register_operand"))
2196 (match_operand:DF 2 "register_operand"))
2200 rtx tmp = gen_reg_rtx (V2DFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2204 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2212 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2214 ;; a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; RTL smax/smin on FP vectors, emitted as FMAXNM/FMINNM (only generated
;; under the fast-math conditions described in the comment above).
2225 (define_insn "<su><maxmin><mode>3"
2226 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228 (match_operand:VHSDF 2 "register_operand" "w")))]
2230 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
;; Unspec-based variant: exact library fmax/fmin semantics, so it is kept
;; opaque to the optimizers rather than modelled as smax/smin.
2237 (define_insn "<maxmin_uns><mode>3"
2238 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240 (match_operand:VHSDF 2 "register_operand" "w")]
2243 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2247 ;; 'across lanes' add.
;; Across-lanes integer add reduction to a scalar: ADDV into a vector
;; scratch, then extract lane 0 (endian-corrected lane index).
2249 (define_expand "reduc_plus_scal_<mode>"
2250 [(match_operand:<VEL> 0 "register_operand" "=w")
2251 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2255 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256 rtx scratch = gen_reg_rtx (<MODE>mode);
2257 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; FADDP: pairwise FP add of adjacent lanes across the two source vectors.
2263 (define_insn "aarch64_faddp<mode>"
2264 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266 (match_operand:VHSDF 2 "register_operand" "w")]
2269 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV across-lanes add; result lands in element 0 of the destination.
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274 [(set (match_operand:VDQV 0 "register_operand" "=w")
2275 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2278 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV form; one ADDP of the vector with itself reduces it.
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283 [(set (match_operand:V2SI 0 "register_operand" "=w")
2284 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2287 "addp\\t%0.2s, %1.2s, %1.2s"
2288 [(set_attr "type" "neon_reduc_add")]
;; Two-lane FP reduction: a single scalar FADDP folds both lanes.
2291 (define_insn "reduc_plus_scal_<mode>"
2292 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2296 "faddp\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two vector FADDPs (4->2->1 effective lanes), then
;; extract lane 0 with an endian-corrected index.
2300 (define_expand "reduc_plus_scal_v4sf"
2301 [(set (match_operand:SF 0 "register_operand")
2302 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2306 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307 rtx scratch = gen_reg_rtx (V4SFmode);
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; CLS: count leading sign bits per lane (clrsb standard name).
2314 (define_insn "clrsb<mode>2"
2315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318 "cls\\t%0.<Vtype>, %1.<Vtype>"
2319 [(set_attr "type" "neon_cls<q>")]
;; CLZ: count leading zeros per lane.
2322 (define_insn "clz<mode>2"
2323 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2326 "clz\\t%0.<Vtype>, %1.<Vtype>"
2327 [(set_attr "type" "neon_cls<q>")]
;; CNT: per-byte popcount (byte vectors only).
2330 (define_insn "popcount<mode>2"
2331 [(set (match_operand:VB 0 "register_operand" "=w")
2332 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2334 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335 [(set_attr "type" "neon_cnt<q>")]
2338 ;; 'across lanes' max and min ops.
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP max/min across-lanes reduction to scalar: reduce into a vector
;; scratch, then extract lane 0 (endian-corrected).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343 [(match_operand:<VEL> 0 "register_operand")
2344 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2348 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349 rtx scratch = gen_reg_rtx (<MODE>mode);
2350 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2352 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359 [(match_operand:<VEL> 0 "register_operand")
2360 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2364 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365 rtx scratch = gen_reg_rtx (<MODE>mode);
2366 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2368 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min (SMAXV/UMAXV/SMINV/UMINV).
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2378 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI lacks the across-lanes form; one pairwise op reduces two lanes.
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383 [(set (match_operand:V2SI 0 "register_operand" "=w")
2384 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2387 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (FMAXV/FMINV/FMAXNMV/FMINNMV).
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2396 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2405 ;; Thus our BSL is of the form:
2406 ;; op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2410 ;; bsl mask, op1, op2
2411 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2412 ;; bit op0, op2, mask
2413 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2414 ;; bif op0, op1, mask
2416 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bit select as xor/and/xor RTL: op0 = ((op2 ^ op3) & mask) ^ op3.
;; Alternative chosen by which input is tied to the destination:
;; mask (BSL), op2 (BIT), or op3 (BIF).
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2425 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428 (match_dup:<V_INT_EQUIV> 3)
2432 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435 [(set_attr "type" "neon_bsl<q>")]
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first. The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted form of bsl_internal (inner XOR operands swapped) so combine
;; can match either operand ordering; final XOR is with operand 2 here.
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2449 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452 (match_dup:<V_INT_EQUIV> 2)))]
2455 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458 [(set_attr "type" "neon_bsl<q>")]
2461 ;; DImode is special, we want to avoid computing operations which are
2462 ;; more naturally computed in general purpose registers in the vector
2463 ;; registers. If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again. However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
;; DImode bit-select: BSL/BIT/BIF when allocated to FP registers, or split
;; back to xor/and/xor when the destination lands in a general register
;; (early-clobber "&r" alternative), per the comment above.
;; The split operand list must carry operands 0..3 in order; listing
;; operand 1 twice and dropping operand 0 (the destination, written by the
;; split body below) loses the destination mapping.  The sibling
;; aarch64_simd_bsldi_alt pattern shows the correct list.
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2476 (match_operand:DI 3 "register_operand" "w,0,w,r")
2477 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2483 bsl\\t%0.8b, %2.8b, %3.8b
2484 bit\\t%0.8b, %2.8b, %1.8b
2485 bif\\t%0.8b, %3.8b, %1.8b
2487 "&& GP_REGNUM_P (REGNO (operands[0]))"
2488 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2490 /* Split back to individual operations. If we're before reload, and
2491 able to create a temporary register, do so. If we're after reload,
2492 we've got an early-clobber destination register, so use that.
2493 Otherwise, we can't create pseudos and we can't yet guarantee that
2494 operands[0] is safe to write, so FAIL to split. */
2497 if (reload_completed)
2498 scratch = operands[0];
2499 else if (can_create_pseudo_p ())
2500 scratch = gen_reg_rtx (DImode);
2504 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2509 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510 (set_attr "length" "4,4,4,12")]
;; Commuted DImode bit-select (inner XOR operand order swapped), with the
;; same GP-register split strategy as bsldi_internal; here the final XOR of
;; the split is with operands[2], matching this pattern's (match_dup 2).
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2518 (match_operand:DI 3 "register_operand" "w,w,0,r")
2519 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2525 bsl\\t%0.8b, %3.8b, %2.8b
2526 bit\\t%0.8b, %3.8b, %1.8b
2527 bif\\t%0.8b, %2.8b, %1.8b
2529 "&& GP_REGNUM_P (REGNO (operands[0]))"
2530 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2532 /* Split back to individual operations. If we're before reload, and
2533 able to create a temporary register, do so. If we're after reload,
2534 we've got an early-clobber destination register, so use that.
2535 Otherwise, we can't create pseudos and we can't yet guarantee that
2536 operands[0] is safe to write, so FAIL to split. */
2539 if (reload_completed)
2540 scratch = operands[0];
2541 else if (can_create_pseudo_p ())
2542 scratch = gen_reg_rtx (DImode);
2546 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2551 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552 (set_attr "length" "4,4,4,12")]
;; Public bit-select expander: for FP modes, punning the data operands to
;; the integer-equivalent mode and computing into an integer-mode temporary
;; (the _internal pattern is integer-only), then moving the result back.
2555 (define_expand "aarch64_simd_bsl<mode>"
2556 [(match_operand:VALLDIF 0 "register_operand")
2557 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558 (match_operand:VALLDIF 2 "register_operand")
2559 (match_operand:VALLDIF 3 "register_operand")]
2562 /* We can't alias operands together if they have different modes. */
2563 rtx tmp = operands[0];
2564 if (FLOAT_MODE_P (<MODE>mode))
2566 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2570 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2575 if (tmp != operands[0])
2576 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Select between operands 1/2 under mask operand 3.  The -1/0 and 0/-1
;; constant cases collapse to a move or a bitwise NOT of the mask; the
;; general case forces the values to registers and emits a bit-select.
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582 [(match_operand:VALLDI 0 "register_operand")
2583 (match_operand:VALLDI 1 "nonmemory_operand")
2584 (match_operand:VALLDI 2 "nonmemory_operand")
2585 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2588 /* If we have (a = (P) ? -1 : 0);
2589 Then we can simply move the generated mask (result must be int). */
2590 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591 && operands[2] == CONST0_RTX (<MODE>mode))
2592 emit_move_insn (operands[0], operands[3]);
2593 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2594 else if (operands[1] == CONST0_RTX (<MODE>mode)
2595 && operands[2] == CONSTM1_RTX (<MODE>mode))
2596 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2599 if (!REG_P (operands[1]))
2600 operands[1] = force_reg (<MODE>mode, operands[1]);
2601 if (!REG_P (operands[2]))
2602 operands[2] = force_reg (<MODE>mode, operands[2]);
2603 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604 operands[1], operands[2]));
2610 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.  Unsigned
;; LTU/LEU are emitted by swapping operands of CMHI/CMHS; NE is EQ plus a
;; NOT.  (Listing elides several original lines, including the case labels.)
2612 (define_expand "vec_cmp<mode><mode>"
2613 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614 (match_operator 1 "comparison_operator"
2615 [(match_operand:VSDQ_I_DI 2 "register_operand")
2616 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2619 rtx mask = operands[0];
2620 enum rtx_code code = GET_CODE (operands[1]);
2630 if (operands[3] == CONST0_RTX (<MODE>mode))
2635 if (!REG_P (operands[3]))
2636 operands[3] = force_reg (<MODE>mode, operands[3]);
2644 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2648 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2652 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2656 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2660 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2664 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2668 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2672 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2676 /* Handle NE as !EQ. */
2677 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2682 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Unordered comparisons are
;; handled by emitting the inverse ordered FCM* and inverting the result;
;; UNEQ/UNORDERED/ORDERED are built from pairs of FCMGT/FCMGE plus ORR/NOT.
;; (Listing elides several original lines, including the case labels.)
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694 (match_operator 1 "comparison_operator"
2695 [(match_operand:VDQF 2 "register_operand")
2696 (match_operand:VDQF 3 "nonmemory_operand")]))]
2699 int use_zero_form = 0;
2700 enum rtx_code code = GET_CODE (operands[1]);
2701 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2703 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2712 if (operands[3] == CONST0_RTX (<MODE>mode))
2719 if (!REG_P (operands[3]))
2720 operands[3] = force_reg (<MODE>mode, operands[3]);
2730 comparison = gen_aarch64_cmlt<mode>;
2735 std::swap (operands[2], operands[3]);
2739 comparison = gen_aarch64_cmgt<mode>;
2744 comparison = gen_aarch64_cmle<mode>;
2749 std::swap (operands[2], operands[3]);
2753 comparison = gen_aarch64_cmge<mode>;
2757 comparison = gen_aarch64_cmeq<mode>;
2774 /* FCM returns false for lanes which are unordered, so if we use
2775 the inverse of the comparison we actually want to emit, then
2776 invert the result, we will end up with the correct result.
2777 Note that a NE NaN and NaN NE b are true for all a, b.
2779 Our transformations are:
2780 a UNGE b -> !(b GT a)
2781 a UNGT b -> !(b GE a)
2782 a UNLE b -> !(a GT b)
2783 a UNLT b -> !(a GE b)
2784 a NE b -> !(a EQ b) */
2785 gcc_assert (comparison != NULL);
2786 emit_insn (comparison (operands[0], operands[2], operands[3]));
2787 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2795 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2796 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2802 gcc_assert (comparison != NULL);
2803 emit_insn (comparison (operands[0], operands[2], operands[3]));
2807 /* We first check (a > b || b > a) which is !UNEQ, inverting
2808 this result will then give us (a == b || a UNORDERED b). */
2809 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2810 operands[2], operands[3]));
2811 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2812 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2813 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2817 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2818 UNORDERED as !ORDERED. */
2819 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2820 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2821 operands[3], operands[2]));
2822 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2823 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2827 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2828 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2829 operands[3], operands[2]));
2830 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer compare: identical expansion to vec_cmp, which already
;; dispatches on the (unsigned) rtx code, so just forward to it.
2840 (define_expand "vec_cmpu<mode><mode>"
2841 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2842 (match_operator 1 "comparison_operator"
2843 [(match_operand:VSDQ_I_DI 2 "register_operand")
2844 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2847 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2848 operands[2], operands[3]));
;; vcond: compare operands 4/5, then select 1/2 under the resulting mask.
;; NE is rewritten as EQ with the select arms swapped, avoiding the extra
;; NOT that vec_cmp would emit for NE.
2852 (define_expand "vcond<mode><mode>"
2853 [(set (match_operand:VALLDI 0 "register_operand")
2854 (if_then_else:VALLDI
2855 (match_operator 3 "comparison_operator"
2856 [(match_operand:VALLDI 4 "register_operand")
2857 (match_operand:VALLDI 5 "nonmemory_operand")])
2858 (match_operand:VALLDI 1 "nonmemory_operand")
2859 (match_operand:VALLDI 2 "nonmemory_operand")))]
2862 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2863 enum rtx_code code = GET_CODE (operands[3]);
2865 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2866 it as well as switch operands 1/2 in order to avoid the additional
2870 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2871 operands[4], operands[5]);
2872 std::swap (operands[1], operands[2]);
2874 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2875 operands[4], operands[5]));
2876 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877 operands[2], mask));
;; Mixed-mode vcond: FP comparison controlling selection of same-size
;; integer vectors (<V_cmp_mixed>).  Same NE -> EQ + swapped-arms trick.
2882 (define_expand "vcond<v_cmp_mixed><mode>"
2883 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2884 (if_then_else:<V_cmp_mixed>
2885 (match_operator 3 "comparison_operator"
2886 [(match_operand:VDQF_COND 4 "register_operand")
2887 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2888 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2889 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2892 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2893 enum rtx_code code = GET_CODE (operands[3]);
2895 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2896 it as well as switch operands 1/2 in order to avoid the additional
2900 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2901 operands[4], operands[5]);
2902 std::swap (operands[1], operands[2]);
2904 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2905 operands[4], operands[5]));
2906 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2907 operands[0], operands[1],
2908 operands[2], mask));
;; Unsigned vcond on integer vectors; the mask shares the data mode since
;; the element type is already integral.  Same NE -> EQ + swap trick.
2913 (define_expand "vcondu<mode><mode>"
2914 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2915 (if_then_else:VSDQ_I_DI
2916 (match_operator 3 "comparison_operator"
2917 [(match_operand:VSDQ_I_DI 4 "register_operand")
2918 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2919 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2920 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2923 rtx mask = gen_reg_rtx (<MODE>mode);
2924 enum rtx_code code = GET_CODE (operands[3]);
2926 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2927 it as well as switch operands 1/2 in order to avoid the additional
2931 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2932 operands[4], operands[5]);
2933 std::swap (operands[1], operands[2]);
2935 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2936 operands[4], operands[5]));
2937 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2938 operands[2], mask));
;; Unsigned integer comparison controlling selection of FP vectors.
;; Compare in the integer mode, select in the FP mode; NE -> EQ + swap.
2942 (define_expand "vcondu<mode><v_cmp_mixed>"
2943 [(set (match_operand:VDQF 0 "register_operand")
2945 (match_operator 3 "comparison_operator"
2946 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2947 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2948 (match_operand:VDQF 1 "nonmemory_operand")
2949 (match_operand:VDQF 2 "nonmemory_operand")))]
2952 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2953 enum rtx_code code = GET_CODE (operands[3]);
2955 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2956 it as well as switch operands 1/2 in order to avoid the additional
2960 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2961 operands[4], operands[5]);
2962 std::swap (operands[1], operands[2]);
2964 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2966 operands[4], operands[5]));
2967 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2968 operands[2], mask));
2972 ;; Patterns for AArch64 SIMD Intrinsics.
2974 ;; Lane extraction with sign extension to general purpose register.
;; Lane extraction with sign extension to a general register (SMOV).
;; Two mode iterators are in use here (GPI result, VDQQH source), so the
;; lane-number endian flip must name the vector iterator explicitly:
;; <VDQQH:MODE>mode.  A bare <MODE> is ambiguous with two iterators and in
;; any case the flip must use the vector mode's lane count, not GPI.
;; (The single-iterator zero-extend pattern below correctly uses <MODE>.)
2975 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2976 [(set (match_operand:GPI 0 "register_operand" "=r")
2979 (match_operand:VDQQH 1 "register_operand" "w")
2980 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2983 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
2984 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2986 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to a 32-bit general register (UMOV).
;; Single iterator (VDQQH), so plain <MODE> unambiguously names the vector
;; mode for the endian lane flip.
2989 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2990 [(set (match_operand:SI 0 "register_operand" "=r")
2993 (match_operand:VDQQH 1 "register_operand" "w")
2994 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2997 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2998 return "umov\\t%w0, %1.<Vetype>[%2]";
3000 [(set_attr "type" "neon_to_gp<q>")]
3003 ;; Lane extraction of a value, neither sign nor zero extension
3004 ;; is guaranteed so upper bits should be considered undefined.
3005 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Extract one lane to: a general register (UMOV), a SIMD scalar register
;; (DUP), or memory (ST1 single structure), chosen by alternative.
3006 (define_insn "aarch64_get_lane<mode>"
3007 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3009 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3010 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3013 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3014 switch (which_alternative)
3017 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3019 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3021 return "st1\\t{%1.<Vetype>}[%2], %0";
3026 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent 64-bit values as one 128-bit load: only valid when
;; operand 2's address is exactly operand 1's address plus the mode size,
;; and alignment is not strict.
3029 (define_insn "load_pair_lanes<mode>"
3030 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3032 (match_operand:VDC 1 "memory_operand" "Utq")
3033 (match_operand:VDC 2 "memory_operand" "m")))]
3034 "TARGET_SIMD && !STRICT_ALIGNMENT
3035 && rtx_equal_p (XEXP (operands[2], 0),
3036 plus_constant (Pmode,
3037 XEXP (operands[1], 0),
3038 GET_MODE_SIZE (<MODE>mode)))"
3040 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a concatenation of two 64-bit values: STP from either SIMD ("w")
;; or general ("r") registers, per alternative.
3043 (define_insn "store_pair_lanes<mode>"
3044 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3046 (match_operand:VDC 1 "register_operand" "w, r")
3047 (match_operand:VDC 2 "register_operand" "w, r")))]
3051 stp\\t%x1, %x2, %y0"
3052 [(set_attr "type" "neon_stp, store_16")]
3055 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with zero into a 128-bit register; the zero half
;; comes for free from a FMOV/LDR zero-extending write.  _le/_be variants
;; differ only in vec_concat operand order (zero first on big-endian).
3058 (define_insn "*aarch64_combinez<mode>"
3059 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3061 (match_operand:VDC 1 "general_operand" "w,?r,m")
3062 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3063 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3068 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3069 (set_attr "simd" "yes,*,yes")
3070 (set_attr "fp" "*,yes,*")]
;; Big-endian counterpart of the pattern above.
3073 (define_insn "*aarch64_combinez_be<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3076 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3077 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3078 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3083 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3084 (set_attr "simd" "yes,*,yes")
3085 (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit vectors into one 128-bit vector; endian handling is
;; delegated to aarch64_split_simd_combine.
3088 (define_expand "aarch64_combine<mode>"
3089 [(match_operand:<VDBL> 0 "register_operand")
3090 (match_operand:VDC 1 "register_operand")
3091 (match_operand:VDC 2 "register_operand")]
3094 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Internal combine helper: fill the low then the high quadword.
3100 (define_expand "aarch64_simd_combine<mode>"
3101 [(match_operand:<VDBL> 0 "register_operand")
3102 (match_operand:VDC 1 "register_operand")
3103 (match_operand:VDC 2 "register_operand")]
3106 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3107 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3110 [(set_attr "type" "multiple")]
3113 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves of two 128-bit vectors
;; (SADDL2/UADDL2/SSUBL2/USUBL2); operand 3 is the hi-half lane parallel.
3115 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3117 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3118 (match_operand:VQW 1 "register_operand" "w")
3119 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3120 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3121 (match_operand:VQW 2 "register_operand" "w")
3124 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3125 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Low-half counterpart (SADDL/UADDL/SSUBL/USUBL).
3128 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3129 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3130 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131 (match_operand:VQW 1 "register_operand" "w")
3132 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3133 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3134 (match_operand:VQW 2 "register_operand" "w")
3137 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3138 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders for the *2 (high-half) widening ops: build the
;; hi-half lane parallel ('true') and forward to the _hi_internal insns.
3142 (define_expand "aarch64_saddl2<mode>"
3143 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3144 (match_operand:VQW 1 "register_operand" "w")
3145 (match_operand:VQW 2 "register_operand" "w")]
3148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3149 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3154 (define_expand "aarch64_uaddl2<mode>"
3155 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3156 (match_operand:VQW 1 "register_operand" "w")
3157 (match_operand:VQW 2 "register_operand" "w")]
3160 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3161 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3166 (define_expand "aarch64_ssubl2<mode>"
3167 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3168 (match_operand:VQW 1 "register_operand" "w")
3169 (match_operand:VQW 2 "register_operand" "w")]
3172 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3173 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3178 (define_expand "aarch64_usubl2<mode>"
3179 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3180 (match_operand:VQW 1 "register_operand" "w")
3181 (match_operand:VQW 2 "register_operand" "w")]
3184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3185 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of whole 64-bit vectors (SADDL/UADDL/SSUBL/USUBL).
3190 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3191 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3192 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3193 (match_operand:VD_BHSI 1 "register_operand" "w"))
3195 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3197 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3198 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3201 ;; <su><addsub>w<q>.
;; widen_ssum on a 128-bit source: widen-accumulate the low half (SADDW on
;; the lo lane parallel, 'false') then the high half (SADDW2).
3203 (define_expand "widen_ssum<mode>3"
3204 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3205 (plus:<VDBLW> (sign_extend:<VDBLW>
3206 (match_operand:VQW 1 "register_operand" ""))
3207 (match_operand:<VDBLW> 2 "register_operand" "")))]
3210 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3211 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3213 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3215 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit source: a single SADDW suffices.
3220 (define_expand "widen_ssum<mode>3"
3221 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3222 (plus:<VWIDE> (sign_extend:<VWIDE>
3223 (match_operand:VD_BHSI 1 "register_operand" ""))
3224 (match_operand:<VWIDE> 2 "register_operand" "")))]
3227 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts (UADDW/UADDW2).
3231 (define_expand "widen_usum<mode>3"
3232 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3233 (plus:<VDBLW> (zero_extend:<VDBLW>
3234 (match_operand:VQW 1 "register_operand" ""))
3235 (match_operand:<VDBLW> 2 "register_operand" "")))]
3238 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3239 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3241 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3243 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3248 (define_expand "widen_usum<mode>3"
3249 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3250 (plus:<VWIDE> (zero_extend:<VWIDE>
3251 (match_operand:VD_BHSI 1 "register_operand" ""))
3252 (match_operand:<VWIDE> 2 "register_operand" "")))]
3255 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening add/sub of a wide vector with an extended 64-bit vector
;; (SADDW/UADDW/SSUBW/USUBW).
3259 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3261 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3263 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3265 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3266 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; As above but extending the low half of a 128-bit source.
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3274 (match_operand:VQW 2 "register_operand" "w")
3275 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3277 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3278 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant (SADDW2/UADDW2/SSUBW2/USUBW2).
3281 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3282 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3283 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3286 (match_operand:VQW 2 "register_operand" "w")
3287 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3289 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3293 (define_expand "aarch64_saddw2<mode>"
3294 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3295 (match_operand:<VWIDE> 1 "register_operand" "w")
3296 (match_operand:VQW 2 "register_operand" "w")]
3299 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3305 (define_expand "aarch64_uaddw2<mode>"
3306 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (match_operand:<VWIDE> 1 "register_operand" "w")
3308 (match_operand:VQW 2 "register_operand" "w")]
3311 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3312 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3318 (define_expand "aarch64_ssubw2<mode>"
3319 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3320 (match_operand:<VWIDE> 1 "register_operand" "w")
3321 (match_operand:VQW 2 "register_operand" "w")]
3324 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3325 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3330 (define_expand "aarch64_usubw2<mode>"
3331 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3332 (match_operand:<VWIDE> 1 "register_operand" "w")
3333 (match_operand:VQW 2 "register_operand" "w")]
3336 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3337 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3342 ;; <su><r>h<addsub>.
3344 (define_insn "aarch64_<sur>h<addsub><mode>"
3345 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3346 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3347 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3350 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3351 [(set_attr "type" "neon_<addsub>_halve<q>")]
3354 ;; <r><addsub>hn<q>.
3356 (define_insn "aarch64_<sur><addsub>hn<mode>"
3357 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3358 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3359 (match_operand:VQN 2 "register_operand" "w")]
3362 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3363 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3366 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3367 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3368 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3369 (match_operand:VQN 2 "register_operand" "w")
3370 (match_operand:VQN 3 "register_operand" "w")]
3373 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3374 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3379 (define_insn "aarch64_pmul<mode>"
3380 [(set (match_operand:VB 0 "register_operand" "=w")
3381 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3382 (match_operand:VB 2 "register_operand" "w")]
3385 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3386 [(set_attr "type" "neon_mul_<Vetype><q>")]
3391 (define_insn "aarch64_fmulx<mode>"
3392 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3394 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3395 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3398 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3399 [(set_attr "type" "neon_fp_mul_<stype>")]
3402 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3404 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3405 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3407 [(match_operand:VDQSF 1 "register_operand" "w")
3408 (vec_duplicate:VDQSF
3410 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3411 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3415 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3416 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3418 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3421 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3423 (define_insn "*aarch64_mulx_elt<mode>"
3424 [(set (match_operand:VDQF 0 "register_operand" "=w")
3426 [(match_operand:VDQF 1 "register_operand" "w")
3429 (match_operand:VDQF 2 "register_operand" "w")
3430 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3434 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3435 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3437 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3442 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3443 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3445 [(match_operand:VHSDF 1 "register_operand" "w")
3446 (vec_duplicate:VHSDF
3447 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3450 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3451 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3454 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3455 ;; vmulxd_lane_f64 == vmulx_lane_f64
3456 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3458 (define_insn "*aarch64_vgetfmulx<mode>"
3459 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3461 [(match_operand:<VEL> 1 "register_operand" "w")
3463 (match_operand:VDQF 2 "register_operand" "w")
3464 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3468 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3469 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3471 [(set_attr "type" "fmul<Vetype>")]
3475 (define_insn "aarch64_<su_optab><optab><mode>"
3476 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3477 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3478 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3480 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3481 [(set_attr "type" "neon_<optab><q>")]
3484 ;; suqadd and usqadd
3486 (define_insn "aarch64_<sur>qadd<mode>"
3487 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3488 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3489 (match_operand:VSDQ_I 2 "register_operand" "w")]
3492 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3493 [(set_attr "type" "neon_qadd<q>")]
3498 (define_insn "aarch64_sqmovun<mode>"
3499 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3500 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3503 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3504 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3507 ;; sqmovn and uqmovn
3509 (define_insn "aarch64_<sur>qmovn<mode>"
3510 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3511 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3514 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3515 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3520 (define_insn "aarch64_s<optab><mode>"
3521 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3523 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3525 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3526 [(set_attr "type" "neon_<optab><q>")]
3531 (define_insn "aarch64_sq<r>dmulh<mode>"
3532 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3534 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3535 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3538 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3539 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3544 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3545 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3547 [(match_operand:VDQHS 1 "register_operand" "w")
3549 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3550 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3554 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3555 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3556 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3559 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3560 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3562 [(match_operand:VDQHS 1 "register_operand" "w")
3564 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3565 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3570 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3571 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3574 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3575 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3577 [(match_operand:SD_HSI 1 "register_operand" "w")
3579 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3580 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3585 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3586 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3589 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3590 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3592 [(match_operand:SD_HSI 1 "register_operand" "w")
3594 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3595 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3600 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3601 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3606 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3607 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3609 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3610 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3611 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3614 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3615 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3618 ;; sqrdml[as]h_lane.
3620 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3621 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3623 [(match_operand:VDQHS 1 "register_operand" "0")
3624 (match_operand:VDQHS 2 "register_operand" "w")
3626 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3627 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3631 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3633 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3635 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3641 [(match_operand:SD_HSI 1 "register_operand" "0")
3642 (match_operand:SD_HSI 2 "register_operand" "w")
3644 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3649 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3651 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3656 ;; sqrdml[as]h_laneq.
3658 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3659 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3661 [(match_operand:VDQHS 1 "register_operand" "0")
3662 (match_operand:VDQHS 2 "register_operand" "w")
3664 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3665 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3669 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3671 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3673 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3679 [(match_operand:SD_HSI 1 "register_operand" "0")
3680 (match_operand:SD_HSI 2 "register_operand" "w")
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3687 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3689 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3691 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3696 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3697 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3699 (match_operand:<VWIDE> 1 "register_operand" "0")
3702 (sign_extend:<VWIDE>
3703 (match_operand:VSD_HSI 2 "register_operand" "w"))
3704 (sign_extend:<VWIDE>
3705 (match_operand:VSD_HSI 3 "register_operand" "w")))
3708 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3715 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3717 (match_operand:<VWIDE> 1 "register_operand" "0")
3720 (sign_extend:<VWIDE>
3721 (match_operand:VD_HSI 2 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (vec_duplicate:VD_HSI
3725 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3726 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3731 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3733 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3735 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3739 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3741 (match_operand:<VWIDE> 1 "register_operand" "0")
3744 (sign_extend:<VWIDE>
3745 (match_operand:VD_HSI 2 "register_operand" "w"))
3746 (sign_extend:<VWIDE>
3747 (vec_duplicate:VD_HSI
3749 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3750 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3755 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3757 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3759 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3763 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3765 (match_operand:<VWIDE> 1 "register_operand" "0")
3768 (sign_extend:<VWIDE>
3769 (match_operand:SD_HSI 2 "register_operand" "w"))
3770 (sign_extend:<VWIDE>
3772 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3773 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3778 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3780 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3782 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3785 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3786 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (match_operand:<VWIDE> 1 "register_operand" "0")
3791 (sign_extend:<VWIDE>
3792 (match_operand:SD_HSI 2 "register_operand" "w"))
3793 (sign_extend:<VWIDE>
3795 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3796 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3801 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3803 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3805 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3810 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3811 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3813 (match_operand:<VWIDE> 1 "register_operand" "0")
3816 (sign_extend:<VWIDE>
3817 (match_operand:VD_HSI 2 "register_operand" "w"))
3818 (sign_extend:<VWIDE>
3819 (vec_duplicate:VD_HSI
3820 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3823 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3824 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3829 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3830 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3832 (match_operand:<VWIDE> 1 "register_operand" "0")
3835 (sign_extend:<VWIDE>
3837 (match_operand:VQ_HSI 2 "register_operand" "w")
3838 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3839 (sign_extend:<VWIDE>
3841 (match_operand:VQ_HSI 3 "register_operand" "w")
3845 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3846 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3849 (define_expand "aarch64_sqdmlal2<mode>"
3850 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3851 (match_operand:<VWIDE> 1 "register_operand" "w")
3852 (match_operand:VQ_HSI 2 "register_operand" "w")
3853 (match_operand:VQ_HSI 3 "register_operand" "w")]
3856 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3857 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3858 operands[2], operands[3], p));
3862 (define_expand "aarch64_sqdmlsl2<mode>"
3863 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3864 (match_operand:<VWIDE> 1 "register_operand" "w")
3865 (match_operand:VQ_HSI 2 "register_operand" "w")
3866 (match_operand:VQ_HSI 3 "register_operand" "w")]
3869 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3870 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3871 operands[2], operands[3], p));
3877 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3878 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3880 (match_operand:<VWIDE> 1 "register_operand" "0")
3883 (sign_extend:<VWIDE>
3885 (match_operand:VQ_HSI 2 "register_operand" "w")
3886 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3887 (sign_extend:<VWIDE>
3888 (vec_duplicate:<VHALF>
3890 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3891 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3896 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3898 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3900 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3903 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3904 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3906 (match_operand:<VWIDE> 1 "register_operand" "0")
3909 (sign_extend:<VWIDE>
3911 (match_operand:VQ_HSI 2 "register_operand" "w")
3912 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3913 (sign_extend:<VWIDE>
3914 (vec_duplicate:<VHALF>
3916 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3917 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3922 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3924 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3926 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3929 (define_expand "aarch64_sqdmlal2_lane<mode>"
3930 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3931 (match_operand:<VWIDE> 1 "register_operand" "w")
3932 (match_operand:VQ_HSI 2 "register_operand" "w")
3933 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3934 (match_operand:SI 4 "immediate_operand" "i")]
3937 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3938 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3939 operands[2], operands[3],
3944 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3945 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3946 (match_operand:<VWIDE> 1 "register_operand" "w")
3947 (match_operand:VQ_HSI 2 "register_operand" "w")
3948 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3949 (match_operand:SI 4 "immediate_operand" "i")]
3952 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3953 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3954 operands[2], operands[3],
3959 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3960 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3961 (match_operand:<VWIDE> 1 "register_operand" "w")
3962 (match_operand:VQ_HSI 2 "register_operand" "w")
3963 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3964 (match_operand:SI 4 "immediate_operand" "i")]
3967 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3968 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3969 operands[2], operands[3],
3974 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3975 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3976 (match_operand:<VWIDE> 1 "register_operand" "w")
3977 (match_operand:VQ_HSI 2 "register_operand" "w")
3978 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3979 (match_operand:SI 4 "immediate_operand" "i")]
3982 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3983 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3984 operands[2], operands[3],
3989 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3992 (match_operand:<VWIDE> 1 "register_operand" "0")
3995 (sign_extend:<VWIDE>
3997 (match_operand:VQ_HSI 2 "register_operand" "w")
3998 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3999 (sign_extend:<VWIDE>
4000 (vec_duplicate:<VHALF>
4001 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4004 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4005 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4008 (define_expand "aarch64_sqdmlal2_n<mode>"
4009 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4010 (match_operand:<VWIDE> 1 "register_operand" "w")
4011 (match_operand:VQ_HSI 2 "register_operand" "w")
4012 (match_operand:<VEL> 3 "register_operand" "w")]
4015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4017 operands[2], operands[3],
4022 (define_expand "aarch64_sqdmlsl2_n<mode>"
4023 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (match_operand:<VWIDE> 1 "register_operand" "w")
4025 (match_operand:VQ_HSI 2 "register_operand" "w")
4026 (match_operand:<VEL> 3 "register_operand" "w")]
4029 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4030 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4031 operands[2], operands[3],
4038 (define_insn "aarch64_sqdmull<mode>"
4039 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (sign_extend:<VWIDE>
4043 (match_operand:VSD_HSI 1 "register_operand" "w"))
4044 (sign_extend:<VWIDE>
4045 (match_operand:VSD_HSI 2 "register_operand" "w")))
4048 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4049 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4054 (define_insn "aarch64_sqdmull_lane<mode>"
4055 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058 (sign_extend:<VWIDE>
4059 (match_operand:VD_HSI 1 "register_operand" "w"))
4060 (sign_extend:<VWIDE>
4061 (vec_duplicate:VD_HSI
4063 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4064 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4069 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4070 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4072 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4075 (define_insn "aarch64_sqdmull_laneq<mode>"
4076 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4079 (sign_extend:<VWIDE>
4080 (match_operand:VD_HSI 1 "register_operand" "w"))
4081 (sign_extend:<VWIDE>
4082 (vec_duplicate:VD_HSI
4084 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4085 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4090 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4091 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4093 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4096 (define_insn "aarch64_sqdmull_lane<mode>"
4097 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4100 (sign_extend:<VWIDE>
4101 (match_operand:SD_HSI 1 "register_operand" "w"))
4102 (sign_extend:<VWIDE>
4104 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4105 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4110 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4111 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4113 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4116 (define_insn "aarch64_sqdmull_laneq<mode>"
4117 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4120 (sign_extend:<VWIDE>
4121 (match_operand:SD_HSI 1 "register_operand" "w"))
4122 (sign_extend:<VWIDE>
4124 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4125 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4130 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4131 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4133 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4138 (define_insn "aarch64_sqdmull_n<mode>"
4139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (sign_extend:<VWIDE>
4143 (match_operand:VD_HSI 1 "register_operand" "w"))
4144 (sign_extend:<VWIDE>
4145 (vec_duplicate:VD_HSI
4146 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4150 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4151 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4158 (define_insn "aarch64_sqdmull2<mode>_internal"
4159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4162 (sign_extend:<VWIDE>
4164 (match_operand:VQ_HSI 1 "register_operand" "w")
4165 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4166 (sign_extend:<VWIDE>
4168 (match_operand:VQ_HSI 2 "register_operand" "w")
4173 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4174 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4177 (define_expand "aarch64_sqdmull2<mode>"
4178 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4179 (match_operand:VQ_HSI 1 "register_operand" "w")
4180 (match_operand:VQ_HSI 2 "register_operand" "w")]
4183 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4184 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4191 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4192 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4195 (sign_extend:<VWIDE>
4197 (match_operand:VQ_HSI 1 "register_operand" "w")
4198 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4199 (sign_extend:<VWIDE>
4200 (vec_duplicate:<VHALF>
4202 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4203 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4208 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4209 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4211 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4214 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218 (sign_extend:<VWIDE>
4220 (match_operand:VQ_HSI 1 "register_operand" "w")
4221 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4222 (sign_extend:<VWIDE>
4223 (vec_duplicate:<VHALF>
4225 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4226 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4231 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4232 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4234 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4237 (define_expand "aarch64_sqdmull2_lane<mode>"
4238 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4239 (match_operand:VQ_HSI 1 "register_operand" "w")
4240 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4241 (match_operand:SI 3 "immediate_operand" "i")]
4244 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4245 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4246 operands[2], operands[3],
4251 (define_expand "aarch64_sqdmull2_laneq<mode>"
4252 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4253 (match_operand:VQ_HSI 1 "register_operand" "w")
4254 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4255 (match_operand:SI 3 "immediate_operand" "i")]
4258 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4259 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4260 operands[2], operands[3],
4267 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4268 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4271 (sign_extend:<VWIDE>
4273 (match_operand:VQ_HSI 1 "register_operand" "w")
4274 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4275 (sign_extend:<VWIDE>
4276 (vec_duplicate:<VHALF>
4277 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4281 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4282 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4285 (define_expand "aarch64_sqdmull2_n<mode>"
4286 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (match_operand:VQ_HSI 1 "register_operand" "w")
4288 (match_operand:<VEL> 2 "register_operand" "w")]
4291 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4292 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4299 (define_insn "aarch64_<sur>shl<mode>"
4300 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4302 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4303 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4306 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4307 [(set_attr "type" "neon_shift_reg<q>")]
4313 (define_insn "aarch64_<sur>q<r>shl<mode>"
4314 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4316 [(match_operand:VSDQ_I 1 "register_operand" "w")
4317 (match_operand:VSDQ_I 2 "register_operand" "w")]
4320 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4321 [(set_attr "type" "neon_sat_shift_reg<q>")]
4326 (define_insn "aarch64_<sur>shll_n<mode>"
4327 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4328 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4330 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4334 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4335 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4337 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4339 [(set_attr "type" "neon_shift_imm_long")]
4344 (define_insn "aarch64_<sur>shll2_n<mode>"
4345 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4347 (match_operand:SI 2 "immediate_operand" "i")]
4351 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4352 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356 [(set_attr "type" "neon_shift_imm_long")]
4361 (define_insn "aarch64_<sur>shr_n<mode>"
4362 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4363 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4365 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4368 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4369 [(set_attr "type" "neon_sat_shift_imm<q>")]
4374 (define_insn "aarch64_<sur>sra_n<mode>"
4375 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4376 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4377 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4379 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4382 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4383 [(set_attr "type" "neon_shift_acc<q>")]
4388 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4389 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4390 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4391 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4393 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4396 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4397 [(set_attr "type" "neon_shift_imm<q>")]
4402 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4403 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4404 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4406 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4409 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4410 [(set_attr "type" "neon_sat_shift_imm<q>")]
4416 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4417 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4418 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4420 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4423 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4424 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4428 ;; cm(eq|ge|gt|lt|le)
4429 ;; Note, we have constraints for Dz and Z as different expanders
4430 ;; have different ideas of what should be passed to this pattern.
4432 (define_insn "aarch64_cm<optab><mode>"
4433 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4435 (COMPARISONS:<V_INT_EQUIV>
4436 (match_operand:VDQ_I 1 "register_operand" "w,w")
4437 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4441 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4442 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4443 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4446 (define_insn_and_split "aarch64_cm<optab>di"
4447 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4450 (match_operand:DI 1 "register_operand" "w,w,r")
4451 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4453 (clobber (reg:CC CC_REGNUM))]
4457 [(set (match_operand:DI 0 "register_operand")
4460 (match_operand:DI 1 "register_operand")
4461 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4464 /* If we are in the general purpose register file,
4465 we split to a sequence of comparison and store. */
4466 if (GP_REGNUM_P (REGNO (operands[0]))
4467 && GP_REGNUM_P (REGNO (operands[1])))
4469 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4470 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4471 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4472 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4475 /* Otherwise, we expand to a similar pattern which does not
4476 clobber CC_REGNUM. */
4478 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4481 (define_insn "*aarch64_cm<optab>di"
4482 [(set (match_operand:DI 0 "register_operand" "=w,w")
4485 (match_operand:DI 1 "register_operand" "w,w")
4486 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4488 "TARGET_SIMD && reload_completed"
4490 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4491 cm<optab>\t%d0, %d1, #0"
4492 [(set_attr "type" "neon_compare, neon_compare_zero")]
4497 (define_insn "aarch64_cm<optab><mode>"
4498 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4500 (UCOMPARISONS:<V_INT_EQUIV>
4501 (match_operand:VDQ_I 1 "register_operand" "w")
4502 (match_operand:VDQ_I 2 "register_operand" "w")
4505 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4506 [(set_attr "type" "neon_compare<q>")]
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510 [(set (match_operand:DI 0 "register_operand" "=w,r")
4513 (match_operand:DI 1 "register_operand" "w,r")
4514 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4516 (clobber (reg:CC CC_REGNUM))]
4520 [(set (match_operand:DI 0 "register_operand")
4523 (match_operand:DI 1 "register_operand")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4527 /* If we are in the general purpose register file,
4528 we split to a sequence of comparison and store. */
4529 if (GP_REGNUM_P (REGNO (operands[0]))
4530 && GP_REGNUM_P (REGNO (operands[1])))
4532 machine_mode mode = CCmode;
4533 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4538 /* Otherwise, we expand to a similar pattern which does not
4539 clobber CC_REGNUM. */
4541 [(set_attr "type" "neon_compare,multiple")]
4544 (define_insn "*aarch64_cm<optab>di"
4545 [(set (match_operand:DI 0 "register_operand" "=w")
4548 (match_operand:DI 1 "register_operand" "w")
4549 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4551 "TARGET_SIMD && reload_completed"
4552 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4553 [(set_attr "type" "neon_compare")]
4558 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4559 ;; we don't have any insns using ne, and aarch64_vcond outputs
4560 ;; not (neg (eq (and x y) 0))
4561 ;; which is rewritten by simplify_rtx as
4562 ;; plus (eq (and x y) 0) -1.
4564 (define_insn "aarch64_cmtst<mode>"
4565 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4569 (match_operand:VDQ_I 1 "register_operand" "w")
4570 (match_operand:VDQ_I 2 "register_operand" "w"))
4571 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4572 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4575 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4576 [(set_attr "type" "neon_tst<q>")]
4579 (define_insn_and_split "aarch64_cmtstdi"
4580 [(set (match_operand:DI 0 "register_operand" "=w,r")
4584 (match_operand:DI 1 "register_operand" "w,r")
4585 (match_operand:DI 2 "register_operand" "w,r"))
4587 (clobber (reg:CC CC_REGNUM))]
4591 [(set (match_operand:DI 0 "register_operand")
4595 (match_operand:DI 1 "register_operand")
4596 (match_operand:DI 2 "register_operand"))
4599 /* If we are in the general purpose register file,
4600 we split to a sequence of comparison and store. */
4601 if (GP_REGNUM_P (REGNO (operands[0]))
4602 && GP_REGNUM_P (REGNO (operands[1])))
4604 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4605 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4606 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4607 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4608 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4611 /* Otherwise, we expand to a similar pattern which does not
4612 clobber CC_REGNUM. */
4614 [(set_attr "type" "neon_tst,multiple")]
4617 (define_insn "*aarch64_cmtstdi"
4618 [(set (match_operand:DI 0 "register_operand" "=w")
4622 (match_operand:DI 1 "register_operand" "w")
4623 (match_operand:DI 2 "register_operand" "w"))
4626 "cmtst\t%d0, %d1, %d2"
4627 [(set_attr "type" "neon_tst")]
4630 ;; fcm(eq|ge|gt|le|lt)
4632 (define_insn "aarch64_cm<optab><mode>"
4633 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4635 (COMPARISONS:<V_INT_EQUIV>
4636 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4637 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4641 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4642 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4643 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4647 ;; Note we can also handle what would be fac(le|lt) by
4648 ;; generating fac(ge|gt).
4650 (define_insn "aarch64_fac<optab><mode>"
4651 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4653 (FAC_COMPARISONS:<V_INT_EQUIV>
4655 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4657 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4660 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4661 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4666 (define_insn "aarch64_addp<mode>"
4667 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4669 [(match_operand:VD_BHSI 1 "register_operand" "w")
4670 (match_operand:VD_BHSI 2 "register_operand" "w")]
4673 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4674 [(set_attr "type" "neon_reduc_add<q>")]
4677 (define_insn "aarch64_addpdi"
4678 [(set (match_operand:DI 0 "register_operand" "=w")
4680 [(match_operand:V2DI 1 "register_operand" "w")]
4684 [(set_attr "type" "neon_reduc_add")]
4689 (define_expand "sqrt<mode>2"
4690 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4691 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4694 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4698 (define_insn "*sqrt<mode>2"
4699 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4700 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4702 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4703 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4706 ;; Patterns for vector struct loads and stores.
4708 (define_insn "aarch64_simd_ld2<mode>"
4709 [(set (match_operand:OI 0 "register_operand" "=w")
4710 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4711 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4714 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4715 [(set_attr "type" "neon_load2_2reg<q>")]
4718 (define_insn "aarch64_simd_ld2r<mode>"
4719 [(set (match_operand:OI 0 "register_operand" "=w")
4720 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4721 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4724 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725 [(set_attr "type" "neon_load2_all_lanes<q>")]
4728 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4729 [(set (match_operand:OI 0 "register_operand" "=w")
4730 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731 (match_operand:OI 2 "register_operand" "0")
4732 (match_operand:SI 3 "immediate_operand" "i")
4733 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4737 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4738 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4740 [(set_attr "type" "neon_load2_one_lane")]
4743 (define_expand "vec_load_lanesoi<mode>"
4744 [(set (match_operand:OI 0 "register_operand" "=w")
4745 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4746 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4750 if (BYTES_BIG_ENDIAN)
4752 rtx tmp = gen_reg_rtx (OImode);
4753 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4754 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4755 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4758 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4762 (define_insn "aarch64_simd_st2<mode>"
4763 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4764 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4765 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4769 [(set_attr "type" "neon_store2_2reg<q>")]
4772 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4773 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4774 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4775 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4776 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4777 (match_operand:SI 2 "immediate_operand" "i")]
4781 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4782 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4784 [(set_attr "type" "neon_store2_one_lane<q>")]
4787 (define_expand "vec_store_lanesoi<mode>"
4788 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4789 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4790 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4794 if (BYTES_BIG_ENDIAN)
4796 rtx tmp = gen_reg_rtx (OImode);
4797 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4798 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4799 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4802 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4806 (define_insn "aarch64_simd_ld3<mode>"
4807 [(set (match_operand:CI 0 "register_operand" "=w")
4808 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4809 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4813 [(set_attr "type" "neon_load3_3reg<q>")]
4816 (define_insn "aarch64_simd_ld3r<mode>"
4817 [(set (match_operand:CI 0 "register_operand" "=w")
4818 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4819 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4822 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823 [(set_attr "type" "neon_load3_all_lanes<q>")]
4826 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4827 [(set (match_operand:CI 0 "register_operand" "=w")
4828 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829 (match_operand:CI 2 "register_operand" "0")
4830 (match_operand:SI 3 "immediate_operand" "i")
4831 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4835 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4836 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4838 [(set_attr "type" "neon_load3_one_lane")]
4841 (define_expand "vec_load_lanesci<mode>"
4842 [(set (match_operand:CI 0 "register_operand" "=w")
4843 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4844 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4848 if (BYTES_BIG_ENDIAN)
4850 rtx tmp = gen_reg_rtx (CImode);
4851 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4852 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4853 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4856 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4860 (define_insn "aarch64_simd_st3<mode>"
4861 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4862 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4863 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4866 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4867 [(set_attr "type" "neon_store3_3reg<q>")]
4870 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4871 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4872 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4873 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4874 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4875 (match_operand:SI 2 "immediate_operand" "i")]
4879 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4880 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4882 [(set_attr "type" "neon_store3_one_lane<q>")]
4885 (define_expand "vec_store_lanesci<mode>"
4886 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4887 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4888 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4892 if (BYTES_BIG_ENDIAN)
4894 rtx tmp = gen_reg_rtx (CImode);
4895 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4896 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4897 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4900 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4904 (define_insn "aarch64_simd_ld4<mode>"
4905 [(set (match_operand:XI 0 "register_operand" "=w")
4906 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4907 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4910 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4911 [(set_attr "type" "neon_load4_4reg<q>")]
4914 (define_insn "aarch64_simd_ld4r<mode>"
4915 [(set (match_operand:XI 0 "register_operand" "=w")
4916 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4917 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4920 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921 [(set_attr "type" "neon_load4_all_lanes<q>")]
4924 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4925 [(set (match_operand:XI 0 "register_operand" "=w")
4926 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927 (match_operand:XI 2 "register_operand" "0")
4928 (match_operand:SI 3 "immediate_operand" "i")
4929 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4933 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4934 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4936 [(set_attr "type" "neon_load4_one_lane")]
4939 (define_expand "vec_load_lanesxi<mode>"
4940 [(set (match_operand:XI 0 "register_operand" "=w")
4941 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4942 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4946 if (BYTES_BIG_ENDIAN)
4948 rtx tmp = gen_reg_rtx (XImode);
4949 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4950 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4951 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4954 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4958 (define_insn "aarch64_simd_st4<mode>"
4959 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4960 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4961 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4964 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4965 [(set_attr "type" "neon_store4_4reg<q>")]
4968 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4969 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4970 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4971 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4972 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4973 (match_operand:SI 2 "immediate_operand" "i")]
4977 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4978 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4980 [(set_attr "type" "neon_store4_one_lane<q>")]
4983 (define_expand "vec_store_lanesxi<mode>"
4984 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4985 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4986 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 if (BYTES_BIG_ENDIAN)
4992 rtx tmp = gen_reg_rtx (XImode);
4993 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4994 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4995 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4998 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5002 (define_insn_and_split "aarch64_rev_reglist<mode>"
5003 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5005 [(match_operand:VSTRUCT 1 "register_operand" "w")
5006 (match_operand:V16QI 2 "register_operand" "w")]
5007 UNSPEC_REV_REGLIST))]
5010 "&& reload_completed"
5014 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5015 for (i = 0; i < nregs; i++)
5017 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5018 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5019 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5023 [(set_attr "type" "neon_tbl1_q")
5024 (set_attr "length" "<insn_count>")]
5027 ;; Reload patterns for AdvSIMD register list operands.
5029 (define_expand "mov<mode>"
5030 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5031 (match_operand:VSTRUCT 1 "general_operand" ""))]
5034 if (can_create_pseudo_p ())
5036 if (GET_CODE (operands[0]) != REG)
5037 operands[1] = force_reg (<MODE>mode, operands[1]);
5041 (define_insn "*aarch64_mov<mode>"
5042 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5043 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5044 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5045 && (register_operand (operands[0], <MODE>mode)
5046 || register_operand (operands[1], <MODE>mode))"
5049 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5050 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5051 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5052 neon_load<nregs>_<nregs>reg_q")
5053 (set_attr "length" "<insn_count>,4,4")]
5056 (define_insn "aarch64_be_ld1<mode>"
5057 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5058 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5059 "aarch64_simd_struct_operand" "Utv")]
5062 "ld1\\t{%0<Vmtype>}, %1"
5063 [(set_attr "type" "neon_load1_1reg<q>")]
5066 (define_insn "aarch64_be_st1<mode>"
5067 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5068 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5071 "st1\\t{%1<Vmtype>}, %0"
5072 [(set_attr "type" "neon_store1_1reg<q>")]
5075 (define_insn "*aarch64_be_movoi"
5076 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5077 (match_operand:OI 1 "general_operand" " w,w,m"))]
5078 "TARGET_SIMD && BYTES_BIG_ENDIAN
5079 && (register_operand (operands[0], OImode)
5080 || register_operand (operands[1], OImode))"
5085 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5086 (set_attr "length" "8,4,4")]
5089 (define_insn "*aarch64_be_movci"
5090 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5091 (match_operand:CI 1 "general_operand" " w,w,o"))]
5092 "TARGET_SIMD && BYTES_BIG_ENDIAN
5093 && (register_operand (operands[0], CImode)
5094 || register_operand (operands[1], CImode))"
5096 [(set_attr "type" "multiple")
5097 (set_attr "length" "12,4,4")]
5100 (define_insn "*aarch64_be_movxi"
5101 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5102 (match_operand:XI 1 "general_operand" " w,w,o"))]
5103 "TARGET_SIMD && BYTES_BIG_ENDIAN
5104 && (register_operand (operands[0], XImode)
5105 || register_operand (operands[1], XImode))"
5107 [(set_attr "type" "multiple")
5108 (set_attr "length" "16,4,4")]
5112 [(set (match_operand:OI 0 "register_operand")
5113 (match_operand:OI 1 "register_operand"))]
5114 "TARGET_SIMD && reload_completed"
5117 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5122 [(set (match_operand:CI 0 "nonimmediate_operand")
5123 (match_operand:CI 1 "general_operand"))]
5124 "TARGET_SIMD && reload_completed"
5127 if (register_operand (operands[0], CImode)
5128 && register_operand (operands[1], CImode))
5130 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5133 else if (BYTES_BIG_ENDIAN)
5135 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5136 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5137 emit_move_insn (gen_lowpart (V16QImode,
5138 simplify_gen_subreg (TImode, operands[0],
5140 gen_lowpart (V16QImode,
5141 simplify_gen_subreg (TImode, operands[1],
5150 [(set (match_operand:XI 0 "nonimmediate_operand")
5151 (match_operand:XI 1 "general_operand"))]
5152 "TARGET_SIMD && reload_completed"
5155 if (register_operand (operands[0], XImode)
5156 && register_operand (operands[1], XImode))
5158 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5161 else if (BYTES_BIG_ENDIAN)
5163 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5164 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5165 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5166 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5173 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5174 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5175 (match_operand:DI 1 "register_operand" "w")
5176 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5179 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5180 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5183 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5188 (define_insn "aarch64_ld2<mode>_dreg"
5189 [(set (match_operand:OI 0 "register_operand" "=w")
5190 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5191 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5194 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5195 [(set_attr "type" "neon_load2_2reg<q>")]
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199 [(set (match_operand:OI 0 "register_operand" "=w")
5200 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5204 "ld1\\t{%S0.1d - %T0.1d}, %1"
5205 [(set_attr "type" "neon_load1_2reg<q>")]
5208 (define_insn "aarch64_ld3<mode>_dreg"
5209 [(set (match_operand:CI 0 "register_operand" "=w")
5210 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5214 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5215 [(set_attr "type" "neon_load3_3reg<q>")]
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219 [(set (match_operand:CI 0 "register_operand" "=w")
5220 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5224 "ld1\\t{%S0.1d - %U0.1d}, %1"
5225 [(set_attr "type" "neon_load1_3reg<q>")]
5228 (define_insn "aarch64_ld4<mode>_dreg"
5229 [(set (match_operand:XI 0 "register_operand" "=w")
5230 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5234 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5235 [(set_attr "type" "neon_load4_4reg<q>")]
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239 [(set (match_operand:XI 0 "register_operand" "=w")
5240 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5244 "ld1\\t{%S0.1d - %V0.1d}, %1"
5245 [(set_attr "type" "neon_load1_4reg<q>")]
5248 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5249 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5250 (match_operand:DI 1 "register_operand" "r")
5251 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5255 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5257 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5261 (define_expand "aarch64_ld1<VALL_F16:mode>"
5262 [(match_operand:VALL_F16 0 "register_operand")
5263 (match_operand:DI 1 "register_operand")]
5266 machine_mode mode = <VALL_F16:MODE>mode;
5267 rtx mem = gen_rtx_MEM (mode, operands[1]);
5269 if (BYTES_BIG_ENDIAN)
5270 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5272 emit_move_insn (operands[0], mem);
5276 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5277 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5278 (match_operand:DI 1 "register_operand" "r")
5279 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5282 machine_mode mode = <VSTRUCT:MODE>mode;
5283 rtx mem = gen_rtx_MEM (mode, operands[1]);
5285 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5289 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5290 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5291 (match_operand:DI 1 "register_operand" "w")
5292 (match_operand:VSTRUCT 2 "register_operand" "0")
5293 (match_operand:SI 3 "immediate_operand" "i")
5294 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5297 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5298 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5301 aarch64_simd_lane_bounds (operands[3], 0,
5302 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5304 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5305 operands[0], mem, operands[2], operands[3]));
5309 ;; Expanders for builtins to extract vector registers from large
5310 ;; opaque integer modes.
5314 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5315 [(match_operand:VDC 0 "register_operand" "=w")
5316 (match_operand:VSTRUCT 1 "register_operand" "w")
5317 (match_operand:SI 2 "immediate_operand" "i")]
5320 int part = INTVAL (operands[2]);
5321 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5322 int offset = part * 16;
5324 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5325 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5331 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5332 [(match_operand:VQ 0 "register_operand" "=w")
5333 (match_operand:VSTRUCT 1 "register_operand" "w")
5334 (match_operand:SI 2 "immediate_operand" "i")]
5337 int part = INTVAL (operands[2]);
5338 int offset = part * 16;
5340 emit_move_insn (operands[0],
5341 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5345 ;; Permuted-store expanders for neon intrinsics.
5347 ;; Permute instructions
5351 (define_expand "vec_perm_const<mode>"
5352 [(match_operand:VALL_F16 0 "register_operand")
5353 (match_operand:VALL_F16 1 "register_operand")
5354 (match_operand:VALL_F16 2 "register_operand")
5355 (match_operand:<V_INT_EQUIV> 3)]
5358 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5359 operands[2], operands[3], <nunits>))
5365 (define_expand "vec_perm<mode>"
5366 [(match_operand:VB 0 "register_operand")
5367 (match_operand:VB 1 "register_operand")
5368 (match_operand:VB 2 "register_operand")
5369 (match_operand:VB 3 "register_operand")]
5372 aarch64_expand_vec_perm (operands[0], operands[1],
5373 operands[2], operands[3], <nunits>);
5377 (define_insn "aarch64_tbl1<mode>"
5378 [(set (match_operand:VB 0 "register_operand" "=w")
5379 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5380 (match_operand:VB 2 "register_operand" "w")]
5383 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5384 [(set_attr "type" "neon_tbl1<q>")]
5387 ;; Two source registers.
5389 (define_insn "aarch64_tbl2v16qi"
5390 [(set (match_operand:V16QI 0 "register_operand" "=w")
5391 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5392 (match_operand:V16QI 2 "register_operand" "w")]
5395 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5396 [(set_attr "type" "neon_tbl2_q")]
5399 (define_insn "aarch64_tbl3<mode>"
5400 [(set (match_operand:VB 0 "register_operand" "=w")
5401 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5402 (match_operand:VB 2 "register_operand" "w")]
5405 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5406 [(set_attr "type" "neon_tbl3")]
5409 (define_insn "aarch64_tbx4<mode>"
5410 [(set (match_operand:VB 0 "register_operand" "=w")
5411 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5412 (match_operand:OI 2 "register_operand" "w")
5413 (match_operand:VB 3 "register_operand" "w")]
5416 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5417 [(set_attr "type" "neon_tbl4")]
5420 ;; Three source registers.
5422 (define_insn "aarch64_qtbl3<mode>"
5423 [(set (match_operand:VB 0 "register_operand" "=w")
5424 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5425 (match_operand:VB 2 "register_operand" "w")]
5428 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5429 [(set_attr "type" "neon_tbl3")]
5432 (define_insn "aarch64_qtbx3<mode>"
5433 [(set (match_operand:VB 0 "register_operand" "=w")
5434 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5435 (match_operand:CI 2 "register_operand" "w")
5436 (match_operand:VB 3 "register_operand" "w")]
5439 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5440 [(set_attr "type" "neon_tbl3")]
5443 ;; Four source registers.
5445 (define_insn "aarch64_qtbl4<mode>"
5446 [(set (match_operand:VB 0 "register_operand" "=w")
5447 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5448 (match_operand:VB 2 "register_operand" "w")]
5451 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5452 [(set_attr "type" "neon_tbl4")]
5455 (define_insn "aarch64_qtbx4<mode>"
5456 [(set (match_operand:VB 0 "register_operand" "=w")
5457 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5458 (match_operand:XI 2 "register_operand" "w")
5459 (match_operand:VB 3 "register_operand" "w")]
5462 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5463 [(set_attr "type" "neon_tbl4")]
5466 (define_insn_and_split "aarch64_combinev16qi"
5467 [(set (match_operand:OI 0 "register_operand" "=w")
5468 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5469 (match_operand:V16QI 2 "register_operand" "w")]
5473 "&& reload_completed"
5476 aarch64_split_combinev16qi (operands);
5479 [(set_attr "type" "multiple")]
5482 ;; This instruction's pattern is generated directly by
5483 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5484 ;; need corresponding changes there.
;; Two-source permutes: the PERMUTE iterator expands to the ZIP/UZP/TRN
;; family, with perm_hilo selecting the low (1) or high (2) half variant.
5485 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5486 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5487 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5488 (match_operand:VALL_F16 2 "register_operand" "w")]
5491 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5492 [(set_attr "type" "neon_permute<q>")]
5495 ;; This instruction's pattern is generated directly by
5496 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5497 ;; need corresponding changes there. Note that the immediate (third)
5498 ;; operand is a lane index not a byte index.
5499 (define_insn "aarch64_ext<mode>"
5500 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5501 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5502 (match_operand:VALL_F16 2 "register_operand" "w")
5503 (match_operand:SI 3 "immediate_operand" "i")]
;; EXT takes a byte offset, so scale the lane index by the element size
;; before emitting the instruction.
5507 operands[3] = GEN_INT (INTVAL (operands[3])
5508 * GET_MODE_UNIT_SIZE (<MODE>mode));
5509 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5511 [(set_attr "type" "neon_ext<q>")]
5514 ;; This instruction's pattern is generated directly by
5515 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5516 ;; need corresponding changes there.
;; Element-reversal family: REVERSE:rev_op selects the REV16/REV32/REV64
;; variant, reversing elements within each 16/32/64-bit container.
5517 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5518 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5519 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5522 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5523 [(set_attr "type" "neon_rev<q>")]
;; ST2 of a two-D-register structure for 64-bit vector (VD) modes.
;; Operand 0 is a BLK-mode memory reference; operand 1 is the OI-mode
;; register pair being stored interleaved.
5526 (define_insn "aarch64_st2<mode>_dreg"
5527 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5528 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5529 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5532 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5533 [(set_attr "type" "neon_store2_2reg")]
;; st2 variant for DX (DI/DF-class) modes: a single 64-bit element per
;; register needs no interleaving, so a plain ST1 of two .1d registers
;; is used instead of ST2.
5536 (define_insn "aarch64_st2<mode>_dreg"
5537 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5538 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5539 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5542 "st1\\t{%S1.1d - %T1.1d}, %0"
5543 [(set_attr "type" "neon_store1_2reg")]
;; ST3 of a three-D-register structure for 64-bit vector (VD) modes;
;; operand 1 is the CI-mode register triple stored interleaved.
5546 (define_insn "aarch64_st3<mode>_dreg"
5547 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5548 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5549 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5552 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5553 [(set_attr "type" "neon_store3_3reg")]
;; st3 variant for DX modes: no interleaving needed for single 64-bit
;; elements, so ST1 of three .1d registers is emitted.
5556 (define_insn "aarch64_st3<mode>_dreg"
5557 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5558 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5559 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5562 "st1\\t{%S1.1d - %U1.1d}, %0"
5563 [(set_attr "type" "neon_store1_3reg")]
;; ST4 of a four-D-register structure for 64-bit vector (VD) modes;
;; operand 1 is the XI-mode register quad stored interleaved.
5566 (define_insn "aarch64_st4<mode>_dreg"
5567 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5568 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5569 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5572 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5573 [(set_attr "type" "neon_store4_4reg")]
;; st4 variant for DX modes: ST1 of four .1d registers, since single
;; 64-bit elements need no interleaving.
5576 (define_insn "aarch64_st4<mode>_dreg"
5577 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5578 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5579 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5582 "st1\\t{%S1.1d - %V1.1d}, %0"
5583 [(set_attr "type" "neon_store1_4reg")]
;; Expander for st2/st3/st4 builtins on 64-bit (VDC) modes: wraps the
;; address in operand 0 as a BLK memory reference sized to nregs D-regs
;; (8 bytes each) and emits the matching _dreg store insn above.
5586 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5587 [(match_operand:DI 0 "register_operand" "r")
5588 (match_operand:VSTRUCT 1 "register_operand" "w")
5589 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5592 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5593 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5595 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Expander for st2/st3/st4 builtins on 128-bit (VQ) modes: builds a
;; struct-mode MEM from the address register and hands off to the
;; generic aarch64_simd_st<nregs> pattern.
5599 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5600 [(match_operand:DI 0 "register_operand" "r")
5601 (match_operand:VSTRUCT 1 "register_operand" "w")
5602 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5605 machine_mode mode = <VSTRUCT:MODE>mode;
5606 rtx mem = gen_rtx_MEM (mode, operands[0]);
5608 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Expander for st2/st3/st4 lane builtins: stores one lane (selected by
;; immediate operand 2) from each register of the structure. The BLK
;; MEM is sized from the element size (times nregs; the multiplier line
;; is on the continuation at 5621).
5612 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5613 [(match_operand:DI 0 "register_operand" "r")
5614 (match_operand:VSTRUCT 1 "register_operand" "w")
5615 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5616 (match_operand:SI 2 "immediate_operand")]
5619 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5620 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5623 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5624 mem, operands[1], operands[2]));
;; Expander for the st1 builtin: on big-endian targets use the dedicated
;; be_st1 pattern (element order matters); otherwise a plain vector move
;; to memory suffices.
5628 (define_expand "aarch64_st1<VALL_F16:mode>"
5629 [(match_operand:DI 0 "register_operand")
5630 (match_operand:VALL_F16 1 "register_operand")]
5633 machine_mode mode = <VALL_F16:MODE>mode;
5634 rtx mem = gen_rtx_MEM (mode, operands[0]);
5636 if (BYTES_BIG_ENDIAN)
5637 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5639 emit_move_insn (mem, operands[1]);
5643 ;; Expander for builtins to insert vector registers into large
5644 ;; opaque integer modes.
5646 ;; Q-register list. We don't need a D-reg inserter as we zero
5647 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copies the whole structure (operand 1) into operand 0, then overwrites
;; the Q-register at index operand 3 (byte offset = index * 16) with
;; operand 2 via a SUBREG move.
5649 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5650 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5651 (match_operand:VSTRUCT 1 "register_operand" "0")
5652 (match_operand:VQ 2 "register_operand" "w")
5653 (match_operand:SI 3 "immediate_operand" "i")]
5656 int part = INTVAL (operands[3]);
5657 int offset = part * 16;
5659 emit_move_insn (operands[0], operands[1]);
5660 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5665 ;; Standard pattern name vec_init<mode><Vel>.
;; Delegates all element-wise initialization strategy (dup, insert,
;; constant pool, etc.) to aarch64_expand_vector_init.
5667 (define_expand "vec_init<mode><Vel>"
5668 [(match_operand:VALL_F16 0 "register_operand" "")
5669 (match_operand 1 "" "")]
5672 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load a single element from memory and replicate it to every
;; lane of the destination (matches vec_duplicate of a memory operand).
5676 (define_insn "*aarch64_simd_ld1r<mode>"
5677 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5678 (vec_duplicate:VALL_F16
5679 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5681 "ld1r\\t{%0.<Vtype>}, %1"
5682 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: floating-point reciprocal estimate, vector forms (VHSDF).
5685 (define_insn "aarch64_frecpe<mode>"
5686 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5687 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5690 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5691 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECP family: FRECP:frecp_suffix selects the variant (e.g.
;; reciprocal estimate/exponent) on scalar GPF_F16 modes.
5694 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5695 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5696 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5699 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5700 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; FRECPS: reciprocal step (Newton-Raphson refinement step), vector and
;; scalar forms via the VHSDF_HSDF iterator.
5703 (define_insn "aarch64_frecps<mode>"
5704 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5706 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5707 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5710 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5711 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; URECPE: unsigned integer reciprocal estimate on 32-bit lanes (VDQ_SI).
5714 (define_insn "aarch64_urecpe<mode>"
5715 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5716 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5719 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5720 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5722 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extracts lane operand 2 of vector operand 1 into scalar operand 0 by
;; delegating to the aarch64_get_lane pattern.
5724 (define_expand "vec_extract<mode><Vel>"
5725 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5726 (match_operand:VALL_F16 1 "register_operand" "")
5727 (match_operand:SI 2 "immediate_operand" "")]
5731 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single round (AESE/AESD via aes_op): operand 1 is the data/state
;; (tied to the destination), operand 2 the round key. Requires both
;; SIMD and the Crypto extension.
5737 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5738 [(set (match_operand:V16QI 0 "register_operand" "=w")
5739 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5740 (match_operand:V16QI 2 "register_operand" "w")]
5742 "TARGET_SIMD && TARGET_CRYPTO"
5743 "aes<aes_op>\\t%0.16b, %2.16b"
5744 [(set_attr "type" "crypto_aese")]
5747 ;; When AES/AESMC fusion is enabled we want the register allocation to
5751 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; Two alternatives: the first ("0") ties input to output and is enabled
;; only when AES/AESMC fusion is on, steering the allocator toward the
;; fusable form; the second ("w") is the unconstrained fallback.
5753 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5754 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5755 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5757 "TARGET_SIMD && TARGET_CRYPTO"
5758 "aes<aesmc_op>\\t%0.16b, %1.16b"
5759 [(set_attr "type" "crypto_aesmc")
5760 (set_attr_alternative "enabled"
5761 [(if_then_else (match_test
5762 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5763 (const_string "yes" )
5764 (const_string "no"))
5765 (const_string "yes")])]
;; SHA1H (fixed rotate) on a plain SI-mode value.
5770 (define_insn "aarch64_crypto_sha1hsi"
5771 [(set (match_operand:SI 0 "register_operand" "=w")
5772 (unspec:SI [(match_operand:SI 1
5773 "register_operand" "w")]
5775 "TARGET_SIMD && TARGET_CRYPTO"
5777 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H applied to lane 0 of a V4SI vector — little-endian form (lane 0
;; is element index 0 in RTL on LE).
5780 (define_insn "aarch64_crypto_sha1hv4si"
5781 [(set (match_operand:SI 0 "register_operand" "=w")
5782 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5783 (parallel [(const_int 0)]))]
5785 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5787 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart of sha1hv4si: the architectural lane 0 is RTL
;; element index 3 on BE, hence the const_int 3 in the vec_select.
5790 (define_insn "aarch64_be_crypto_sha1hv4si"
5791 [(set (match_operand:SI 0 "register_operand" "=w")
5792 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5793 (parallel [(const_int 3)]))]
5795 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5797 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1: SHA1 schedule update, part 2. Operand 1 is tied to the
;; destination (read-modify-write).
5800 (define_insn "aarch64_crypto_sha1su1v4si"
5801 [(set (match_operand:V4SI 0 "register_operand" "=w")
5802 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5803 (match_operand:V4SI 2 "register_operand" "w")]
5805 "TARGET_SIMD && TARGET_CRYPTO"
5806 "sha1su1\\t%0.4s, %2.4s"
5807 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash update (sha1_op selects C/P/M variant): operand 1 is the
;; hash state (tied to output, used as %q0), operand 2 the scalar hash
;; element, operand 3 the schedule.
5810 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5811 [(set (match_operand:V4SI 0 "register_operand" "=w")
5812 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5813 (match_operand:SI 2 "register_operand" "w")
5814 (match_operand:V4SI 3 "register_operand" "w")]
5816 "TARGET_SIMD && TARGET_CRYPTO"
5817 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5818 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0: SHA1 schedule update, part 1 (three inputs; operand 1 tied
;; to the destination).
5821 (define_insn "aarch64_crypto_sha1su0v4si"
5822 [(set (match_operand:V4SI 0 "register_operand" "=w")
5823 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5824 (match_operand:V4SI 2 "register_operand" "w")
5825 (match_operand:V4SI 3 "register_operand" "w")]
5827 "TARGET_SIMD && TARGET_CRYPTO"
5828 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5829 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash update (sha256_op selects the H/H2 variant): operand 1 is
;; the hash state (tied to output), operands 2 and 3 the other state half
;; and the schedule.
5834 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5835 [(set (match_operand:V4SI 0 "register_operand" "=w")
5836 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5837 (match_operand:V4SI 2 "register_operand" "w")
5838 (match_operand:V4SI 3 "register_operand" "w")]
5840 "TARGET_SIMD && TARGET_CRYPTO"
5841 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5842 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0: SHA256 schedule update, part 1. Operand 1 is tied to the
;; destination (read-modify-write).
5845 (define_insn "aarch64_crypto_sha256su0v4si"
5846 [(set (match_operand:V4SI 0 "register_operand" "=w")
5847 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5848 (match_operand:V4SI 2 "register_operand" "w")]
;; Condition spacing normalized ("&& TARGET_CRYPTO") to match the other
;; crypto patterns in this file; the condition itself is unchanged.
5850 "TARGET_SIMD && TARGET_CRYPTO"
5851 "sha256su0\\t%0.4s, %2.4s"
5852 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1: SHA256 schedule update, part 2 (three inputs; operand 1
;; tied to the destination).
5855 (define_insn "aarch64_crypto_sha256su1v4si"
5856 [(set (match_operand:V4SI 0 "register_operand" "=w")
5857 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5858 (match_operand:V4SI 2 "register_operand" "w")
5859 (match_operand:V4SI 3 "register_operand" "w")]
;; Condition spacing normalized ("&& TARGET_CRYPTO") to match the other
;; crypto patterns in this file; the condition itself is unchanged.
5861 "TARGET_SIMD && TARGET_CRYPTO"
5862 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5863 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: 64x64 -> 128-bit polynomial multiply of two DI scalars,
;; producing a TI-mode result (used for vmull_p64).
5868 (define_insn "aarch64_crypto_pmulldi"
5869 [(set (match_operand:TI 0 "register_operand" "=w")
5870 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5871 (match_operand:DI 2 "register_operand" "w")]
5873 "TARGET_SIMD && TARGET_CRYPTO"
5874 "pmull\\t%0.1q, %1.1d, %2.1d"
5875 [(set_attr "type" "crypto_pmull")]
;; PMULL2: polynomial multiply of the high 64-bit halves of two V2DI
;; vectors, producing a TI-mode result (used for vmull_high_p64).
5878 (define_insn "aarch64_crypto_pmullv2di"
5879 [(set (match_operand:TI 0 "register_operand" "=w")
5880 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5881 (match_operand:V2DI 2 "register_operand" "w")]
5883 "TARGET_SIMD && TARGET_CRYPTO"
5884 "pmull2\\t%0.1q, %1.2d, %2.2d"
5885 [(set_attr "type" "crypto_pmull")]