1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Store the V8QI register in operand 1 to the memory in operand 0 (constraint
;; Un), wrapped in UNSPEC_UNALIGNED_STORE.  The assembly text is produced by
;; output_move_neon; the insn is typed as neon_store1_1reg.
26 (define_insn "unaligned_storev8qi"
27 [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29 UNSPEC_UNALIGNED_STORE))]
32 return output_move_neon (operands);
34 [(set_attr "type" "neon_store1_1reg")])
;; Move pattern for the VDXMOV modes; at least one of the two operands must
;; satisfy register_operand.
;;   - Alternatives 2 and 3 (Dm/Dn source) are checked with
;;     simd_immediate_valid_for_move (asserted to succeed) and printed either
;;     as a vmov.f32 or as a vmov.i<width> template that is formatted into a
;;     static buffer.
;;   - Alternative 0 is a plain vmov between "w" operands.
;;   - Alternatives 1 and 4 are output by output_move_neon.
;;   - Alternatives 5 and 6 are vmov transfers between an "r" operand (printed
;;     with %Q/%R) and a "w" operand (printed with %P).
;;   - The remaining alternatives are output by output_move_double.
;; The pool-range attributes are only set for the alternatives whose source
;; constraint is Uni or Usi.
36 (define_insn "*neon_mov<mode>"
37 [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
38 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
39 (match_operand:VDXMOV 1 "general_operand"
40 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
42 && (register_operand (operands[0], <MODE>mode)
43 || register_operand (operands[1], <MODE>mode))"
45 if (which_alternative == 2 || which_alternative == 3)
48 static char templ[40];
50 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
51 &operands[1], &width);
53 gcc_assert (is_valid != 0);
56 return "vmov.f32\t%P0, %1 @ <mode>";
58 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
63 switch (which_alternative)
65 case 0: return "vmov\t%P0, %P1 @ <mode>";
66 case 1: case 4: return output_move_neon (operands);
67 case 2: case 3: gcc_unreachable ();
68 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
69 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
71 default: return output_move_double (operands, true, NULL);
74 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
75 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
78 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
80 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
81 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
;; Move pattern for the VQXMOV modes; at least one of the two operands must
;; satisfy register_operand.
;;   - Alternatives 2 and 3 (Dm/DN source) are checked with
;;     simd_immediate_valid_for_move (asserted to succeed) and printed either
;;     as a vmov.f32 or as a vmov.i<width> template that is formatted into a
;;     static buffer.
;;   - Alternative 0 is a plain vmov of the whole register (%q).
;;   - Alternatives 1 and 4 are output by output_move_neon.
;;   - Alternatives 5 and 6 each emit two vmov instructions, one for each
;;     half (%e/%f) of the "w" operand.
;;   - The remaining alternatives are output by output_move_quad.
83 (define_insn "*neon_mov<mode>"
84 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
85 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
86 (match_operand:VQXMOV 1 "general_operand"
87 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
89 && (register_operand (operands[0], <MODE>mode)
90 || register_operand (operands[1], <MODE>mode))"
92 if (which_alternative == 2 || which_alternative == 3)
95 static char templ[40];
97 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
98 &operands[1], &width);
100 gcc_assert (is_valid != 0);
103 return "vmov.f32\t%q0, %1 @ <mode>";
105 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
110 switch (which_alternative)
112 case 0: return "vmov\t%q0, %q1 @ <mode>";
113 case 1: case 4: return output_move_neon (operands);
114 case 2: case 3: gcc_unreachable ();
115 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
116 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
117 default: return output_move_quad (operands);
120 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
121 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
128 /* We define these mov expanders to match the standard mov$a optab to prevent
129 the mid-end from trying to do a subreg for these modes, which is the most
130 inefficient way to expand the move. Also, big-endian subregs aren't
131 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132 Without these RTL generation patterns the mid-end would attempt to take a
133 subreg and may ICE if it can't. */
;; Expander for TImode moves.  Both operands are checked with aligned_operand
;; (via gcc_checking_assert).  While new pseudos can still be created, operand 1
;; is forced into a register whenever operand 0 is not a REG.
135 (define_expand "movti"
136 [(set (match_operand:TI 0 "nonimmediate_operand")
137 (match_operand:TI 1 "general_operand"))]
140 gcc_checking_assert (aligned_operand (operands[0], TImode));
141 gcc_checking_assert (aligned_operand (operands[1], TImode));
142 if (can_create_pseudo_p ())
144 if (!REG_P (operands[0]))
145 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes, available when TARGET_NEON or
;; TARGET_HAVE_MVE holds.  Both operands are checked with aligned_operand (via
;; gcc_checking_assert).  While new pseudos can still be created, operand 1 is
;; forced into a register whenever operand 0 is not a REG.
149 (define_expand "mov<mode>"
150 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151 (match_operand:VSTRUCT 1 "general_operand"))]
152 "TARGET_NEON || TARGET_HAVE_MVE"
154 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
156 if (can_create_pseudo_p ())
158 if (!REG_P (operands[0]))
159 operands[1] = force_reg (<MODE>mode, operands[1]);
163 ;; The mov<mode> patterns for the v8hf, v4hf, v4bf and v8bf modes are split into
164 ;; two groups. The pattern movv8hf is common to MVE and NEON, so it is moved
165 ;; into the vec-common.md file. The remaining mov expand patterns for half-float
166 ;; and bfloat modes are implemented below.
;; Expander for moves in the VHFBF_split modes; both operands use the
;; s_register_operand predicate.  Both operands are checked with
;; aligned_operand (via gcc_checking_assert).  While new pseudos can still be
;; created, operand 1 is forced into a register whenever operand 0 is not a
;; REG.
167 (define_expand "mov<mode>"
168 [(set (match_operand:VHFBF_split 0 "s_register_operand")
169 (match_operand:VHFBF_split 1 "s_register_operand"))]
172 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
174 if (can_create_pseudo_p ())
176 if (!REG_P (operands[0]))
177 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes (register/register, store to Ut memory,
;; load from Ut memory); at least one operand must satisfy register_operand.
;; The store and load alternatives (1 and 2) are output by output_move_neon.
;; The length attribute is not a constant: it is computed per insn by
;; arm_attr_length_move_neon.
181 (define_insn "*neon_mov<mode>"
182 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
183 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
184 "(TARGET_NEON || TARGET_HAVE_MVE)
185 && (register_operand (operands[0], <MODE>mode)
186 || register_operand (operands[1], <MODE>mode))"
188 switch (which_alternative)
191 case 1: case 2: return output_move_neon (operands);
192 default: gcc_unreachable ();
195 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
196 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload (TARGET_NEON only), a register-to-register EI move is replaced
;; by two sets.  The pieces are a TImode register at the original register
;; number and a DImode register at that number + 4.  The dest/src arrays are
;; handed to neon_disambiguate_copy together with the operands array and the
;; piece count 2.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
199 [(set (match_operand:EI 0 "s_register_operand" "")
200 (match_operand:EI 1 "s_register_operand" ""))]
201 "TARGET_NEON && reload_completed"
202 [(set (match_dup 0) (match_dup 1))
203 (set (match_dup 2) (match_dup 3))]
205 int rdest = REGNO (operands[0]);
206 int rsrc = REGNO (operands[1]);
209 dest[0] = gen_rtx_REG (TImode, rdest);
210 src[0] = gen_rtx_REG (TImode, rsrc);
211 dest[1] = gen_rtx_REG (DImode, rdest + 4);
212 src[1] = gen_rtx_REG (DImode, rsrc + 4);
214 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload (TARGET_NEON or TARGET_HAVE_MVE), a register-to-register OI
;; move is replaced by two sets.  The pieces are TImode registers at the
;; original register number and at that number + 4.  The dest/src arrays are
;; handed to neon_disambiguate_copy together with the operands array and the
;; piece count 2.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
218 [(set (match_operand:OI 0 "s_register_operand" "")
219 (match_operand:OI 1 "s_register_operand" ""))]
220 "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))]
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
233 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload (TARGET_NEON only), a register-to-register CI move is replaced
;; by three sets.  The pieces are TImode registers at the original register
;; number and at that number + 4 and + 8.  The dest/src arrays are handed to
;; neon_disambiguate_copy together with the operands array and the piece
;; count 3.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..5] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
237 [(set (match_operand:CI 0 "s_register_operand" "")
238 (match_operand:CI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))]
244 int rdest = REGNO (operands[0]);
245 int rsrc = REGNO (operands[1]);
248 dest[0] = gen_rtx_REG (TImode, rdest);
249 src[0] = gen_rtx_REG (TImode, rsrc);
250 dest[1] = gen_rtx_REG (TImode, rdest + 4);
251 src[1] = gen_rtx_REG (TImode, rsrc + 4);
252 dest[2] = gen_rtx_REG (TImode, rdest + 8);
253 src[2] = gen_rtx_REG (TImode, rsrc + 8);
255 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload (TARGET_NEON or TARGET_HAVE_MVE), a register-to-register XI
;; move is replaced by four sets.  The pieces are TImode registers at the
;; original register number and at that number + 4, + 8 and + 12.  The
;; dest/src arrays are handed to neon_disambiguate_copy together with the
;; operands array and the piece count 4.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..7] in an
;; order that is safe when source and destination overlap -- confirm against
;; its definition.
259 [(set (match_operand:XI 0 "s_register_operand" "")
260 (match_operand:XI 1 "s_register_operand" ""))]
261 "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
262 [(set (match_dup 0) (match_dup 1))
263 (set (match_dup 2) (match_dup 3))
264 (set (match_dup 4) (match_dup 5))
265 (set (match_dup 6) (match_dup 7))]
267 int rdest = REGNO (operands[0]);
268 int rsrc = REGNO (operands[1]);
271 dest[0] = gen_rtx_REG (TImode, rdest);
272 src[0] = gen_rtx_REG (TImode, rsrc);
273 dest[1] = gen_rtx_REG (TImode, rdest + 4);
274 src[1] = gen_rtx_REG (TImode, rsrc + 4);
275 dest[2] = gen_rtx_REG (TImode, rdest + 8);
276 src[2] = gen_rtx_REG (TImode, rsrc + 8);
277 dest[3] = gen_rtx_REG (TImode, rdest + 12);
278 src[3] = gen_rtx_REG (TImode, rsrc + 12);
280 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, expressed with
;; UNSPEC_MISALIGNED_ACCESS.  Only available for little-endian NEON targets
;; with unaligned_access enabled.  The expander first makes sure at least one
;; operand is a register, then picks the memory operand (adjust_mem) and
;; forces its address into a Pmode register if neon_vector_mem_operand does
;; not accept it.
283 (define_expand "movmisalign<mode>"
284 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
285 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
286 UNSPEC_MISALIGNED_ACCESS))]
287 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
290 /* This pattern is not permitted to fail during expansion: if both arguments
291 are non-registers (e.g. memory := constant, which can be created by the
292 auto-vectorizer), force operand 1 into a register. */
293 if (!s_register_operand (operands[0], <MODE>mode)
294 && !s_register_operand (operands[1], <MODE>mode))
295 operands[1] = force_reg (<MODE>mode, operands[1]);
297 if (s_register_operand (operands[0], <MODE>mode))
298 adjust_mem = operands[1];
300 adjust_mem = operands[0];
302 /* Legitimize address. */
303 if (!neon_vector_mem_operand (adjust_mem, 2, true))
304 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register (operand 1) to memory (operand 0,
;; constraint Um), emitted as a single vst1 with element size <V_sz_elem>.
;; Little-endian NEON with unaligned_access only.
308 (define_insn "*movmisalign<mode>_neon_store"
309 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
310 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
311 UNSPEC_MISALIGNED_ACCESS))]
312 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
313 "vst1.<V_sz_elem>\t{%P1}, %A0"
314 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode value from memory (operand 1) into a register
;; (operand 0), emitted as a single vld1 with element size <V_sz_elem>.
;; Little-endian NEON with unaligned_access only.
316 (define_insn "*movmisalign<mode>_neon_load"
317 [(set (match_operand:VDX 0 "s_register_operand" "=w")
318 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
320 UNSPEC_MISALIGNED_ACCESS))]
321 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
322 "vld1.<V_sz_elem>\t{%P0}, %A1"
323 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register (operand 1, printed with %q) to
;; memory (operand 0, constraint Um), emitted as a single vst1 with element
;; size <V_sz_elem>.  Little-endian NEON with unaligned_access only.
325 (define_insn "*movmisalign<mode>_neon_store"
326 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
327 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
328 UNSPEC_MISALIGNED_ACCESS))]
329 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
330 "vst1.<V_sz_elem>\t{%q1}, %A0"
331 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode value from memory (operand 1) into a register
;; (operand 0, printed with %q), emitted as a single vld1 with element size
;; <V_sz_elem>.  Little-endian NEON with unaligned_access only.
333 (define_insn "*movmisalign<mode>_neon_load"
334 [(set (match_operand:VQX 0 "s_register_operand" "=w")
335 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
337 UNSPEC_MISALIGNED_ACCESS))]
338 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
339 "vld1.<V_sz_elem>\t{%q0}, %A1"
340 [(set_attr "type" "neon_load1_1reg<q>")])
;; Set one lane of a VD_LANE vector.  Operand 1 is the scalar element (memory
;; in alternative 0, an "r" register in alternative 1), operand 3 is the input
;; vector and is tied to the output (constraint "0"), and operand 2 is an
;; immediate from which the lane index is derived as ffs (operand 2) - 1.  On
;; big-endian targets the lane index is mirrored within the vector.  Operand 2
;; is then rewritten to the final lane number for printing.  Alternative 0
;; emits a single-lane vld1, alternative 1 a vmov to the lane.
342 (define_insn "@vec_set<mode>_internal"
343 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
345 (vec_duplicate:VD_LANE
346 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
347 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
348 (match_operand:SI 2 "immediate_operand" "i,i")))]
351 int elt = ffs ((int) INTVAL (operands[2])) - 1;
352 if (BYTES_BIG_ENDIAN)
353 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
354 operands[2] = GEN_INT (elt);
356 if (which_alternative == 0)
357 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
359 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
361 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Set one lane of a VQ2 vector.  The element index is derived as
;; ffs (operand 2) - 1 and then split into a lane within one half of the
;; vector (elem % half_elts) and a register offset selecting the half
;; ((elem / half_elts) * 2).  On big-endian targets the lane is mirrored
;; within the half.  Operand 0 is rewritten to the <V_HALF>mode register at
;; REGNO + offset and operand 2 to the lane number, so the same single-lane
;; vld1 (alternative 0) or vmov (alternative 1) templates can be used.
363 (define_insn "@vec_set<mode>_internal"
364 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
367 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
368 (match_operand:VQ2 3 "s_register_operand" "0,0")
369 (match_operand:SI 2 "immediate_operand" "i,i")))]
372 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
373 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
374 int elt = elem % half_elts;
375 int hi = (elem / half_elts) * 2;
376 int regno = REGNO (operands[0]);
378 if (BYTES_BIG_ENDIAN)
379 elt = half_elts - 1 - elt;
381 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
382 operands[2] = GEN_INT (elt);
384 if (which_alternative == 0)
385 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
387 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
389 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Set one DImode element of a V2DI_ONLY vector.  The element index is derived
;; as ffs (operand 2) - 1; operand 0 is rewritten to the DImode register at
;; REGNO (operands[0]) + 2 * index, so the whole element is written with a
;; vld1.64 (alternative 0, from memory) or a vmov from the %Q1/%R1 pair
;; (alternative 1).
392 (define_insn "@vec_set<mode>_internal"
393 [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
395 (vec_duplicate:V2DI_ONLY
396 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
397 (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
398 (match_operand:SI 2 "immediate_operand" "i,i")))]
401 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
402 int regno = REGNO (operands[0]) + 2 * elem;
404 operands[0] = gen_rtx_REG (DImode, regno);
406 if (which_alternative == 0)
407 return "vld1.64\t%P0, %A1";
409 return "vmov\t%P0, %Q1, %R1";
411 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Extract the lane given by immediate operand 2 from the VD_LANE vector in
;; operand 1 into operand 0 (memory in alternative 0, an "r" register in
;; alternative 1).  On big-endian targets the lane number is mirrored within
;; the vector before printing.  Alternative 0 emits a single-lane vst1,
;; alternative 1 a vmov.<V_uf_sclr> from the lane.
414 (define_insn "vec_extract<mode><V_elem_l>"
415 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
417 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
418 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
421 if (BYTES_BIG_ENDIAN)
423 int elt = INTVAL (operands[2]);
424 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
425 operands[2] = GEN_INT (elt);
428 if (which_alternative == 0)
429 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
431 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
433 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
436 ;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
437 ;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
438 ;; by define_expand in vec-common.md file.
;; Extract the lane given by immediate operand 2 from the VQ2 vector in
;; operand 1.  The index is split into a lane within one half of the vector
;; (index % half_elts) and a register offset selecting the half
;; ((index / half_elts) * 2); on big-endian targets the lane is mirrored
;; within the half.  Operand 1 is rewritten to the <V_HALF>mode register at
;; REGNO + offset and operand 2 to the lane number before printing a
;; single-lane vst1 (alternative 0) or a vmov.<V_uf_sclr> (alternative 1).
439 (define_insn "neon_vec_extract<mode><V_elem_l>"
440 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
442 (match_operand:VQ2 1 "s_register_operand" "w,w")
443 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
446 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
447 int elt = INTVAL (operands[2]) % half_elts;
448 int hi = (INTVAL (operands[2]) / half_elts) * 2;
449 int regno = REGNO (operands[1]);
451 if (BYTES_BIG_ENDIAN)
452 elt = half_elts - 1 - elt;
454 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
455 operands[2] = GEN_INT (elt);
457 if (which_alternative == 0)
458 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
460 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
462 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
465 ;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
466 ;; and this pattern is called by define_expand in vec-common.md file.
;; Extract the DImode element given by immediate operand 2 from the V2DI
;; vector in operand 1.  Operand 1 is rewritten to the DImode register at
;; REGNO (operands[1]) + 2 * index, which is then stored with vst1.64
;; (alternative 0) or moved to the %Q0/%R0 pair with vmov (alternative 1).
467 (define_insn "neon_vec_extractv2didi"
468 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
470 (match_operand:V2DI 1 "s_register_operand" "w,w")
471 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
474 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
476 operands[1] = gen_rtx_REG (DImode, regno);
478 if (which_alternative == 0)
479 return "vst1.64\t{%P1}, %A0 @ v2di";
481 return "vmov\t%Q0, %R0, %P1 @ v2di";
483 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vector initialisation in the VDQ modes, available when
;; TARGET_NEON or TARGET_HAVE_MVE holds.  All of the work is delegated to
;; neon_expand_vector_init, called with the destination (operand 0) and the
;; initialiser (operand 1).
486 (define_expand "vec_init<mode><V_elem_l>"
487 [(match_operand:VDQ 0 "s_register_operand")
488 (match_operand 1 "" "")]
489 "TARGET_NEON || TARGET_HAVE_MVE"
491 neon_expand_vector_init (operands[0], operands[1]);
495 ;; Doubleword and quadword arithmetic.
497 ;; NOTE: some other instructions also support 64-bit integer
498 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition in the VDQ modes, enabled by ARM_HAVE_NEON_<MODE>_ARITH.
;; Emits vadd.<V_if_elem>.  The type attribute is neon_fp_addsub_s<q> for
;; float modes and neon_add<q> otherwise.
500 (define_insn "*add<mode>3_neon"
501 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
502 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
503 (match_operand:VDQ 2 "s_register_operand" "w")))]
504 "ARM_HAVE_NEON_<MODE>_ARITH"
505 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
507 (if_then_else (match_test "<Is_float_mode>")
508 (const_string "neon_fp_addsub_s<q>")
509 (const_string "neon_add<q>")))]
;; Vector subtraction (operand 1 - operand 2) in the VDQ modes, enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.  Emits vsub.<V_if_elem>.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
512 (define_insn "*sub<mode>3_neon"
513 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
514 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
515 (match_operand:VDQ 2 "s_register_operand" "w")))]
516 "ARM_HAVE_NEON_<MODE>_ARITH"
517 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
519 (if_then_else (match_test "<Is_float_mode>")
520 (const_string "neon_fp_addsub_s<q>")
521 (const_string "neon_sub<q>")))]
;; Vector multiplication in the VDQW modes, enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.  Emits vmul.<V_if_elem>.  The type attribute is
;; neon_fp_mul_s<q> for float modes and neon_mul_<V_elem_ch><q> otherwise.
524 (define_insn "*mul<mode>3_neon"
525 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
526 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
527 (match_operand:VDQW 2 "s_register_operand" "w")))]
528 "ARM_HAVE_NEON_<MODE>_ARITH"
529 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
531 (if_then_else (match_test "<Is_float_mode>")
532 (const_string "neon_fp_mul_s<q>")
533 (const_string "neon_mul_<V_elem_ch><q>")))]
536 /* Perform division using multiply-by-reciprocal.
537 The reciprocal is calculated using the Newton-Raphson method.
538 Enabled with -funsafe-math-optimizations -freciprocal-math
539 and disabled for -Os since it increases code size. */
;; Expander for division in the VCVTF modes: operand 0 = operand 1 / operand 2.
;; Two fresh pseudos are used: rec holds the running reciprocal of operand 2
;; and vrecps_temp the per-step correction factor.  The sequence is one vrecpe
;; estimate, two (vrecps, multiply) refinement steps, and a final multiply of
;; operand 1 by rec.
541 (define_expand "div<mode>3"
542 [(set (match_operand:VCVTF 0 "s_register_operand")
543 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
544 (match_operand:VCVTF 2 "s_register_operand")))]
545 "TARGET_NEON && !optimize_size
546 && flag_reciprocal_math"
548 rtx rec = gen_reg_rtx (<MODE>mode);
549 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
551 /* Reciprocal estimate. */
552 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
554 /* Perform 2 iterations of the Newton-Raphson method. */
555 for (int i = 0; i < 2; i++)
557 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
558 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
561 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
562 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
;; Multiply-accumulate in the VDQW modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to the output (constraint "0").  Emits
;; vmla.<V_if_elem>.  The type attribute is neon_fp_mla_s<q> for float modes
;; and neon_mla_<V_elem_ch><q> otherwise.
568 (define_insn "mul<mode>3add<mode>_neon"
569 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
570 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
571 (match_operand:VDQW 3 "s_register_operand" "w"))
572 (match_operand:VDQW 1 "s_register_operand" "0")))]
573 "ARM_HAVE_NEON_<MODE>_ARITH"
574 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mla_s<q>")
578 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate in the VH modes: operand 0 = operand 2 * operand 3 +
;; operand 1, with operand 1 tied to the output (constraint "0").  Emits
;; vmla.f16.
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VH 0 "s_register_operand" "=w")
583 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
584 (match_operand:VH 3 "s_register_operand" "w"))
585 (match_operand:VH 1 "s_register_operand" "0")))]
586 "ARM_HAVE_NEON_<MODE>_ARITH"
587 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract in the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, with operand 1 tied to the output (constraint "0").  Emits
;; vmls.<V_if_elem>.  The type attribute is neon_fp_mla_s<q> for float modes
;; and neon_mla_<V_elem_ch><q> otherwise.
591 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
592 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
593 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
594 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
595 (match_operand:VDQW 3 "s_register_operand" "w"))))]
596 "ARM_HAVE_NEON_<MODE>_ARITH"
597 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
599 (if_then_else (match_test "<Is_float_mode>")
600 (const_string "neon_fp_mla_s<q>")
601 (const_string "neon_mla_<V_elem_ch><q>")))]
604 ;; Fused multiply-accumulate
605 ;; We define each insn twice here:
606 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
607 ;; to be able to use when converting to FMA.
608 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add in the VCVTF modes: operand 0 = operand 1 * operand 2 +
;; operand 3, with operand 3 tied to the output (constraint "0").  Requires
;; ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.  Emits vfma.<V_if_elem>.
609 (define_insn "fma<VCVTF:mode>4"
610 [(set (match_operand:VCVTF 0 "register_operand" "=w")
611 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
612 (match_operand:VCVTF 2 "register_operand" "w")
613 (match_operand:VCVTF 3 "register_operand" "0")))]
614 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
615 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
616 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and output as fma<VCVTF:mode>4, but the condition is
;; TARGET_NEON && TARGET_FMA rather than ARM_HAVE_NEON_<MODE>_ARITH &&
;; TARGET_FMA.
619 (define_insn "fma<VCVTF:mode>4_intrinsic"
620 [(set (match_operand:VCVTF 0 "register_operand" "=w")
621 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
622 (match_operand:VCVTF 2 "register_operand" "w")
623 (match_operand:VCVTF 3 "register_operand" "0")))]
624 "TARGET_NEON && TARGET_FMA"
625 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
626 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-add in the VH modes, with operand 3 tied to the output
;; (constraint "0").  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  Emits
;; vfma.<V_if_elem>.
629 (define_insn "fma<VH:mode>4"
630 [(set (match_operand:VH 0 "register_operand" "=w")
632 (match_operand:VH 1 "register_operand" "w")
633 (match_operand:VH 2 "register_operand" "w")
634 (match_operand:VH 3 "register_operand" "0")))]
635 "ARM_HAVE_NEON_<MODE>_ARITH"
636 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
637 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract in the VCVTF modes: operand 0 = -(operand 1) *
;; operand 2 + operand 3, with operand 3 tied to the output (constraint "0").
;; Requires ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.  Emits
;; vfms.<V_if_elem>.
640 (define_insn "*fmsub<VCVTF:mode>4"
641 [(set (match_operand:VCVTF 0 "register_operand" "=w")
642 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
643 (match_operand:VCVTF 2 "register_operand" "w")
644 (match_operand:VCVTF 3 "register_operand" "0")))]
645 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
646 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
647 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Named fused multiply-subtract pattern for the VCVTF modes: operand 1 is
;; negated, and operand 3 is tied to the output (constraint "0").  The
;; condition is TARGET_NEON && TARGET_FMA.  Emits vfms.<V_if_elem>.
650 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
651 [(set (match_operand:VCVTF 0 "register_operand" "=w")
653 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
654 (match_operand:VCVTF 2 "register_operand" "w")
655 (match_operand:VCVTF 3 "register_operand" "0")))]
656 "TARGET_NEON && TARGET_FMA"
657 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
658 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Named fused multiply-subtract pattern for the VH modes: operand 1 is
;; negated, and operand 3 is tied to the output (constraint "0").  Requires
;; TARGET_NEON_FP16INST.  Emits vfms.<V_if_elem>.
661 (define_insn "fmsub<VH:mode>4_intrinsic"
662 [(set (match_operand:VH 0 "register_operand" "=w")
664 (neg:VH (match_operand:VH 1 "register_operand" "w"))
665 (match_operand:VH 2 "register_operand" "w")
666 (match_operand:VH 3 "register_operand" "0")))]
667 "TARGET_NEON_FP16INST"
668 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
669 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Rounding of a VCVTF vector, one pattern per NEON_VRINT variant, expressed
;; as an unspec on operand 1.  Requires TARGET_NEON && TARGET_VFP5.  Emits
;; vrint<nvrint_variant>.f32.
672 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
673 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
674 (unspec:VCVTF [(match_operand:VCVTF 1
675 "s_register_operand" "w")]
677 "TARGET_NEON && TARGET_VFP5"
678 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
679 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion of a VCVTF vector into <V_cmp_result>, one
;; pattern per NEON_VCVT variant and per FIXUORS code: a FIXUORS operation
;; applied to an unspec of operand 1.  Requires TARGET_NEON && TARGET_VFP5.
;; Emits vcvt<nvrint_variant>.<su>32.f32.  The insn is marked as not
;; predicable.
682 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
683 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
684 (FIXUORS:<V_cmp_result> (unspec:VCVTF
685 [(match_operand:VCVTF 1 "register_operand" "w")]
687 "TARGET_NEON && TARGET_VFP5"
688 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
689 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
690 (set_attr "predicable" "no")]
;; Bitwise OR in the VDQ modes.  Alternative 0 takes two registers and emits
;; vorr.  Alternative 1 takes a Dl immediate with operand 1 tied to the output
;; (constraint "0"); its text is produced by neon_output_logic_immediate,
;; called with the mnemonic "vorr" and 0 as its fourth argument.
693 (define_insn "ior<mode>3_neon"
694 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
695 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
696 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
699 switch (which_alternative)
701 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
702 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
703 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
704 default: gcc_unreachable ();
707 [(set_attr "type" "neon_logic<q>")]
710 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
711 ;; vorr. We support the pseudo-instruction vand instead, because that
712 ;; corresponds to the canonical form the middle-end expects to use for
713 ;; immediate bitwise-ANDs.
;; Bitwise AND in the VDQ modes.  Alternative 0 takes two registers and emits
;; vand.  Alternative 1 takes a DL immediate with operand 1 tied to the output
;; (constraint "0"); its text is produced by neon_output_logic_immediate,
;; called with the mnemonic "vand" and 1 as its fourth argument (the ior
;; pattern passes 0 there).
715 (define_insn "and<mode>3_neon"
716 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
717 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
718 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
721 switch (which_alternative)
723 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
724 case 1: return neon_output_logic_immediate ("vand", &operands[2],
725 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
726 default: gcc_unreachable ();
729 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT in the VDQ modes: operand 0 = operand 1 | ~operand 2.  Note that the
;; inverted operand comes first in the RTL but second in the vorn template.
732 (define_insn "orn<mode>3_neon"
733 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
734 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
735 (match_operand:VDQ 1 "s_register_operand" "w")))]
737 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
738 [(set_attr "type" "neon_logic<q>")]
;; Bit clear in the VDQ modes: operand 0 = operand 1 & ~operand 2.  Note that
;; the inverted operand comes first in the RTL but second in the vbic
;; template.
741 (define_insn "bic<mode>3_neon"
742 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
743 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
744 (match_operand:VDQ 1 "s_register_operand" "w")))]
746 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
747 [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive OR of two registers in the VDQ modes.  Emits veor.
750 (define_insn "xor<mode>3_neon"
751 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
752 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
753 (match_operand:VDQ 2 "s_register_operand" "w")))]
755 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
756 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement of a register in the VDQ modes.  Emits vmvn; typed as
;; neon_move<q>.
759 (define_insn "one_cmpl<mode>2_neon"
760 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
761 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
763 "vmvn\t%<V_reg>0, %<V_reg>1"
764 [(set_attr "type" "neon_move<q>")]
;; Absolute value in the VDQW modes.  Emits vabs.<V_s_elem>.  The type
;; attribute is neon_fp_abs_s<q> for float modes and neon_abs<q> otherwise.
767 (define_insn "abs<mode>2"
768 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
769 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
771 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
773 (if_then_else (match_test "<Is_float_mode>")
774 (const_string "neon_fp_abs_s<q>")
775 (const_string "neon_abs<q>")))]
;; Negation in the VDQW modes.  Emits vneg.<V_s_elem>.  The type attribute is
;; neon_fp_neg_s<q> for float modes and neon_neg<q> otherwise.
778 (define_insn "neon_neg<mode>2"
779 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
780 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
782 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
784 (if_then_else (match_test "<Is_float_mode>")
785 (const_string "neon_fp_neg_s<q>")
786 (const_string "neon_neg<q>")))]
;; One pattern per ABSNEG code for the VH modes.  Requires
;; TARGET_NEON_FP16INST.  Emits v<absneg_str>.<V_s_elem>.
789 (define_insn "neon_<absneg_str><mode>2"
790 [(set (match_operand:VH 0 "s_register_operand" "=w")
791 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
792 "TARGET_NEON_FP16INST"
793 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
794 [(set_attr "type" "neon_abs<q>")]
;; Expander for each ABSNEG code in the VH modes.  Requires
;; TARGET_NEON_FP16INST.  It simply emits the corresponding
;; neon_<absneg_str><mode>2 insn with the same two operands.
797 (define_expand "neon_v<absneg_str><mode>"
799 (match_operand:VH 0 "s_register_operand")
800 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
801 "TARGET_NEON_FP16INST"
803 emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
;; Rounding of a VH vector, one pattern per <fp16_rnd_str> variant.  Requires
;; TARGET_NEON_FP16INST.  Emits <fp16_rnd_insn>.<V_s_elem>; typed as
;; neon_fp_round_s<q>.
807 (define_insn "neon_v<fp16_rnd_str><mode>"
808 [(set (match_operand:VH 0 "s_register_operand" "=w")
810 [(match_operand:VH 1 "s_register_operand" "w")]
812 "TARGET_NEON_FP16INST"
813 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
814 [(set_attr "type" "neon_fp_round_s<q>")]
;; vrsqrte on a VH vector.  Requires TARGET_NEON_FP16INST.  Emits vrsqrte.f16;
;; typed as neon_fp_rsqrte_s<q>.
817 (define_insn "neon_vrsqrte<mode>"
818 [(set (match_operand:VH 0 "s_register_operand" "=w")
820 [(match_operand:VH 1 "s_register_operand" "w")]
822 "TARGET_NEON_FP16INST"
823 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
824 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned element-wise minimum in the VDQIW modes.  Emits vmin.<V_u_elem>.
827 (define_insn "*umin<mode>3_neon"
828 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
829 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
830 (match_operand:VDQIW 2 "s_register_operand" "w")))]
832 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
833 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned element-wise maximum in the VDQIW modes.  Emits vmax.<V_u_elem>.
836 (define_insn "*umax<mode>3_neon"
837 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
838 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
839 (match_operand:VDQIW 2 "s_register_operand" "w")))]
841 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
842 [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum in the VDQW modes.  Emits vmin.<V_s_elem>.  The
;; type attribute is neon_fp_minmax_s<q> for float modes and neon_minmax<q>
;; otherwise.
845 (define_insn "*smin<mode>3_neon"
846 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
847 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
848 (match_operand:VDQW 2 "s_register_operand" "w")))]
850 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
852 (if_then_else (match_test "<Is_float_mode>")
853 (const_string "neon_fp_minmax_s<q>")
854 (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum in the VDQW modes.  Emits vmax.<V_s_elem>.  The
;; type attribute is neon_fp_minmax_s<q> for float modes and neon_minmax<q>
;; otherwise.
857 (define_insn "*smax<mode>3_neon"
858 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
859 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
860 (match_operand:VDQW 2 "s_register_operand" "w")))]
862 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
864 (if_then_else (match_test "<Is_float_mode>")
865 (const_string "neon_fp_minmax_s<q>")
866 (const_string "neon_minmax<q>")))]
869 ; TODO: V2DI shifts are currently disabled because there are bugs in the
870 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector shift left in the VDQIW modes.  Alternative 0 shifts by a register
;; and emits vshl.<V_s_elem>.  Alternative 1 shifts by a Dm immediate; its
;; text is produced by neon_output_shift_immediate, called with the mnemonic
;; "vshl" and the sign character 'i'.
873 (define_insn "vashl<mode>3"
874 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
875 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
876 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
879 switch (which_alternative)
881 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
882 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
884 VALID_NEON_QREG_MODE (<MODE>mode),
886 default: gcc_unreachable ();
889 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate (constraint Dm) in the VDQIW modes.
;; The text is produced by neon_output_shift_immediate, called with the
;; mnemonic "vshr" and the sign character 's'.
892 (define_insn "vashr<mode>3_imm"
893 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
894 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
895 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
898 return neon_output_shift_immediate ("vshr", 's', &operands[2],
899 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
902 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical shift right by an immediate (constraint Dm) in the VDQIW modes.
;; The text is produced by neon_output_shift_immediate, called with the
;; mnemonic "vshr" and the sign character 'u'.
905 (define_insn "vlshr<mode>3_imm"
906 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
907 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
908 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
911 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
912 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
915 [(set_attr "type" "neon_shift_imm<q>")]
918 ; Used for implementing arithmetic shift-right, which is a left-shift by a
919 ; negative amount, with signed operands. This is essentially the same as
920 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky with negative shift amounts.
;; Register-controlled vshl with signed element type (<V_s_elem>) in the VDQI
;; modes, wrapped in UNSPEC_ASHIFT_SIGNED.  The vashr<mode>3 expander emits it
;; with a negated shift amount.
923 (define_insn "ashl<mode>3_signed"
924 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
925 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
926 (match_operand:VDQI 2 "s_register_operand" "w")]
927 UNSPEC_ASHIFT_SIGNED))]
929 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
930 [(set_attr "type" "neon_shift_reg<q>")]
933 ; Used for implementing logical shift-right, which is a left-shift by a negative
934 ; amount, with unsigned operands.
;; Register-controlled vshl with unsigned element type (<V_u_elem>) in the
;; VDQI modes, wrapped in UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander
;; emits it with a negated shift amount.
936 (define_insn "ashl<mode>3_unsigned"
937 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
938 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
939 (match_operand:VDQI 2 "s_register_operand" "w")]
940 UNSPEC_ASHIFT_UNSIGNED))]
942 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
943 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for arithmetic shift right in the VDQIW modes.  For a register
;; shift amount, the amount is negated into a fresh pseudo and the shift is
;; done with ashl<mode>3_signed.  The immediate form is emitted with
;; vashr<mode>3_imm.
946 (define_expand "vashr<mode>3"
947 [(set (match_operand:VDQIW 0 "s_register_operand")
948 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
949 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
952 if (s_register_operand (operands[2], <MODE>mode))
954 rtx neg = gen_reg_rtx (<MODE>mode);
955 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
956 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
959 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for logical shift right in the VDQIW modes.  For a register shift
;; amount, the amount is negated into a fresh pseudo and the shift is done
;; with ashl<mode>3_unsigned.  The immediate form is emitted with
;; vlshr<mode>3_imm.
963 (define_expand "vlshr<mode>3"
964 [(set (match_operand:VDQIW 0 "s_register_operand")
965 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
966 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
969 if (s_register_operand (operands[2], <MODE>mode))
971 rtx neg = gen_reg_rtx (<MODE>mode);
972 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
973 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
976 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
982 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
983 ;; leaving the upper half uninitialized. This is OK since the shift
984 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
985 ;; data flow analysis however, we pretend the full register is set
;; Two alternatives for the SImode source: memory ("Um"), loaded into lane 0
;; of the destination D register with vld1.32, or a general register ("r"),
;; scheduled as neon_from_gp.  The destination is modelled as a full DImode
;; set.
987 (define_insn "neon_load_count"
988 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
989 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
993 vld1.32\t{%P0[0]}, %A1
995 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
998 ;; Widening operations
;; Expander for a widening signed sum of a quad-word vector (operand 1) into
;; a double-width accumulator.  Operand 2 is copied to operand 0 when they
;; differ; p1 and p2 are the half-selecting parallels returned by
;; arm_simd_vect_par_cnst_half (false / true), and the lo and hi
;; vec_sel_widen_ssum insns are then emitted, both targeting operand 0.
1000 (define_expand "widen_ssum<mode>3"
1001 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1002 (plus:<V_double_width>
1003 (sign_extend:<V_double_width>
1004 (match_operand:VQI 1 "s_register_operand"))
1005 (match_operand:<V_double_width> 2 "s_register_operand")))]
1008 machine_mode mode = GET_MODE (operands[1]);
1011 p1 = arm_simd_vect_par_cnst_half (mode, false);
1012 p2 = arm_simd_vect_par_cnst_half (mode, true);
1014 if (operands[0] != operands[2])
1015 emit_move_insn (operands[0], operands[2]);
1017 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1021 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Accumulate the sign-extended low half of operand 1 (selected by the
;; vect_par_constant_low parallel, operand 2) into operand 3, which is tied
;; to the output.  The D half printed for operand 1 is %e1 on little-endian
;; and %f1 on big-endian.
;; Operand 1 carries no "%" commutative marker: the operand that follows it
;; is a constant parallel and can never be exchanged with a register.
1029 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1030 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1031 (plus:<V_double_width>
1032 (sign_extend:<V_double_width>
1033 (vec_select:<V_HALF>
1034 (match_operand:VQI 1 "s_register_operand" "w")
1035 (match_operand:VQI 2 "vect_par_constant_low" "")))
1036 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1039 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1040 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1042 [(set_attr "type" "neon_add_widen")])
;; Accumulate the sign-extended high half of operand 1 (selected by the
;; vect_par_constant_high parallel, operand 2) into operand 3, which is tied
;; to the output.  The D half printed for operand 1 is %f1 on little-endian
;; and %e1 on big-endian.
;; Operand 1 carries no "%" commutative marker: the operand that follows it
;; is a constant parallel and can never be exchanged with a register.
1044 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1045 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1046 (plus:<V_double_width>
1047 (sign_extend:<V_double_width>
1048 (vec_select:<V_HALF>
1049 (match_operand:VQI 1 "s_register_operand" "w")
1050 (match_operand:VQI 2 "vect_par_constant_high" "")))
1051 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1054 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1055 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1057 [(set_attr "type" "neon_add_widen")])
;; Widening signed add: the sign-extended D-register operand 1 is added to
;; the Q-register accumulator operand 2 with vaddw.
;; Operand 1 carries no "%" commutative marker: operand 2 has the wider
;; <V_widen> mode and sits outside the sign_extend, so the two cannot be
;; interchanged.
1059 (define_insn "widen_ssum<mode>3"
1060 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1062 (sign_extend:<V_widen>
1063 (match_operand:VW 1 "s_register_operand" "w"))
1064 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1066 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1067 [(set_attr "type" "neon_add_widen")]
;; Expander for a widening unsigned sum of a quad-word vector (operand 1)
;; into a double-width accumulator.  Operand 2 is copied to operand 0 when
;; they differ; p1 and p2 are the half-selecting parallels returned by
;; arm_simd_vect_par_cnst_half (false / true), and the lo and hi
;; vec_sel_widen_usum insns are then emitted, both targeting operand 0.
1070 (define_expand "widen_usum<mode>3"
1071 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1072 (plus:<V_double_width>
1073 (zero_extend:<V_double_width>
1074 (match_operand:VQI 1 "s_register_operand"))
1075 (match_operand:<V_double_width> 2 "s_register_operand")))]
1078 machine_mode mode = GET_MODE (operands[1]);
1081 p1 = arm_simd_vect_par_cnst_half (mode, false);
1082 p2 = arm_simd_vect_par_cnst_half (mode, true);
1084 if (operands[0] != operands[2])
1085 emit_move_insn (operands[0], operands[2]);
1087 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1091 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Accumulate the zero-extended low half of operand 1 (selected by the
;; vect_par_constant_low parallel, operand 2) into operand 3, which is tied
;; to the output.  The D half printed for operand 1 is %e1 on little-endian
;; and %f1 on big-endian.
;; Operand 1 carries no "%" commutative marker: the operand that follows it
;; is a constant parallel and can never be exchanged with a register.
1099 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1100 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1101 (plus:<V_double_width>
1102 (zero_extend:<V_double_width>
1103 (vec_select:<V_HALF>
1104 (match_operand:VQI 1 "s_register_operand" "w")
1105 (match_operand:VQI 2 "vect_par_constant_low" "")))
1106 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1109 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1110 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1112 [(set_attr "type" "neon_add_widen")])
;; Accumulate the zero-extended high half of operand 1 (selected by the
;; vect_par_constant_high parallel, operand 2) into operand 3, which is tied
;; to the output.  The D half printed for operand 1 is %f1 on little-endian
;; and %e1 on big-endian.
;; Operand 1 carries no "%" commutative marker: the operand that follows it
;; is a constant parallel and can never be exchanged with a register.
1114 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1115 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1116 (plus:<V_double_width>
1117 (zero_extend:<V_double_width>
1118 (vec_select:<V_HALF>
1119 (match_operand:VQI 1 "s_register_operand" "w")
1120 (match_operand:VQI 2 "vect_par_constant_high" "")))
1121 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1124 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1125 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1127 [(set_attr "type" "neon_add_widen")])
;; Widening unsigned add: the zero-extended D-register operand 1 is added to
;; the Q-register accumulator operand 2 with vaddw.
;; Operand 1 carries no "%" commutative marker: operand 2 has the wider
;; <V_widen> mode and sits outside the zero_extend, so the two cannot be
;; interchanged.
1129 (define_insn "widen_usum<mode>3"
1130 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1131 (plus:<V_widen> (zero_extend:<V_widen>
1132 (match_operand:VW 1 "s_register_operand" "w"))
1133 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1135 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1136 [(set_attr "type" "neon_add_widen")]
1139 ;; Helpers for quad-word reduction operations
1141 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1142 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1143 ; N/2-element vector.
;; V4SI form: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
;; register, printing the two D halves as %e1 and %f1.  The mnemonic and
;; signedness prefix come from <VQH_mnem> and <VQH_sign>.
1145 (define_insn "quad_halves_<code>v4si"
1146 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1148 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1149 (parallel [(const_int 0) (const_int 1)]))
1150 (vec_select:V2SI (match_dup 1)
1151 (parallel [(const_int 2) (const_int 3)]))))]
1153 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1154 [(set_attr "vqh_mnem" "<VQH_mnem>")
1155 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF form: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
;; register using the .f32 variant of <VQH_mnem>.  Only available when
;; ARM_HAVE_NEON_V4SF_ARITH holds.
1158 (define_insn "quad_halves_<code>v4sf"
1159 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1161 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1162 (parallel [(const_int 0) (const_int 1)]))
1163 (vec_select:V2SF (match_dup 1)
1164 (parallel [(const_int 2) (const_int 3)]))))]
1165 "ARM_HAVE_NEON_V4SF_ARITH"
1166 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1167 [(set_attr "vqh_mnem" "<VQH_mnem>")
1168 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI form: combines lanes 0-3 of operand 1 with lanes 4-7 of the same
;; register, printing the two D halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the plain output
;; constraint "=w", matching the v4si and v4sf variants.
1171 (define_insn "quad_halves_<code>v8hi"
1172 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1174 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1175 (parallel [(const_int 0) (const_int 1)
1176 (const_int 2) (const_int 3)]))
1177 (vec_select:V4HI (match_dup 1)
1178 (parallel [(const_int 4) (const_int 5)
1179 (const_int 6) (const_int 7)]))))]
1181 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1182 [(set_attr "vqh_mnem" "<VQH_mnem>")
1183 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI form: combines lanes 0-7 of operand 1 with lanes 8-15 of the same
;; register, printing the two D halves as %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the plain output
;; constraint "=w", matching the v4si and v4sf variants.
1186 (define_insn "quad_halves_<code>v16qi"
1187 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1189 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1190 (parallel [(const_int 0) (const_int 1)
1191 (const_int 2) (const_int 3)
1192 (const_int 4) (const_int 5)
1193 (const_int 6) (const_int 7)]))
1194 (vec_select:V8QI (match_dup 1)
1195 (parallel [(const_int 8) (const_int 9)
1196 (const_int 10) (const_int 11)
1197 (const_int 12) (const_int 13)
1198 (const_int 14) (const_int 15)]))))]
1200 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1201 [(set_attr "vqh_mnem" "<VQH_mnem>")
1202 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander that moves into the <V_HALF>mode subreg of the 128-bit operand 0
;; located at byte offset GET_MODE_SIZE (<V_HALF>mode).  The moved value is
;; presumably operand 1 -- TODO confirm.
1205 (define_expand "move_hi_quad_<mode>"
1206 [(match_operand:ANY128 0 "s_register_operand")
1207 (match_operand:<V_HALF> 1 "s_register_operand")]
1210 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1211 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander that moves into a <V_HALF>mode subreg of the 128-bit operand 0;
;; going by the name, the counterpart of move_hi_quad_<mode> for the other
;; half.
1216 (define_expand "move_lo_quad_<mode>"
1217 [(match_operand:ANY128 0 "s_register_operand")
1218 (match_operand:<V_HALF> 1 "s_register_operand")]
1221 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1227 ;; Reduction operations
;; Sum reduction of a double-word vector to a scalar.  neon_pairwise_reduce
;; is driven with the vpadd generator and writes a temporary vector, from
;; which element 0 is extracted into operand 0.
1229 (define_expand "reduc_plus_scal_<mode>"
1230 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1231 (match_operand:VD 1 "s_register_operand")]
1232 "ARM_HAVE_NEON_<MODE>_ARITH"
1234 rtx vec = gen_reg_rtx (<MODE>mode);
1235 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1236 &gen_neon_vpadd_internal<mode>);
1237 /* The same result is actually computed into every element. */
1238 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Sum reduction of a quad-word vector to a scalar (little-endian only).
;; The two halves are first added into a half-width temporary with
;; quad_halves_plus<mode>, then the half-width reduc_plus_scal expander
;; finishes the reduction.
1242 (define_expand "reduc_plus_scal_<mode>"
1243 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1244 (match_operand:VQ 1 "s_register_operand")]
1245 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1247 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1249 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1250 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar (little-endian only).
;; arm_reduc_plus_internal_v2di computes the sum into a temporary vector and
;; element 0 of that vector is extracted into operand 0.
1255 (define_expand "reduc_plus_scal_v2di"
1256 [(match_operand:DI 0 "nonimmediate_operand")
1257 (match_operand:V2DI 1 "s_register_operand")]
1258 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1260 rtx vec = gen_reg_rtx (V2DImode);
1262 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1263 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Adds the two D halves of operand 1 (%e1 and %f1) with vadd.i64 and writes
;; the result to the %e0 half of operand 0.  Little-endian only.
1268 (define_insn "arm_reduc_plus_internal_v2di"
1269 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1270 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1272 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1273 "vadd.i64\t%e0, %e1, %f1"
1274 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a double-word vector to a scalar.
;; neon_pairwise_reduce is driven with the vpsmin generator and writes a
;; temporary vector, from which element 0 is extracted into operand 0.
1277 (define_expand "reduc_smin_scal_<mode>"
1278 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1279 (match_operand:VD 1 "s_register_operand")]
1280 "ARM_HAVE_NEON_<MODE>_ARITH"
1282 rtx vec = gen_reg_rtx (<MODE>mode);
1284 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1285 &gen_neon_vpsmin<mode>);
1286 /* The result is computed into every element of the vector. */
1287 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a quad-word vector (little-endian only):
;; quad_halves_smin<mode> folds the two halves into a half-width temporary,
;; then the half-width reduc_smin_scal expander finishes the reduction.
1291 (define_expand "reduc_smin_scal_<mode>"
1292 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1293 (match_operand:VQ 1 "s_register_operand")]
1294 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1296 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1298 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1299 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a double-word vector to a scalar.
;; neon_pairwise_reduce is driven with the vpsmax generator and writes a
;; temporary vector, from which element 0 is extracted into operand 0.
1304 (define_expand "reduc_smax_scal_<mode>"
1305 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1306 (match_operand:VD 1 "s_register_operand")]
1307 "ARM_HAVE_NEON_<MODE>_ARITH"
1309 rtx vec = gen_reg_rtx (<MODE>mode);
1310 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1311 &gen_neon_vpsmax<mode>);
1312 /* The result is computed into every element of the vector. */
1313 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a quad-word vector (little-endian only):
;; quad_halves_smax<mode> folds the two halves into a half-width temporary,
;; then the half-width reduc_smax_scal expander finishes the reduction.
1317 (define_expand "reduc_smax_scal_<mode>"
1318 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1319 (match_operand:VQ 1 "s_register_operand")]
1320 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1322 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1324 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1325 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a double-word integer vector to a scalar.
;; neon_pairwise_reduce is driven with the vpumin generator and writes a
;; temporary vector, from which element 0 is extracted into operand 0.
1330 (define_expand "reduc_umin_scal_<mode>"
1331 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1332 (match_operand:VDI 1 "s_register_operand")]
1335 rtx vec = gen_reg_rtx (<MODE>mode);
1336 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1337 &gen_neon_vpumin<mode>);
1338 /* The result is computed into every element of the vector. */
1339 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a quad-word integer vector (little-endian
;; only): quad_halves_umin<mode> folds the two halves into a half-width
;; temporary, then the half-width reduc_umin_scal expander finishes.
1343 (define_expand "reduc_umin_scal_<mode>"
1344 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1345 (match_operand:VQI 1 "s_register_operand")]
1346 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1348 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1350 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1351 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a double-word integer vector to a scalar.
;; neon_pairwise_reduce is driven with the vpumax generator and writes a
;; temporary vector, from which element 0 is extracted into operand 0.
1356 (define_expand "reduc_umax_scal_<mode>"
1357 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1358 (match_operand:VDI 1 "s_register_operand")]
1361 rtx vec = gen_reg_rtx (<MODE>mode);
1362 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1363 &gen_neon_vpumax<mode>);
1364 /* The result is computed into every element of the vector. */
1365 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a quad-word integer vector (little-endian
;; only): quad_halves_umax<mode> folds the two halves into a half-width
;; temporary, then the half-width reduc_umax_scal expander finishes.
1369 (define_expand "reduc_umax_scal_<mode>"
1370 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371 (match_operand:VQI 1 "s_register_operand")]
1372 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1374 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1376 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1377 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add of two D-register vectors (vpadd), modelled as an unspec.
;; Passed as the generator to neon_pairwise_reduce by the VD form of
;; reduc_plus_scal_<mode>.  The type attribute picks the floating-point or
;; integer reduction class from <Is_float_mode>.
1382 (define_insn "neon_vpadd_internal<mode>"
1383 [(set (match_operand:VD 0 "s_register_operand" "=w")
1384 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1385 (match_operand:VD 2 "s_register_operand" "w")]
1388 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1389 ;; Assume this schedules like vadd.
1391 (if_then_else (match_test "<Is_float_mode>")
1392 (const_string "neon_fp_reduc_add_s<q>")
1393 (const_string "neon_reduc_add<q>")))]
;; Half-precision pairwise add on D registers (vpadd.f16), available under
;; TARGET_NEON_FP16INST.
1396 (define_insn "neon_vpaddv4hf"
1398 (match_operand:V4HF 0 "s_register_operand" "=w")
1399 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1400 (match_operand:V4HF 2 "s_register_operand" "w")]
1402 "TARGET_NEON_FP16INST"
1403 "vpadd.f16\t%P0, %P1, %P2"
1404 [(set_attr "type" "neon_reduc_add")]
;; Pairwise minimum of two D-register vectors using the signed element
;; suffix (vpmin.<V_s_elem>), modelled as an unspec.  The type attribute
;; picks the floating-point or integer min/max reduction class.
1407 (define_insn "neon_vpsmin<mode>"
1408 [(set (match_operand:VD 0 "s_register_operand" "=w")
1409 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1410 (match_operand:VD 2 "s_register_operand" "w")]
1413 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1415 (if_then_else (match_test "<Is_float_mode>")
1416 (const_string "neon_fp_reduc_minmax_s<q>")
1417 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise maximum of two D-register vectors using the signed element
;; suffix (vpmax.<V_s_elem>), modelled as an unspec.  The type attribute
;; picks the floating-point or integer min/max reduction class.
1420 (define_insn "neon_vpsmax<mode>"
1421 [(set (match_operand:VD 0 "s_register_operand" "=w")
1422 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1423 (match_operand:VD 2 "s_register_operand" "w")]
1426 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1428 (if_then_else (match_test "<Is_float_mode>")
1429 (const_string "neon_fp_reduc_minmax_s<q>")
1430 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise minimum of two D-register integer vectors using the unsigned
;; element suffix (vpmin.<V_u_elem>), modelled as an unspec.
1433 (define_insn "neon_vpumin<mode>"
1434 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1435 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1436 (match_operand:VDI 2 "s_register_operand" "w")]
1439 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1440 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise maximum of two D-register integer vectors using the unsigned
;; element suffix (vpmax.<V_u_elem>), modelled as an unspec.
1443 (define_insn "neon_vpumax<mode>"
1444 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1445 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1446 (match_operand:VDI 2 "s_register_operand" "w")]
1449 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1450 [(set_attr "type" "neon_reduc_minmax<q>")]
1453 ;; Saturating arithmetic
1455 ; NOTE: Neon supports many more saturating variants of instructions than the
1456 ; following, but these are all GCC currently understands.
1457 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1458 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) of D-register vectors, emitted as
;; vqadd with the signed element suffix.
1461 (define_insn "*ss_add<mode>_neon"
1462 [(set (match_operand:VD 0 "s_register_operand" "=w")
1463 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1464 (match_operand:VD 2 "s_register_operand" "w")))]
1466 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1467 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of D-register vectors, emitted as
;; vqadd with the unsigned element suffix.
1470 (define_insn "*us_add<mode>_neon"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")))]
1475 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1476 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of D-register vectors, emitted as
;; vqsub with the signed element suffix.
1479 (define_insn "*ss_sub<mode>_neon"
1480 [(set (match_operand:VD 0 "s_register_operand" "=w")
1481 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1482 (match_operand:VD 2 "s_register_operand" "w")))]
1484 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1485 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of D-register vectors, emitted
;; as vqsub with the unsigned element suffix.
1488 (define_insn "*us_sub<mode>_neon"
1489 [(set (match_operand:VD 0 "s_register_operand" "=w")
1490 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1491 (match_operand:VD 2 "s_register_operand" "w")))]
1493 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1494 [(set_attr "type" "neon_qsub<q>")]
;; Vector comparison producing a <V_cmp_result> mask.  Floating-point modes
;; are only enabled with flag_unsafe_math_optimizations.  All the work is
;; delegated to arm_expand_vector_compare, called with the comparison code
;; of operand 1 and a final argument of false.
1497 (define_expand "vec_cmp<mode><v_cmp_result>"
1498 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
1499 (match_operator:<V_cmp_result> 1 "comparison_operator"
1500 [(match_operand:VDQW 2 "s_register_operand")
1501 (match_operand:VDQW 3 "reg_or_zero_operand")]))]
1502 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1504 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1505 operands[2], operands[3], false);
;; Integer vector comparison whose result has the same mode as its inputs.
;; Delegated to arm_expand_vector_compare with the comparison code of
;; operand 1 and a final argument of false.
1509 (define_expand "vec_cmpu<mode><mode>"
1510 [(set (match_operand:VDQIW 0 "s_register_operand")
1511 (match_operator:VDQIW 1 "comparison_operator"
1512 [(match_operand:VDQIW 2 "s_register_operand")
1513 (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
1516 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1517 operands[2], operands[3], false);
1521 ;; Conditional instructions. These are comparisons with conditional moves for
1522 ;; vectors. They perform the assignment:
1524 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1526 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select where the compared values (operands 4 and 5)
;; and the selected values (operands 1 and 2) share one mode.  Expansion is
;; delegated to arm_expand_vcond with the <V_cmp_result> mask mode.
1529 (define_expand "vcond<mode><mode>"
1530 [(set (match_operand:VDQW 0 "s_register_operand")
1532 (match_operator 3 "comparison_operator"
1533 [(match_operand:VDQW 4 "s_register_operand")
1534 (match_operand:VDQW 5 "reg_or_zero_operand")])
1535 (match_operand:VDQW 1 "s_register_operand")
1536 (match_operand:VDQW 2 "s_register_operand")))]
1537 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1539 arm_expand_vcond (operands, <V_cmp_result>mode);
;; Vector conditional select where the compared values are in a V32 mode
;; and the selected values are in the corresponding <V_CVTTO> mode.
;; Expansion is delegated to arm_expand_vcond.
1543 (define_expand "vcond<V_cvtto><mode>"
1544 [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
1545 (if_then_else:<V_CVTTO>
1546 (match_operator 3 "comparison_operator"
1547 [(match_operand:V32 4 "s_register_operand")
1548 (match_operand:V32 5 "reg_or_zero_operand")])
1549 (match_operand:<V_CVTTO> 1 "s_register_operand")
1550 (match_operand:<V_CVTTO> 2 "s_register_operand")))]
1551 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1553 arm_expand_vcond (operands, <V_cmp_result>mode);
;; Vector conditional select whose comparison operands (4 and 5) are in the
;; <V_cmp_result> mode while the selected values are VDQW.  Unlike the vcond
;; expanders it uses the arm_comparison_operator predicate.  Expansion is
;; delegated to arm_expand_vcond.
1557 (define_expand "vcondu<mode><v_cmp_result>"
1558 [(set (match_operand:VDQW 0 "s_register_operand")
1560 (match_operator 3 "arm_comparison_operator"
1561 [(match_operand:<V_cmp_result> 4 "s_register_operand")
1562 (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
1563 (match_operand:VDQW 1 "s_register_operand")
1564 (match_operand:VDQW 2 "s_register_operand")))]
1567 arm_expand_vcond (operands, <V_cmp_result>mode);
;; Select between operands 1 and 2 under the mask in operand 3.  Implemented
;; with neon_vbsl<mode>; note the mask (operand 3) is passed before
;; operand 1 in the generator call.
1571 (define_expand "vcond_mask_<mode><v_cmp_result>"
1572 [(set (match_operand:VDQW 0 "s_register_operand")
1574 (match_operand:<V_cmp_result> 3 "s_register_operand")
1575 (match_operand:VDQW 1 "s_register_operand")
1576 (match_operand:VDQW 2 "s_register_operand")))]
1579 emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
1584 ;; Patterns for builtins.
1586 ; good for plain vadd, vaddq.
;; Builtin expander for single-precision vadd.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds, the canonical add<mode>3 pattern is
;; used; the neon_vadd<mode>_unspec call is presumably the fallback branch.
1588 (define_expand "neon_vadd<mode>"
1589 [(match_operand:VCVTF 0 "s_register_operand")
1590 (match_operand:VCVTF 1 "s_register_operand")
1591 (match_operand:VCVTF 2 "s_register_operand")]
1594 if (ARM_HAVE_NEON_<MODE>_ARITH)
1595 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1597 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for half-precision (VH) vadd: always expands to the
;; add<mode>3 pattern, under TARGET_NEON_FP16INST.
1602 (define_expand "neon_vadd<mode>"
1603 [(match_operand:VH 0 "s_register_operand")
1604 (match_operand:VH 1 "s_register_operand")
1605 (match_operand:VH 2 "s_register_operand")]
1606 "TARGET_NEON_FP16INST"
1608 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
;; Builtin expander for half-precision (VH) vsub: always expands to the
;; sub<mode>3 pattern, under TARGET_NEON_FP16INST.
1612 (define_expand "neon_vsub<mode>"
1613 [(match_operand:VH 0 "s_register_operand")
1614 (match_operand:VH 1 "s_register_operand")
1615 (match_operand:VH 2 "s_register_operand")]
1616 "TARGET_NEON_FP16INST"
1618 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
1622 ; Note that NEON operations don't support the full IEEE 754 standard: in
1623 ; particular, denormal values are flushed to zero. This means that GCC cannot
1624 ; use those instructions for autovectorization, etc. unless
1625 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1626 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1627 ; header) must work in either case: if -funsafe-math-optimizations is given,
1628 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1629 ; expand to unspecs (which may potentially limit the extent to which they might
1630 ; be optimized by generic code).
1632 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vector add, emitted by the VCVTF neon_vadd<mode> expander.
;; Prints vadd.<V_if_elem> on D or Q registers via %<V_reg>.
1634 (define_insn "neon_vadd<mode>_unspec"
1635 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1636 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1637 (match_operand:VCVTF 2 "s_register_operand" "w")]
1640 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1642 (if_then_else (match_test "<Is_float_mode>")
1643 (const_string "neon_fp_addsub_s<q>")
1644 (const_string "neon_add<q>")))]
;; Long add: two D-register integer vectors produce a <V_widen> result in a
;; Q register (vaddl).  <sup> supplies the data-type prefix printed before
;; the element size.
1647 (define_insn "neon_vaddl<sup><mode>"
1648 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1649 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1650 (match_operand:VDI 2 "s_register_operand" "w")]
1653 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1654 [(set_attr "type" "neon_add_long")]
;; Wide add: a <V_widen> Q-register operand 1 plus a D-register integer
;; vector operand 2, giving a <V_widen> result (vaddw).
1657 (define_insn "neon_vaddw<sup><mode>"
1658 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1659 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1660 (match_operand:VDI 2 "s_register_operand" "w")]
1663 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1664 [(set_attr "type" "neon_add_widen")]
;; Halving add on D or Q integer vectors.  The <r> iterator selects between
;; the vhadd and vrhadd mnemonics and <sup> supplies the data-type prefix.
;; NOTE(review): the type is "neon_add_halve_q" for every mode, without a
;; <q> suffix -- confirm this is intended for the D-register modes.
1669 (define_insn "neon_v<r>hadd<sup><mode>"
1670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1672 (match_operand:VDQIW 2 "s_register_operand" "w")]
1675 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1676 [(set_attr "type" "neon_add_halve_q")]
;; Builtin (unspec) form of saturating add on D or Q integer vectors,
;; printed as vqadd with the <sup> data-type prefix.
1679 (define_insn "neon_vqadd<sup><mode>"
1680 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1681 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1682 (match_operand:VDQIX 2 "s_register_operand" "w")]
1685 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1686 [(set_attr "type" "neon_qadd<q>")]
;; Narrowing add: two Q-register VN vectors produce a <V_narrow> result in a
;; D register.  The <r> iterator selects between the vaddhn and vraddhn
;; mnemonics.
1689 (define_insn "neon_v<r>addhn<mode>"
1690 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1691 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1692 (match_operand:VN 2 "s_register_operand" "w")]
1695 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1696 [(set_attr "type" "neon_add_halve_narrow_q")]
1699 ;; Polynomial and Float multiplication.
;; Unspec multiply for the VPF modes; <pf> supplies the data-type prefix of
;; the vmul mnemonic.  The type attribute picks the floating-point or
;; integer multiply class from <Is_float_mode>.
1700 (define_insn "neon_vmul<pf><mode>"
1701 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1702 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1703 (match_operand:VPF 2 "s_register_operand" "w")]
1706 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1708 (if_then_else (match_test "<Is_float_mode>")
1709 (const_string "neon_fp_mul_s<q>")
1710 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Half-precision (VH) vector multiply, printed as vmul.f16 on D or Q
;; registers.  Available under TARGET_NEON_FP16INST.
1713 (define_insn "neon_vmulf<mode>"
1715 (match_operand:VH 0 "s_register_operand" "=w")
1717 (match_operand:VH 1 "s_register_operand" "w")
1718 (match_operand:VH 2 "s_register_operand" "w")))]
1719 "TARGET_NEON_FP16INST"
1720 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1721 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for multiply-accumulate.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds, the mul<mode>3add<mode>_neon pattern is
;; used; the neon_vmla<mode>_unspec call is presumably the fallback branch.
;; Both receive operands 0-3 in order.
1724 (define_expand "neon_vmla<mode>"
1725 [(match_operand:VDQW 0 "s_register_operand")
1726 (match_operand:VDQW 1 "s_register_operand")
1727 (match_operand:VDQW 2 "s_register_operand")
1728 (match_operand:VDQW 3 "s_register_operand")]
1731 if (ARM_HAVE_NEON_<MODE>_ARITH)
1732 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1733 operands[2], operands[3]));
1735 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1736 operands[2], operands[3]));
;; Builtin expander for fused multiply-add on VCVTF modes (needs
;; TARGET_FMA).  Forwards to fma<mode>4_intrinsic; note the reordering:
;; operands 2 and 3 are passed directly after the destination.
1740 (define_expand "neon_vfma<VCVTF:mode>"
1741 [(match_operand:VCVTF 0 "s_register_operand")
1742 (match_operand:VCVTF 1 "s_register_operand")
1743 (match_operand:VCVTF 2 "s_register_operand")
1744 (match_operand:VCVTF 3 "s_register_operand")]
1745 "TARGET_NEON && TARGET_FMA"
1747 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-add on half-precision (VH) modes.
;; Forwards to fma<mode>4 (not the _intrinsic variant); operands 2 and 3
;; are passed directly after the destination.
1752 (define_expand "neon_vfma<VH:mode>"
1753 [(match_operand:VH 0 "s_register_operand")
1754 (match_operand:VH 1 "s_register_operand")
1755 (match_operand:VH 2 "s_register_operand")
1756 (match_operand:VH 3 "s_register_operand")]
1757 "TARGET_NEON_FP16INST"
1759 emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on VCVTF modes (needs
;; TARGET_FMA).  Forwards to fmsub<mode>4_intrinsic; operands 2 and 3 are
;; passed directly after the destination.
1764 (define_expand "neon_vfms<VCVTF:mode>"
1765 [(match_operand:VCVTF 0 "s_register_operand")
1766 (match_operand:VCVTF 1 "s_register_operand")
1767 (match_operand:VCVTF 2 "s_register_operand")
1768 (match_operand:VCVTF 3 "s_register_operand")]
1769 "TARGET_NEON && TARGET_FMA"
1771 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for fused multiply-subtract on half-precision (VH)
;; modes.  Forwards to fmsub<mode>4_intrinsic; operands 2 and 3 are passed
;; directly after the destination.
1776 (define_expand "neon_vfms<VH:mode>"
1777 [(match_operand:VH 0 "s_register_operand")
1778 (match_operand:VH 1 "s_register_operand")
1779 (match_operand:VH 2 "s_register_operand")
1780 (match_operand:VH 3 "s_register_operand")]
1781 "TARGET_NEON_FP16INST"
1783 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1788 ;; The expand RTL structure here is not important.
1789 ;; We use the gen_* functions anyway.
1790 ;; We just need something to wrap the iterators around.
;; Expander for the vfmal/vfmsl low/high builtins.  It builds the
;; half-selecting parallel for <VFML>mode with arm_simd_vect_par_cnst_half
;; (using <vfml_half_selector>) and emits the matching *_intrinsic insn.
1792 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
1793 [(set (match_operand:VCVTF 0 "s_register_operand")
1795 [(match_operand:VCVTF 1 "s_register_operand")
1797 (match_operand:<VFML> 2 "s_register_operand")
1798 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
1801 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1802 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmal.f16 on the low halves (%<V_lo>) of operands 2 and 3, selected by
;; the vect_par_constant_low parallels in operands 4 and 5.  The accumulator
;; (operand 1) is tied to the output.
1810 (define_insn "vfmal_low<mode>_intrinsic"
1811 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1814 (vec_select:<VFMLSEL>
1815 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1816 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1818 (vec_select:<VFMLSEL>
1819 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1820 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1821 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1823 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1824 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the high halves (%<V_hi>) of operands 2 and 3, selected by
;; the vect_par_constant_high parallels in operands 4 and 5.  The
;; accumulator (operand 1) is tied to the output.
1827 (define_insn "vfmsl_high<mode>_intrinsic"
1828 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1832 (vec_select:<VFMLSEL>
1833 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1834 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1836 (vec_select:<VFMLSEL>
1837 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1838 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1839 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1841 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1842 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 on the high halves (%<V_hi>) of operands 2 and 3, selected by
;; the vect_par_constant_high parallels in operands 4 and 5.  The
;; accumulator (operand 1) is tied to the output.
1845 (define_insn "vfmal_high<mode>_intrinsic"
1846 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1849 (vec_select:<VFMLSEL>
1850 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1851 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1853 (vec_select:<VFMLSEL>
1854 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1855 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1856 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1858 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1859 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 on the low halves (%<V_lo>) of operands 2 and 3, selected by
;; the vect_par_constant_low parallels in operands 4 and 5.  The accumulator
;; (operand 1) is tied to the output.
1862 (define_insn "vfmsl_low<mode>_intrinsic"
1863 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1867 (vec_select:<VFMLSEL>
1868 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1869 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1871 (vec_select:<VFMLSEL>
1872 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1873 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1874 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1876 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1877 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the by-lane vfmal/vfmsl builtins where operands 2 and 3
;; share the <VFML> mode.  The lane index in operand 4 is converted with
;; NEON_ENDIAN_LANE_N for <VFML>mode, the half-selecting parallel is built
;; with arm_simd_vect_par_cnst_half, and the matching *_lane_*_intrinsic
;; insn is emitted.
1880 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
1881 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1883 [(match_operand:VCVTF 1 "s_register_operand")
1885 (match_operand:<VFML> 2 "s_register_operand")
1886 (match_operand:<VFML> 3 "s_register_operand"))
1887 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1890 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
1891 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1892 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
1893 (operands[0], operands[1],
1894 operands[2], operands[3],
;; By-lane vfmal.f16 using the low half of operand 2.  The output code
;; converts the lane in operand 5 with NEON_ENDIAN_LANE_N.  If the lane lies
;; beyond the last element of a <VFMLSEL> half, the index is rebased by the
;; half's element count and the high half of operand 3 is printed;
;; otherwise the low half is printed with the lane as is.
1899 (define_insn "vfmal_lane_low<mode>_intrinsic"
1900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1903 (vec_select:<VFMLSEL>
1904 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1905 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1907 (vec_duplicate:<VFMLSEL>
1909 (match_operand:<VFML> 3 "s_register_operand" "x")
1910 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1911 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1914 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1915 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1917 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1918 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1922 operands[5] = GEN_INT (lane);
1923 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1926 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the by-lane vfmal/vfmsl builtins where operand 3 has the
;; <VFMLSEL2> mode, which differs from operand 2's <VFML> mode.  The lane
;; index is converted with NEON_ENDIAN_LANE_N relative to <VFMLSEL2>mode,
;; while the half-selecting parallel is built for <VFML>mode.
1929 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
1930 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1932 [(match_operand:VCVTF 1 "s_register_operand")
1934 (match_operand:<VFML> 2 "s_register_operand")
1935 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
1936 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1940 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
1941 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1942 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
1943 (operands[0], operands[1], operands[2], operands[3],
1948 ;; Used to implement the intrinsics:
1949 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1950 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1951 ;; Needs a bit of care to get the modes of the different sub-expressions right
1952 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1953 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into a register offset
;; (lane / elts_per_reg) and an index within that register
;; (lane % elts_per_reg), where elts_per_reg is the element count of
;; <VFMLSEL>mode.  Operand 3 is rebuilt as a <VFMLSEL>mode hard register at
;; REGNO + offset, and operand 2 as a <VFMLSEL>mode register at its own
;; REGNO, before printing.
1955 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
1956 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1959 (vec_select:<VFMLSEL>
1960 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1961 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1963 (vec_duplicate:<VFMLSEL>
1965 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1966 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1967 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1970 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1971 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1972 int new_lane = lane % elts_per_reg;
1973 int regdiff = lane / elts_per_reg;
1974 operands[5] = GEN_INT (new_lane);
1975 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1976 because we want the print_operand code to print the appropriate
1977 S or D register prefix. */
1978 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1979 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1980 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1982 [(set_attr "type" "neon_fp_mla_s<q>")]
1985 ;; Used to implement the intrinsics:
1986 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1987 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
1988 ;; Needs a bit of care to get the modes of the different sub-expressions right
1989 ;; due to 'a' and 'b' having different sizes and make sure we use the right
1990 ;; S or D subregister to select the appropriate lane from.
;; vfmal lane form using the high half of operand 2 (operand 4 must satisfy
;; vect_par_constant_high); operand 1 is tied to the destination.  Only
;; operands 3 and 5 are rewritten by the output code; operand 2 is printed
;; through %<V_hi>2.
1992 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
1993 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1996 (vec_select:<VFMLSEL>
1997 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1998 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2000 (vec_duplicate:<VFMLSEL>
2002 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2003 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2004 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2007 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2008 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2009 int new_lane = lane % elts_per_reg;
2010 int regdiff = lane / elts_per_reg;
2011 operands[5] = GEN_INT (new_lane);
2012 /* We re-create operands[3] in the halved VFMLSEL mode
2013 because we've calculated the correct half-width subreg to extract
2014 the lane from and we want to print *that* subreg instead. */
2015 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2016 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2018 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal lane form on the high half of operand 2, with the lane vector
;; (operand 3) in <VFML> mode.  When the endian-adjusted lane is past the
;; last element index of <VFMLSEL>mode, the lane is rebased by that element
;; count and %<V_hi>3 is printed; the other template prints %<V_lo>3 with
;; the lane unchanged.
2021 (define_insn "vfmal_lane_high<mode>_intrinsic"
2022 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2025 (vec_select:<VFMLSEL>
2026 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2027 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2029 (vec_duplicate:<VFMLSEL>
2031 (match_operand:<VFML> 3 "s_register_operand" "x")
2032 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2033 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2036 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2037 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2039 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2040 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2044 operands[5] = GEN_INT (lane);
2045 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2048 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl lane form on the low half of operand 2 (vect_par_constant_low),
;; with the lane vector (operand 3) in <VFML> mode.  When the
;; endian-adjusted lane is past the last element index of <VFMLSEL>mode,
;; the lane is rebased by that element count and %<V_hi>3 is printed; the
;; other template prints %<V_lo>3 with the lane unchanged.
2051 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2052 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2056 (vec_select:<VFMLSEL>
2057 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2058 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2060 (vec_duplicate:<VFMLSEL>
2062 (match_operand:<VFML> 3 "s_register_operand" "x")
2063 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2064 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2067 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2068 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2070 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2071 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2075 operands[5] = GEN_INT (lane);
2076 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2079 [(set_attr "type" "neon_fp_mla_s<q>")]
2082 ;; Used to implement the intrinsics:
2083 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2084 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2085 ;; Needs a bit of care to get the modes of the different sub-expressions right
2086 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2087 ;; S or D subregister to select the appropriate lane from.
;; vfmsl lane form using the low half of operand 2 (operand 4 must satisfy
;; vect_par_constant_low); operand 1 is tied to the destination.  The
;; output code divides the endian-adjusted lane by the element count of
;; <VFMLSEL>mode to get a register offset (regdiff) and uses the remainder
;; as the lane number to print (new_lane).
2089 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2090 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2094 (vec_select:<VFMLSEL>
2095 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2096 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2098 (vec_duplicate:<VFMLSEL>
2100 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2101 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2102 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2105 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2106 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2107 int new_lane = lane % elts_per_reg;
2108 int regdiff = lane / elts_per_reg;
2109 operands[5] = GEN_INT (new_lane);
2110 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2111 because we want the print_operand code to print the appropriate
2112 S or D register prefix. */
2113 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2114 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2115 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2117 [(set_attr "type" "neon_fp_mla_s<q>")]
2120 ;; Used to implement the intrinsics:
2121 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2122 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2123 ;; Needs a bit of care to get the modes of the different sub-expressions right
2124 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2125 ;; S or D subregister to select the appropriate lane from.
;; vfmsl lane form using the high half of operand 2 (operand 4 must satisfy
;; vect_par_constant_high); operand 1 is tied to the destination.  Only
;; operands 3 and 5 are rewritten by the output code; operand 2 is printed
;; through %<V_hi>2.
2127 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2128 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2132 (vec_select:<VFMLSEL>
2133 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2134 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2136 (vec_duplicate:<VFMLSEL>
2138 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2139 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2140 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2143 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2144 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2145 int new_lane = lane % elts_per_reg;
2146 int regdiff = lane / elts_per_reg;
2147 operands[5] = GEN_INT (new_lane);
2148 /* We re-create operands[3] in the halved VFMLSEL mode
2149 because we've calculated the correct half-width subreg to extract
2150 the lane from and we want to print *that* subreg instead. */
2151 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2152 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2154 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl lane form on the high half of operand 2, with the lane vector
;; (operand 3) in <VFML> mode.  When the endian-adjusted lane is past the
;; last element index of <VFMLSEL>mode, the lane is rebased by that element
;; count and %<V_hi>3 is printed; the other template prints %<V_lo>3 with
;; the lane unchanged.
2157 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2158 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2162 (vec_select:<VFMLSEL>
2163 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2164 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2166 (vec_duplicate:<VFMLSEL>
2168 (match_operand:<VFML> 3 "s_register_operand" "x")
2169 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2170 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2173 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2174 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2176 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2177 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2181 operands[5] = GEN_INT (lane);
2182 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2185 [(set_attr "type" "neon_fp_mla_s<q>")]
2188 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla over the VDQW modes.  Operand 1 is tied to the
;; destination (constraint "0"); operands 2 and 3 are the sources printed
;; in the template.  The attribute value is selected by <Is_float_mode>.
2190 (define_insn "neon_vmla<mode>_unspec"
2191 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2192 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2193 (match_operand:VDQW 2 "s_register_operand" "w")
2194 (match_operand:VDQW 3 "s_register_operand" "w")]
2197 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2199 (if_then_else (match_test "<Is_float_mode>")
2200 (const_string "neon_fp_mla_s<q>")
2201 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: the result and the accumulator (operand 1, tied to the
;; destination) are in <V_widen> mode; the two sources (operands 2 and 3)
;; are in the VW modes.
2204 (define_insn "neon_vmlal<sup><mode>"
2205 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2206 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2207 (match_operand:VW 2 "s_register_operand" "w")
2208 (match_operand:VW 3 "s_register_operand" "w")]
2211 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2212 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander for the vmls intrinsic over the VDQW modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds it emits the
;; mul<mode>3neg<mode>add<mode>_neon pattern; the alternative is the
;; neon_vmls<mode>_unspec pattern.  Both receive operands 0-3 unchanged.
2215 (define_expand "neon_vmls<mode>"
2216 [(match_operand:VDQW 0 "s_register_operand")
2217 (match_operand:VDQW 1 "s_register_operand")
2218 (match_operand:VDQW 2 "s_register_operand")
2219 (match_operand:VDQW 3 "s_register_operand")]
2222 if (ARM_HAVE_NEON_<MODE>_ARITH)
2223 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2224 operands[1], operands[2], operands[3]));
2226 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2227 operands[2], operands[3]));
2231 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls over the VDQW modes.  Operand 1 is tied to the
;; destination (constraint "0"); operands 2 and 3 are the sources printed
;; in the template.  The attribute value is selected by <Is_float_mode>.
2233 (define_insn "neon_vmls<mode>_unspec"
2234 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2235 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2236 (match_operand:VDQW 2 "s_register_operand" "w")
2237 (match_operand:VDQW 3 "s_register_operand" "w")]
2240 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2242 (if_then_else (match_test "<Is_float_mode>")
2243 (const_string "neon_fp_mla_s<q>")
2244 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: the result and the accumulator (operand 1, tied to the
;; destination) are in <V_widen> mode; the two sources (operands 2 and 3)
;; are in the VW modes.
2247 (define_insn "neon_vmlsl<sup><mode>"
2248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2249 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2250 (match_operand:VW 2 "s_register_operand" "w")
2251 (match_operand:VW 3 "s_register_operand" "w")]
2254 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2255 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2258 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics, selected by <r>.  Takes two VMDQI
;; register sources and writes a VMDQI result.
2259 (define_insn "neon_vq<r>dmulh<mode>"
2260 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2261 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2262 (match_operand:VMDQI 2 "s_register_operand" "w")]
2265 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2266 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2269 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics, selected by <VQRDMLH_AS:neon_rdma_as>.
;; Operand 1 is tied to the destination (constraint "0"); operands 2 and 3
;; are the sources printed in the template.
2270 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2271 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2272 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2273 (match_operand:VMDQI 2 "s_register_operand" "w")
2274 (match_operand:VMDQI 3 "s_register_operand" "w")]
2277 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2278 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: the result and the accumulator (operand 1, tied to the
;; destination) are in <V_widen> mode; the two sources (operands 2 and 3)
;; are in the VMDI modes.
2281 (define_insn "neon_vqdmlal<mode>"
2282 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2283 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2284 (match_operand:VMDI 2 "s_register_operand" "w")
2285 (match_operand:VMDI 3 "s_register_operand" "w")]
2288 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2289 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: the result and the accumulator (operand 1, tied to the
;; destination) are in <V_widen> mode; the two sources (operands 2 and 3)
;; are in the VMDI modes.
2292 (define_insn "neon_vqdmlsl<mode>"
2293 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2294 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2295 (match_operand:VMDI 2 "s_register_operand" "w")
2296 (match_operand:VMDI 3 "s_register_operand" "w")]
2299 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2300 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW-mode register sources produce a <V_widen>-mode result.
2303 (define_insn "neon_vmull<sup><mode>"
2304 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2305 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2306 (match_operand:VW 2 "s_register_operand" "w")]
2309 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2310 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI-mode register sources produce a <V_widen>-mode result.
2313 (define_insn "neon_vqdmull<mode>"
2314 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2315 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2316 (match_operand:VMDI 2 "s_register_operand" "w")]
2319 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2320 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub intrinsic over the VCVTF modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds it emits the generic sub<mode>3
;; pattern; the alternative is the neon_vsub<mode>_unspec pattern.
2323 (define_expand "neon_vsub<mode>"
2324 [(match_operand:VCVTF 0 "s_register_operand")
2325 (match_operand:VCVTF 1 "s_register_operand")
2326 (match_operand:VCVTF 2 "s_register_operand")]
2329 if (ARM_HAVE_NEON_<MODE>_ARITH)
2330 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2332 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2337 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over the VCVTF modes, with two register sources.
;; The attribute value is selected by <Is_float_mode>.
2339 (define_insn "neon_vsub<mode>_unspec"
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2341 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2342 (match_operand:VCVTF 2 "s_register_operand" "w")]
2345 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2347 (if_then_else (match_test "<Is_float_mode>")
2348 (const_string "neon_fp_addsub_s<q>")
2349 (const_string "neon_sub<q>")))]
;; vsubl: two VDI-mode register sources produce a <V_widen>-mode result.
2352 (define_insn "neon_vsubl<sup><mode>"
2353 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2354 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2355 (match_operand:VDI 2 "s_register_operand" "w")]
2358 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2359 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in <V_widen> mode, operand 2 is in a VDI
;; mode, and the result is in <V_widen> mode.
2362 (define_insn "neon_vsubw<sup><mode>"
2363 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2364 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2365 (match_operand:VDI 2 "s_register_operand" "w")]
2368 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2369 [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes: two register sources, result in the same
;; mode.  <sup> supplies the type letter in the mnemonic suffix.
2372 (define_insn "neon_vqsub<sup><mode>"
2373 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2374 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2375 (match_operand:VDQIX 2 "s_register_operand" "w")]
2378 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2379 [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes: two register sources, result in the same
;; mode.  <sup> supplies the type letter in the mnemonic suffix.
2382 (define_insn "neon_vhsub<sup><mode>"
2383 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2384 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2385 (match_operand:VDQIW 2 "s_register_operand" "w")]
2388 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2389 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN-mode register sources produce a <V_narrow>-mode
;; result.  <r> selects between the two mnemonics.
2392 (define_insn "neon_v<r>subhn<mode>"
2393 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2394 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2395 (match_operand:VN 2 "s_register_operand" "w")]
2398 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2399 [(set_attr "type" "neon_sub_halve_narrow_q")]
2402 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2403 ;; without unsafe math optimizations.
;; Comparison expander over the VDQW modes; operand 2 may be a register or
;; zero (reg_or_zero_operand).  For float vector modes without
;; flag_unsafe_math_optimizations the v2sf/v4sf ..._insn_unspec generators
;; are used; gen_neon_vc<cmp_op><mode>_insn is the other generator.
2404 (define_expand "@neon_vc<cmp_op><mode>"
2405 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2407 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2408 (match_operand:VDQW 2 "reg_or_zero_operand")))]
2411 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2413 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2414 && !flag_unsafe_math_optimizations)
2416 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2417 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2418 whereas this expander iterates over the integer modes as well,
2419 but we will never expand to UNSPECs for the integer comparisons. */
2423 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2428 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2437 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL-comparison form of vc<cmp_op> over the VDQW modes; the condition
;; excludes float vector modes unless flag_unsafe_math_optimizations is set.
;; There are two alternatives: a register second operand ("w") or zero
;; ("Dz").  The template is assembled with sprintf: the type letter is "f"
;; for float vector modes and <cmp_type> otherwise, and the last operand is
;; printed as %<V_reg>2 for alternative 0 and as #0 otherwise.
2444 (define_insn "@neon_vc<cmp_op><mode>_insn"
2445 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2447 (COMPARISONS:<V_cmp_result>
2448 (match_operand:VDQW 1 "s_register_operand" "w,w")
2449 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2450 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2451 && !flag_unsafe_math_optimizations)"
2454 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2456 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2457 ? "f" : "<cmp_type>",
2458 which_alternative == 0
2459 ? "%<V_reg>2" : "#0");
2460 output_asm_insn (pattern, operands);
2464 (if_then_else (match_operand 2 "zero_operand")
2465 (const_string "neon_compare_zero<q>")
2466 (const_string "neon_compare<q>")))]
;; Unspec form of the floating-point comparison over the VCVTF modes.
;; Alternative 0 takes a register second operand and prints %<V_reg>2;
;; the "Dz" alternative prints #0 instead.
2469 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2470 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2471 (unspec:<V_cmp_result>
2472 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2473 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2478 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2480 which_alternative == 0
2481 ? "%<V_reg>2" : "#0");
2482 output_asm_insn (pattern, operands);
2485 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparison expander for the VH modes, guarded by TARGET_NEON_FP16INST.
;; For float vector modes without flag_unsafe_math_optimizations the
;; ..._fp16insn_unspec generator is used; ..._fp16insn is the other one.
2488 (define_expand "@neon_vc<cmp_op><mode>"
2489 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2492 (match_operand:VH 1 "s_register_operand")
2493 (match_operand:VH 2 "reg_or_zero_operand")))]
2494 "TARGET_NEON_FP16INST"
2496 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2498 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2499 && !flag_unsafe_math_optimizations)
2501 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2502 (operands[0], operands[1], operands[2]));
2505 (gen_neon_vc<cmp_op><mode>_fp16insn
2506 (operands[0], operands[1], operands[2]));
;; RTL-comparison form of vc<cmp_op> for the VH modes.  It requires
;; TARGET_NEON_FP16INST, and the condition excludes float vector modes
;; unless flag_unsafe_math_optimizations is set.  Alternative 0 prints
;; %<V_reg>2 as the last operand; the "Dz" alternative prints #0.
2510 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2511 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2513 (COMPARISONS:<V_cmp_result>
2514 (match_operand:VH 1 "s_register_operand" "w,w")
2515 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2516 "TARGET_NEON_FP16INST
2517 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2518 && !flag_unsafe_math_optimizations)"
2521 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2523 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2524 ? "f" : "<cmp_type>",
2525 which_alternative == 0
2526 ? "%<V_reg>2" : "#0");
2527 output_asm_insn (pattern, operands);
2531 (if_then_else (match_operand 2 "zero_operand")
2532 (const_string "neon_compare_zero<q>")
2533 (const_string "neon_compare<q>")))])
;; Unspec form of the comparison for the VH modes, guarded by
;; TARGET_NEON_FP16INST.  Alternative 0 prints %<V_reg>2 as the last
;; operand; the "Dz" alternative prints #0.
2535 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2537 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2538 (unspec:<V_cmp_result>
2539 [(match_operand:VH 1 "s_register_operand" "w,w")
2540 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2542 "TARGET_NEON_FP16INST"
2545 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2547 which_alternative == 0
2548 ? "%<V_reg>2" : "#0");
2549 output_asm_insn (pattern, operands);
2552 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Comparisons from the GTUGEU iterator over the VDQIW modes.  Both sources
;; are registers and the mnemonic uses the ".u" type prefix.
2554 (define_insn "@neon_vc<code><mode>"
2555 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2557 (GTUGEU:<V_cmp_result>
2558 (match_operand:VDQIW 1 "s_register_operand" "w")
2559 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2561 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2562 [(set_attr "type" "neon_compare<q>")]
;; Expander for GTGE comparisons of the absolute values of two VCVTF
;; operands.  With flag_unsafe_math_optimizations it emits the ..._insn
;; pattern; the ..._insn_unspec pattern is the alternative.
2565 (define_expand "neon_vca<cmp_op><mode>"
2566 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2568 (GTGE:<V_cmp_result>
2569 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2570 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2573 if (flag_unsafe_math_optimizations)
2574 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2577 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the absolute-value comparison (GTGE applied to abs of each
;; VCVTF operand).  Only available with TARGET_NEON and
;; flag_unsafe_math_optimizations; prints vac<cmp_op>.
2584 (define_insn "neon_vca<cmp_op><mode>_insn"
2585 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2587 (GTGE:<V_cmp_result>
2588 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2589 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2590 "TARGET_NEON && flag_unsafe_math_optimizations"
2591 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2592 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the absolute-value comparison over the VCVTF modes;
;; prints vac<cmp_op_unsp> with two register sources.
2595 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2596 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2597 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2598 (match_operand:VCVTF 2 "s_register_operand" "w")]
2601 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2602 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for GLTE comparisons of the absolute values of two VH operands,
;; guarded by TARGET_NEON_FP16INST.  With flag_unsafe_math_optimizations it
;; emits the ..._fp16insn pattern; ..._fp16insn_unspec is the alternative.
2605 (define_expand "neon_vca<cmp_op><mode>"
2607 (match_operand:<V_cmp_result> 0 "s_register_operand")
2609 (GLTE:<V_cmp_result>
2610 (abs:VH (match_operand:VH 1 "s_register_operand"))
2611 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2612 "TARGET_NEON_FP16INST"
2614 if (flag_unsafe_math_optimizations)
2615 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2616 (operands[0], operands[1], operands[2]));
2618 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2619 (operands[0], operands[1], operands[2]));
;; RTL form of the absolute-value comparison for the VH modes (GLTE applied
;; to abs of each operand).  Requires TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations; prints vac<cmp_op>.
2623 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2625 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2627 (GLTE:<V_cmp_result>
2628 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2629 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2630 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2631 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2632 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the absolute-value comparison for the VH modes; prints
;; vac<cmp_op_unsp> with two register sources.
2635 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2636 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2637 (unspec:<V_cmp_result>
2638 [(match_operand:VH 1 "s_register_operand" "w")
2639 (match_operand:VH 2 "s_register_operand" "w")]
2642 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2643 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, guarded by
;; TARGET_NEON_FP16INST.  It forwards to gen_neon_vc<cmp_op><mode>, passing
;; CONST0_RTX (<MODE>mode) as the second comparison operand.
2646 (define_expand "neon_vc<cmp_op>z<mode>"
2648 (match_operand:<V_cmp_result> 0 "s_register_operand")
2649 (COMPARISONS:<V_cmp_result>
2650 (match_operand:VH 1 "s_register_operand")
2652 "TARGET_NEON_FP16INST"
2654 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2655 CONST0_RTX (<MODE>mode)));
;; vtst over the VDQIW modes: two register sources, result in the same mode.
2659 (define_insn "neon_vtst<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2662 (match_operand:VDQIW 2 "s_register_operand" "w")]
2665 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2666 [(set_attr "type" "neon_tst<q>")]
;; vabd over the VDQIW modes: two register sources, result in the same
;; mode.  <sup> supplies the type letter in the mnemonic suffix.
2669 (define_insn "neon_vabd<sup><mode>"
2670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w")]
2675 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2676 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VH modes, guarded by TARGET_NEON_FP16INST.
2679 (define_insn "neon_vabd<mode>"
2680 [(set (match_operand:VH 0 "s_register_operand" "=w")
2681 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2682 (match_operand:VH 2 "s_register_operand" "w")]
2684 "TARGET_NEON_FP16INST"
2685 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VCVTF modes: two register sources, result in the same mode.
2689 (define_insn "neon_vabdf<mode>"
2690 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2691 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2692 (match_operand:VCVTF 2 "s_register_operand" "w")]
2695 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW-mode register sources produce a <V_widen>-mode result.
2699 (define_insn "neon_vabdl<sup><mode>"
2700 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2701 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2702 (match_operand:VW 2 "s_register_operand" "w")]
2705 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2706 [(set_attr "type" "neon_abd_long")]
;; vaba: modelled as a plus of an unspec on operands 2 and 3 and the
;; accumulator operand 1, which is tied to the destination (constraint "0").
2709 (define_insn "neon_vaba<sup><mode>"
2710 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2711 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2712 (match_operand:VDQIW 3 "s_register_operand" "w")]
2714 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2716 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2717 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: modelled as a plus of a <V_widen>-mode unspec on the VW-mode
;; operands 2 and 3 and the <V_widen>-mode accumulator operand 1, which is
;; tied to the destination (constraint "0").
2720 (define_insn "neon_vabal<sup><mode>"
2721 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2722 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2723 (match_operand:VW 3 "s_register_operand" "w")]
2725 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2727 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2728 [(set_attr "type" "neon_arith_acc<q>")]
;; Expander for <sup>sadv16qi: V16QI inputs (operands 1 and 2), a V4SI
;; accumulator (operand 3) and a V4SI result (operand 0).  Sequence emitted:
;;   1. vabdl on the low V8QI parts of operands 1 and 2 into a V8HI temporary;
;;   2. vget_high of each input, then vabal of those high parts into the
;;      same temporary;
;;   3. vpadal of the temporary into operand 3;
;;   4. a move of operand 3 into operand 0.
2731 (define_expand "<sup>sadv16qi"
2732 [(use (match_operand:V4SI 0 "register_operand"))
2733 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2734 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2735 (use (match_operand:V4SI 3 "register_operand"))]
2738 rtx reduc = gen_reg_rtx (V8HImode);
2739 rtx op1_highpart = gen_reg_rtx (V8QImode);
2740 rtx op2_highpart = gen_reg_rtx (V8QImode);
2742 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2743 gen_lowpart (V8QImode, operands[1]),
2744 gen_lowpart (V8QImode, operands[2])));
2746 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2747 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2748 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2749 op1_highpart, op2_highpart));
2750 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2752 emit_move_insn (operands[0], operands[3]);
;; v<maxmin> over the VDQIW modes: two register sources, result in the
;; same mode.  <sup> supplies the type letter in the mnemonic suffix.
2757 (define_insn "neon_v<maxmin><sup><mode>"
2758 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2759 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2760 (match_operand:VDQIW 2 "s_register_operand" "w")]
2763 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2764 [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> for the VCVTF modes: two register sources, result in the same
;; mode.
2767 (define_insn "neon_v<maxmin>f<mode>"
2768 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2769 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2770 (match_operand:VCVTF 2 "s_register_operand" "w")]
2773 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2774 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; v<maxmin> for the VH modes, guarded by TARGET_NEON_FP16INST.
2777 (define_insn "neon_v<maxmin>f<mode>"
2778 [(set (match_operand:VH 0 "s_register_operand" "=w")
2780 [(match_operand:VH 1 "s_register_operand" "w")
2781 (match_operand:VH 2 "s_register_operand" "w")]
2783 "TARGET_NEON_FP16INST"
2784 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2785 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16 on V4HF operands only, guarded by TARGET_NEON_FP16INST.
2788 (define_insn "neon_vp<maxmin>fv4hf"
2789 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2791 [(match_operand:V4HF 1 "s_register_operand" "w")
2792 (match_operand:V4HF 2 "s_register_operand" "w")]
2794 "TARGET_NEON_FP16INST"
2795 "vp<maxmin>.f16\t%P0, %P1, %P2"
2796 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> for the VH modes, guarded by TARGET_NEON_FP16INST.
2799 (define_insn "neon_<fmaxmin_op><mode>"
2801 (match_operand:VH 0 "s_register_operand" "=w")
2803 [(match_operand:VH 1 "s_register_operand" "w")
2804 (match_operand:VH 2 "s_register_operand" "w")]
2806 "TARGET_NEON_FP16INST"
2807 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2808 [(set_attr "type" "neon_fp_minmax_s<q>")]
2811 ;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> for the VCVTF modes; requires TARGET_NEON && TARGET_VFP5.
2812 (define_insn "neon_<fmaxmin_op><mode>"
2813 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2814 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2815 (match_operand:VCVTF 2 "s_register_operand" "w")]
2817 "TARGET_NEON && TARGET_VFP5"
2818 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2819 [(set_attr "type" "neon_fp_minmax_s<q>")]
2822 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Pattern named <fmaxmin><mode>3 for the VCVTF modes.  It prints the same
;; <fmaxmin_op> template as the neon_<fmaxmin_op><mode> pattern and has the
;; same TARGET_NEON && TARGET_VFP5 condition.
2823 (define_insn "<fmaxmin><mode>3"
2824 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2825 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2826 (match_operand:VCVTF 2 "s_register_operand" "w")]
2828 "TARGET_NEON && TARGET_VFP5"
2829 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2830 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Expander for the vpadd intrinsic over the VD modes; it forwards to the
;; neon_vpadd_internal<mode> pattern.
2833 (define_expand "neon_vpadd<mode>"
2834 [(match_operand:VD 0 "s_register_operand")
2835 (match_operand:VD 1 "s_register_operand")
2836 (match_operand:VD 2 "s_register_operand")]
2839 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: one VDQIW-mode source produces a <V_double_width>-mode result.
2844 (define_insn "neon_vpaddl<sup><mode>"
2845 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2846 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2849 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2850 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 (<V_double_width> mode) is tied to the destination
;; (constraint "0"); operand 2 is the VDQIW-mode source printed in the
;; template.
2853 (define_insn "neon_vpadal<sup><mode>"
2854 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2855 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2856 (match_operand:VDQIW 2 "s_register_operand" "w")]
2859 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2860 [(set_attr "type" "neon_reduc_add_acc")]
;; vp<maxmin> over the VDI modes: two register sources, result in the same
;; mode.  <sup> supplies the type letter in the mnemonic suffix.
2863 (define_insn "neon_vp<maxmin><sup><mode>"
2864 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2865 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2866 (match_operand:VDI 2 "s_register_operand" "w")]
2869 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2870 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> for the VCVTF modes: two register sources, result in the
;; same mode.
2873 (define_insn "neon_vp<maxmin>f<mode>"
2874 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2875 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2876 (match_operand:VCVTF 2 "s_register_operand" "w")]
2879 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2880 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps for the VCVTF modes: two register sources, result in the same
;; mode.
2883 (define_insn "neon_vrecps<mode>"
2884 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2885 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2886 (match_operand:VCVTF 2 "s_register_operand" "w")]
2889 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2890 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps for the VH modes, guarded by TARGET_NEON_FP16INST.
2893 (define_insn "neon_vrecps<mode>"
2895 (match_operand:VH 0 "s_register_operand" "=w")
2896 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2897 (match_operand:VH 2 "s_register_operand" "w")]
2899 "TARGET_NEON_FP16INST"
2900 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2901 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts for the VCVTF modes: two register sources, result in the same
;; mode.
2904 (define_insn "neon_vrsqrts<mode>"
2905 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2906 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2907 (match_operand:VCVTF 2 "s_register_operand" "w")]
2910 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2911 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts for the VH modes, guarded by TARGET_NEON_FP16INST.
2914 (define_insn "neon_vrsqrts<mode>"
2916 (match_operand:VH 0 "s_register_operand" "=w")
2917 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2918 (match_operand:VH 2 "s_register_operand" "w")]
2920 "TARGET_NEON_FP16INST"
2921 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2922 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Expander for the vabs intrinsic over the VDQW modes; it forwards to the
;; abs<mode>2 pattern.
2925 (define_expand "neon_vabs<mode>"
2926 [(match_operand:VDQW 0 "s_register_operand")
2927 (match_operand:VDQW 1 "s_register_operand")]
2930 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs over the VDQIW modes: one register source, result in the same mode.
2934 (define_insn "neon_vqabs<mode>"
2935 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2936 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2939 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2940 [(set_attr "type" "neon_qabs<q>")]
;; bswap over the VDQHSD modes, expressed with the generic bswap RTL code
;; and printed as vrev<V_sz_elem>.8.
2943 (define_insn "neon_bswap<mode>"
2944 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2945 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2947 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2948 [(set_attr "type" "neon_rev<q>")]
;; Expander for the vneg intrinsic over the VDQW modes; it forwards to the
;; neon_neg<mode>2 pattern.
2951 (define_expand "neon_vneg<mode>"
2952 [(match_operand:VDQW 0 "s_register_operand")
2953 (match_operand:VDQW 1 "s_register_operand")]
2956 emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
2961 ;; The vcadd and vcmla patterns are made UNSPEC explicitly because their
2962 ;; usage needs to guarantee that the source vectors are contiguous.  It
2963 ;; would be wrong to describe the operation without being able to describe
2964 ;; the permute that is also required, but even if that is done the permute
2965 ;; would have been created as a LOAD_LANES, which means the values in the
2966 ;; registers are in the wrong order.
;; neon_vcadd<rot> for the VF modes: an unspec of two register inputs,
;; output as vcadd.<V_s_elem> with the rotation given as the immediate
;; #<rot>.
2967 (define_insn "neon_vcadd<rot><mode>"
2968 [(set (match_operand:VF 0 "register_operand" "=w")
2969 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2970 (match_operand:VF 2 "register_operand" "w")]
2973 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2974 [(set_attr "type" "neon_fcadd")]
;; neon_vcmla<rot> for the VF modes.  Operand 1 is the accumulator and is
;; tied to the destination (constraint "0"); the unspec of operands 2 and 3
;; is added to it.  Output as vcmla.<V_s_elem> with rotation #<rot>.
2977 (define_insn "neon_vcmla<rot><mode>"
2978 [(set (match_operand:VF 0 "register_operand" "=w")
2979 (plus:VF (match_operand:VF 1 "register_operand" "0")
2980 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2981 (match_operand:VF 3 "register_operand" "w")]
2984 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2985 [(set_attr "type" "neon_fcmla")]
;; Lane form of vcmla for the VF modes.  Operand 1 is the accumulator (tied
;; to operand 0), operand 3 is the vector the lane is taken from and
;; operand 4 is the constant lane index.  The operands are rewritten by
;; neon_vcmla_lane_prepare_operands before the template is returned.
2988 (define_insn "neon_vcmla_lane<rot><mode>"
2989 [(set (match_operand:VF 0 "s_register_operand" "=w")
2990 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2991 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2992 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2993 (match_operand:SI 4 "const_int_operand" "n")]
2997 operands = neon_vcmla_lane_prepare_operands (operands);
2998 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3000 [(set_attr "type" "neon_fcmla")]
;; laneq form of vcmla for the VDF modes: the lane vector (operand 3) has
;; mode <V_DOUBLE>, i.e. it is twice as wide as the accumulator and data
;; operands.  Operands are rewritten by neon_vcmla_lane_prepare_operands
;; before output.
3003 (define_insn "neon_vcmla_laneq<rot><mode>"
3004 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3005 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3006 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3007 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3008 (match_operand:SI 4 "const_int_operand" "n")]
3012 operands = neon_vcmla_lane_prepare_operands (operands);
3013 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3015 [(set_attr "type" "neon_fcmla")]
;; q-register lane form of vcmla for the VQ_HSF modes: the lane vector
;; (operand 3) has mode <V_HALF>, i.e. half the width of the other vector
;; operands.  Operands are rewritten by neon_vcmla_lane_prepare_operands
;; before output.
3018 (define_insn "neon_vcmlaq_lane<rot><mode>"
3019 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3020 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3021 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3022 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3023 (match_operand:SI 4 "const_int_operand" "n")]
3027 operands = neon_vcmla_lane_prepare_operands (operands);
3028 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3030 [(set_attr "type" "neon_fcmla")]
3034 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 (VCVTI) is the accumulator and is tied to the destination;
;; operands 2 and 3 are the <VSI2QI> input vectors.
3035 (define_insn "neon_<sup>dot<vsi2qi>"
3036 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3037 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3038 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3039 "register_operand" "w")
3040 (match_operand:<VSI2QI> 3
3041 "register_operand" "w")]
3044 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3045 [(set_attr "type" "neon_dot<q>")]
3048 ;; These instructions map to the __builtins for the Dot Product operations.
;; This is the vusdot.s8 variant.  The accumulator is operand 1, tied to
;; the destination, and operands 2 and 3 are the <VSI2QI> inputs.
3049 (define_insn "neon_usdot<vsi2qi>"
3050 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3053 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3054 (match_operand:<VSI2QI> 3 "register_operand" "w")]
3056 (match_operand:VCVTI 1 "register_operand" "0")))]
3058 "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3059 [(set_attr "type" "neon_dot<q>")]
3062 ;; These instructions map to the __builtins for the Dot Product
3063 ;; indexed operations.
;; Operand 3 is always a V8QI register (constraint "t") and operand 4 is
;; the lane index, which is remapped with NEON_ENDIAN_LANE_N for V8QImode
;; before the instruction is printed.
3064 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3065 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3066 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3067 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3068 "register_operand" "w")
3069 (match_operand:V8QI 3 "register_operand" "t")
3070 (match_operand:SI 4 "immediate_operand" "i")]
3075 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3076 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3078 [(set_attr "type" "neon_dot<q>")]
3081 ;; These instructions map to the __builtins for the Dot Product
3082 ;; indexed operations in the v8.6 I8MM extension.
;; Here the lane index in operand 4 is rebuilt from its own INTVAL, so its
;; value is printed unchanged (no NEON_ENDIAN_LANE_N remapping is applied
;; in this pattern).
3083 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3084 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3087 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3088 (match_operand:V8QI 3 "register_operand" "t")
3089 (match_operand:SI 4 "immediate_operand" "i")]
3091 (match_operand:VCVTI 1 "register_operand" "0")))]
3094 operands[4] = GEN_INT (INTVAL (operands[4]));
3095 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3097 [(set_attr "type" "neon_dot<q>")]
3100 ;; These expands map to the Dot Product optab the vectorizer checks for.
3101 ;; The auto-vectorizer expects a dot product builtin that also does an
3102 ;; accumulation into the provided register.
3103 ;; Given the following pattern
3105 ;; for (i=0; i<len; i++) {
3111 ;; This can be auto-vectorized to
3112 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3114 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3115 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3116 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3119 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; In this expander operands 1 and 2 are the <VSI2QI> inputs and operand 3
;; is the accumulator.  The dot-product insn is generated with operand 3 as
;; both destination and accumulator, and operand 3 is then copied into
;; operand 0.
3120 (define_expand "<sup>dot_prod<vsi2qi>"
3121 [(set (match_operand:VCVTI 0 "register_operand")
3122 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3124 (match_operand:<VSI2QI> 2
3125 "register_operand")]
3127 (match_operand:VCVTI 3 "register_operand")))]
3131 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3133 emit_insn (gen_rtx_SET (operands[0], operands[3]));
;; Expander for neon_copysignf on the VCVTF modes.  It builds a vector in
;; the <V_cmp_result> mode with every element equal to 0x80000000, moves
;; operand 2 into operand 0, and then emits neon_vbsl with that mask
;; (viewed in <MODE>mode through a subreg) as the selector.
3137 (define_expand "neon_copysignf<mode>"
3138 [(match_operand:VCVTF 0 "register_operand")
3139 (match_operand:VCVTF 1 "register_operand")
3140 (match_operand:VCVTF 2 "register_operand")]
3144 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3145 rtx c = gen_int_mode (0x80000000, SImode);
3147 emit_move_insn (v_bitmask,
3148 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3149 emit_move_insn (operands[0], operands[2]);
3150 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3151 <VCVTF:V_cmp_result>mode, 0);
3152 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; neon_vqneg for the VDQIW modes: a one-input unspec output as
;; vqneg.<V_s_elem>.
3159 (define_insn "neon_vqneg<mode>"
3160 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3161 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3164 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3165 [(set_attr "type" "neon_qneg<q>")]
;; neon_vcls for the VDQIW modes: a one-input unspec output as
;; vcls.<V_s_elem>.
3168 (define_insn "neon_vcls<mode>"
3169 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3170 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3173 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3174 [(set_attr "type" "neon_cls<q>")]
;; clz<mode>2 for the VDQIW modes, described with the generic clz RTL code
;; and output as vclz.<V_if_elem>.
3177 (define_insn "clz<mode>2"
3178 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3179 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3181 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3182 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vclz on the VDQIW modes.  It forwards operands 0 and 1
;; to the clz<mode>2 pattern.
3185 (define_expand "neon_vclz<mode>"
3186 [(match_operand:VDQIW 0 "s_register_operand")
3187 (match_operand:VDQIW 1 "s_register_operand")]
3190 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; popcount<mode>2 for the VE modes, described with the generic popcount
;; RTL code and output as vcnt.<V_sz_elem>.
3194 (define_insn "popcount<mode>2"
3195 [(set (match_operand:VE 0 "s_register_operand" "=w")
3196 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3198 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3199 [(set_attr "type" "neon_cnt<q>")]
;; Expander for neon_vcnt on the VE modes.  It forwards operands 0 and 1 to
;; the popcount<mode>2 pattern.
3202 (define_expand "neon_vcnt<mode>"
3203 [(match_operand:VE 0 "s_register_operand")
3204 (match_operand:VE 1 "s_register_operand")]
3207 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; neon_vrecpe for the VH modes: a one-input unspec output as vrecpe.f16.
;; Only available when TARGET_NEON_FP16INST is true.
3211 (define_insn "neon_vrecpe<mode>"
3212 [(set (match_operand:VH 0 "s_register_operand" "=w")
3213 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3215 "TARGET_NEON_FP16INST"
3216 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3217 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrecpe for the V32 modes: a one-input unspec output as
;; vrecpe.<V_u_elem>.
3220 (define_insn "neon_vrecpe<mode>"
3221 [(set (match_operand:V32 0 "s_register_operand" "=w")
3222 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3225 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3226 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; neon_vrsqrte for the V32 modes: a one-input unspec output as
;; vrsqrte.<V_u_elem>.
3229 (define_insn "neon_vrsqrte<mode>"
3230 [(set (match_operand:V32 0 "s_register_operand" "=w")
3231 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3234 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3235 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Expander for neon_vmvn on the VDQIW modes.  It forwards operands 0 and 1
;; to the one_cmpl<mode>2_neon pattern.
3238 (define_expand "neon_vmvn<mode>"
3239 [(match_operand:VDQIW 0 "s_register_operand")
3240 (match_operand:VDQIW 1 "s_register_operand")]
3243 emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
;; Extract one lane of a VD-mode vector into a core register (SImode) using
;; vmov.s<V_sz_elem>.  On big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before the instruction is printed.
3247 (define_insn "neon_vget_lane<mode>_sext_internal"
3248 [(set (match_operand:SI 0 "s_register_operand" "=r")
3250 (vec_select:<V_elem>
3251 (match_operand:VD 1 "s_register_operand" "w")
3252 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3255 if (BYTES_BIG_ENDIAN)
3257 int elt = INTVAL (operands[2]);
3258 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3259 operands[2] = GEN_INT (elt);
3261 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3263 [(set_attr "type" "neon_to_gp")]
;; Extract one lane of a VD-mode vector into a core register (SImode) using
;; vmov.u<V_sz_elem>.  On big-endian targets the lane index is mirrored
;; (nunits - 1 - elt) before the instruction is printed.
3266 (define_insn "neon_vget_lane<mode>_zext_internal"
3267 [(set (match_operand:SI 0 "s_register_operand" "=r")
3269 (vec_select:<V_elem>
3270 (match_operand:VD 1 "s_register_operand" "w")
3271 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3274 if (BYTES_BIG_ENDIAN)
3276 int elt = INTVAL (operands[2]);
3277 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3278 operands[2] = GEN_INT (elt);
3280 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3282 [(set_attr "type" "neon_to_gp")]
;; Extract one lane of a VQ2-mode vector into a core register (SImode)
;; using vmov.s<V_sz_elem>.  The lane is addressed through a <V_HALF>
;; register: the lane index is reduced modulo the number of lanes per half,
;; and mirrored within that half on big-endian targets.
3285 (define_insn "neon_vget_lane<mode>_sext_internal"
3286 [(set (match_operand:SI 0 "s_register_operand" "=r")
3288 (vec_select:<V_elem>
3289 (match_operand:VQ2 1 "s_register_operand" "w")
3290 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3294 int regno = REGNO (operands[1]);
3295 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3296 unsigned int elt = INTVAL (operands[2]);
3297 unsigned int elt_adj = elt % halfelts;
3299 if (BYTES_BIG_ENDIAN)
3300 elt_adj = halfelts - 1 - elt_adj;
3302 ops[0] = operands[0];
/* Select the <V_HALF> register that holds lane ELT: REGNO for lanes in
   the first half, REGNO + 2 for lanes in the second half.  */
3303 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3304 ops[2] = GEN_INT (elt_adj);
3305 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3309 [(set_attr "type" "neon_to_gp_q")]
;; Extract one lane of a VQ2-mode vector into a core register (SImode)
;; using vmov.u<V_sz_elem>.  The lane is addressed through a <V_HALF>
;; register: the lane index is reduced modulo the number of lanes per half,
;; and mirrored within that half on big-endian targets.
3312 (define_insn "neon_vget_lane<mode>_zext_internal"
3313 [(set (match_operand:SI 0 "s_register_operand" "=r")
3315 (vec_select:<V_elem>
3316 (match_operand:VQ2 1 "s_register_operand" "w")
3317 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3321 int regno = REGNO (operands[1]);
3322 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3323 unsigned int elt = INTVAL (operands[2]);
3324 unsigned int elt_adj = elt % halfelts;
3326 if (BYTES_BIG_ENDIAN)
3327 elt_adj = halfelts - 1 - elt_adj;
3329 ops[0] = operands[0];
/* Select the <V_HALF> register that holds lane ELT: REGNO for lanes in
   the first half, REGNO + 2 for lanes in the second half.  */
3330 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3331 ops[2] = GEN_INT (elt_adj);
3332 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3336 [(set_attr "type" "neon_to_gp_q")]
;; Expander for neon_vget_lane on the VDQW modes.  On big-endian targets
;; the lane number is first XORed with (lanes per 64-bit register - 1).
;; The expansion then uses either vec_extract<mode><V_elem_l> (guarded by
;; the test for 32-bit elements) or the _sext_internal pattern.
3339 (define_expand "neon_vget_lane<mode>"
3340 [(match_operand:<V_ext> 0 "s_register_operand")
3341 (match_operand:VDQW 1 "s_register_operand")
3342 (match_operand:SI 2 "immediate_operand")]
3345 if (BYTES_BIG_ENDIAN)
3347 /* The intrinsics are defined in terms of a model where the
3348 element ordering in memory is vldm order, whereas the generic
3349 RTL is defined in terms of a model where the element ordering
3350 in memory is array order. Convert the lane number to conform
3352 unsigned int elt = INTVAL (operands[2]);
3353 unsigned int reg_nelts
3354 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3355 elt ^= reg_nelts - 1;
3356 operands[2] = GEN_INT (elt);
3359 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3360 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3363 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Expander for neon_vget_laneu on the VDQIW modes.  On big-endian targets
;; the lane number is first XORed with (lanes per 64-bit register - 1).
;; The expansion then uses either vec_extract<mode><V_elem_l> (guarded by
;; the test for 32-bit elements) or the _zext_internal pattern.
3369 (define_expand "neon_vget_laneu<mode>"
3370 [(match_operand:<V_ext> 0 "s_register_operand")
3371 (match_operand:VDQIW 1 "s_register_operand")
3372 (match_operand:SI 2 "immediate_operand")]
3375 if (BYTES_BIG_ENDIAN)
3377 /* The intrinsics are defined in terms of a model where the
3378 element ordering in memory is vldm order, whereas the generic
3379 RTL is defined in terms of a model where the element ordering
3380 in memory is array order. Convert the lane number to conform
3382 unsigned int elt = INTVAL (operands[2]);
3383 unsigned int reg_nelts
3384 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3385 elt ^= reg_nelts - 1;
3386 operands[2] = GEN_INT (elt);
3389 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3390 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3393 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; Expander for neon_vget_lanedi.  Source and destination are both DImode,
;; so the expansion is a plain move of operand 1 into operand 0; the lane
;; operand (operand 2) is not used by the visible code.
3399 (define_expand "neon_vget_lanedi"
3400 [(match_operand:DI 0 "s_register_operand")
3401 (match_operand:DI 1 "s_register_operand")
3402 (match_operand:SI 2 "immediate_operand")]
3405 emit_move_insn (operands[0], operands[1]);
;; Expander for neon_vget_lanev2di.  On big-endian targets the lane number
;; is flipped (XOR with 1).  Lane 0 is then read as the low DImode part of
;; operand 1 and lane 1 as the high part; any other lane trips the assert.
3409 (define_expand "neon_vget_lanev2di"
3410 [(match_operand:DI 0 "s_register_operand")
3411 (match_operand:V2DI 1 "s_register_operand")
3412 (match_operand:SI 2 "immediate_operand")]
3417 if (BYTES_BIG_ENDIAN)
3419 /* The intrinsics are defined in terms of a model where the
3420 element ordering in memory is vldm order, whereas the generic
3421 RTL is defined in terms of a model where the element ordering
3422 in memory is array order. Convert the lane number to conform
3424 unsigned int elt = INTVAL (operands[2]);
3425 unsigned int reg_nelts = 2;
3426 elt ^= reg_nelts - 1;
3427 operands[2] = GEN_INT (elt);
3430 lane = INTVAL (operands[2]);
3431 gcc_assert ((lane ==0) || (lane == 1));
3432 emit_move_insn (operands[0], lane == 0
3433 ? gen_lowpart (DImode, operands[1])
3434 : gen_highpart (DImode, operands[1]));
;; Expander for neon_vset_lane on the VDQ modes.  Operand 1 is the scalar
;; to insert, operand 2 the input vector and operand 3 the lane number.
;; On big-endian targets the lane number is XORed with (lanes per 64-bit
;; register - 1) before use.
3438 (define_expand "neon_vset_lane<mode>"
3439 [(match_operand:VDQ 0 "s_register_operand")
3440 (match_operand:<V_elem> 1 "s_register_operand")
3441 (match_operand:VDQ 2 "s_register_operand")
3442 (match_operand:SI 3 "immediate_operand")]
3445 unsigned int elt = INTVAL (operands[3]);
3447 if (BYTES_BIG_ENDIAN)
3449 unsigned int reg_nelts
3450 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3451 elt ^= reg_nelts - 1;
/* The lane is passed to vec_set<mode>_internal as a one-hot mask
   (1 << elt) rather than as an index.  */
3454 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3455 GEN_INT (1 << elt), operands[2]));
3459 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
; The expansion is a plain DImode move of operand 1 into operand 0.
3461 (define_expand "neon_vset_lanedi"
3462 [(match_operand:DI 0 "s_register_operand")
3463 (match_operand:DI 1 "s_register_operand")
3464 (match_operand:DI 2 "s_register_operand")
3465 (match_operand:SI 3 "immediate_operand")]
3468 emit_move_insn (operands[0], operands[1]);
;; Expander for neon_vcreate on the VD_RE modes.  The DImode operand 1 is
;; reinterpreted in the vector mode with gen_lowpart and moved into
;; operand 0.
3472 (define_expand "neon_vcreate<mode>"
3473 [(match_operand:VD_RE 0 "s_register_operand")
3474 (match_operand:DI 1 "general_operand")]
3477 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3478 emit_move_insn (operands[0], src);
;; neon_vdup_n for the VX modes: duplicate a scalar held in a core register
;; (constraint "r") into every element, output as vdup.<V_sz_elem>.
3482 (define_insn "neon_vdup_n<mode>"
3483 [(set (match_operand:VX 0 "s_register_operand" "=w")
3484 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3486 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3487 [(set_attr "type" "neon_from_gp<q>")]
;; neon_vdup_n for V4HF: duplicate an HFmode scalar held in a core register
;; into every element of the vector.
3490 (define_insn "neon_vdup_nv4hf"
3491 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3492 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3495 [(set_attr "type" "neon_from_gp")]
;; neon_vdup_n for V8HF: duplicate an HFmode scalar held in a core register
;; into every element of the vector.
3498 (define_insn "neon_vdup_nv8hf"
3499 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3500 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3503 [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_n for V4BF: duplicate a BFmode scalar held in a core register
;; into every element of the vector.
3506 (define_insn "neon_vdup_nv4bf"
3507 [(set (match_operand:V4BF 0 "s_register_operand" "=w")
3508 (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
3511 [(set_attr "type" "neon_from_gp")]
;; neon_vdup_n for V8BF: duplicate a BFmode scalar held in a core register
;; into every element of the vector.
3514 (define_insn "neon_vdup_nv8bf"
3515 [(set (match_operand:V8BF 0 "s_register_operand" "=w")
3516 (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
3519 [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_n for the V32 modes, with two alternatives: the scalar comes
;; either from a core register ("r", printed as %1) or from a register
;; matching constraint "t" (printed with the %y modifier).
3522 (define_insn "neon_vdup_n<mode>"
3523 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3524 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3527 vdup.<V_sz_elem>\t%<V_reg>0, %1
3528 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3529 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; Expander for neon_vdup_ndi.  Both operands are DImode, so the expansion
;; is a plain move of operand 1 into operand 0.
3532 (define_expand "neon_vdup_ndi"
3533 [(match_operand:DI 0 "s_register_operand")
3534 (match_operand:DI 1 "s_register_operand")]
3537 emit_move_insn (operands[0], operands[1]);
;; neon_vdup_n for V2DI: duplicate a DImode value into both halves of the
;; destination.  Each alternative emits two vmov instructions (one for %e0
;; and one for %f0), hence the length of 8; the source is either a core
;; register pair ("r") or a NEON register ("w").
3542 (define_insn "neon_vdup_nv2di"
3543 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3544 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3547 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3548 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3549 [(set_attr "length" "8")
3550 (set_attr "type" "multiple")]
;; Internal vdup-from-lane pattern for the VDQW modes.  The lane is selected
;; from a <V_double_vector_mode> register; on big-endian targets the lane
;; index is mirrored (nunits - 1 - elt).  Two templates are returned, one
;; printing the destination with %P and one with %q.
3553 (define_insn "neon_vdup_lane<mode>_internal"
3554 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3556 (vec_select:<V_elem>
3557 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3558 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3561 if (BYTES_BIG_ENDIAN)
3563 int elt = INTVAL (operands[2]);
3564 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3565 operands[2] = GEN_INT (elt);
3568 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3570 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3572 [(set_attr "type" "neon_dup<q>")]
;; Internal vdup-from-lane pattern for the VHFBF modes.  It requires
;; TARGET_NEON together with TARGET_FP16 or TARGET_BF16_SIMD.  The lane
;; index is mirrored (nunits - 1 - elt) on big-endian targets, and two
;; templates are returned, one printing the destination with %P and one
;; with %q.
3575 (define_insn "neon_vdup_lane<mode>_internal"
3576 [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
3577 (vec_duplicate:VHFBF
3578 (vec_select:<V_elem>
3579 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3580 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3581 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3583 if (BYTES_BIG_ENDIAN)
3585 int elt = INTVAL (operands[2]);
3586 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3587 operands[2] = GEN_INT (elt);
3590 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3592 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3594 [(set_attr "type" "neon_dup<q>")]
;; Expander for neon_vdup_lane on the VDQW modes.  On big-endian targets
;; the lane number is XORed with (lanes per 64-bit register - 1), computed
;; from the element size of <V_double_vector_mode>; the work is then done
;; by the matching _internal pattern.
3597 (define_expand "neon_vdup_lane<mode>"
3598 [(match_operand:VDQW 0 "s_register_operand")
3599 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3600 (match_operand:SI 2 "immediate_operand")]
3603 if (BYTES_BIG_ENDIAN)
3605 unsigned int elt = INTVAL (operands[2]);
3606 unsigned int reg_nelts
3607 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3608 elt ^= reg_nelts - 1;
3609 operands[2] = GEN_INT (elt);
3611 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Expander for neon_vdup_lane on the VHFBF modes.  It requires TARGET_NEON
;; together with TARGET_FP16 or TARGET_BF16_SIMD.  On big-endian targets the
;; lane number is XORed with (lanes per 64-bit register - 1); the work is
;; then done by the matching _internal pattern.
3616 (define_expand "neon_vdup_lane<mode>"
3617 [(match_operand:VHFBF 0 "s_register_operand")
3618 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3619 (match_operand:SI 2 "immediate_operand")]
3620 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3622 if (BYTES_BIG_ENDIAN)
3624 unsigned int elt = INTVAL (operands[2]);
3625 unsigned int reg_nelts
3626 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3627 elt ^= reg_nelts - 1;
3628 operands[2] = GEN_INT (elt);
3630 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3635 ; Scalar index is ignored, since only zero is valid here.
; The expansion is a plain DImode move of operand 1 into operand 0.
3636 (define_expand "neon_vdup_lanedi"
3637 [(match_operand:DI 0 "s_register_operand")
3638 (match_operand:DI 1 "s_register_operand")
3639 (match_operand:SI 2 "immediate_operand")]
3642 emit_move_insn (operands[0], operands[1]);
3646 ; Likewise for v2di, as the DImode second operand has only a single element.
; The expansion defers to neon_vdup_nv2di with operands 0 and 1.
3647 (define_expand "neon_vdup_lanev2di"
3648 [(match_operand:V2DI 0 "s_register_operand")
3649 (match_operand:DI 1 "s_register_operand")
3650 (match_operand:SI 2 "immediate_operand")]
3653 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3657 ; Disabled before reload because we don't want combine doing something silly,
3658 ; but used by the post-reload expansion of neon_vcombine.
; Both operands are read-write ("+w"): the two sets exchange the contents
; of operands 0 and 1, output as a single vswp.
3659 (define_insn "*neon_vswp<mode>"
3660 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3661 (match_operand:VDQX 1 "s_register_operand" "+w"))
3662 (set (match_dup 1) (match_dup 0))]
3663 "TARGET_NEON && reload_completed"
3664 "vswp\t%<V_reg>0, %<V_reg>1"
3665 [(set_attr "type" "neon_permute<q>")]
3668 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3670 ;; FIXME: A different implementation of this builtin could make it much
3671 ;; more likely that we wouldn't actually need to output anything (we could make
3672 ;; it so that the reg allocator puts things in the right places magically
3673 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; The pattern is a vec_concat of two VDX operands into a <V_DOUBLE>
;; result.  It is split once reload has completed, with the split
;; performed by neon_split_vcombine.
3675 (define_insn_and_split "neon_vcombine<mode>"
3676 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3677 (vec_concat:<V_DOUBLE>
3678 (match_operand:VDX 1 "s_register_operand" "w")
3679 (match_operand:VDX 2 "s_register_operand" "w")))]
3682 "&& reload_completed"
3685 neon_split_vcombine (operands);
3688 [(set_attr "type" "multiple")]
;; Expander for neon_vget_high on the VQXBF modes.  It moves into operand 0
;; the <V_HALF> subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the second half of the register.
3691 (define_expand "neon_vget_high<mode>"
3692 [(match_operand:<V_HALF> 0 "s_register_operand")
3693 (match_operand:VQXBF 1 "s_register_operand")]
3696 emit_move_insn (operands[0],
3697 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3698 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for neon_vget_low on the VQX modes.  It moves a <V_HALF> subreg
;; of operand 1 into operand 0.
3702 (define_expand "neon_vget_low<mode>"
3703 [(match_operand:<V_HALF> 0 "s_register_operand")
3704 (match_operand:VQX 1 "s_register_operand")]
3707 emit_move_insn (operands[0],
3708 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer to floating-point conversion for the VCVTI modes, output
;; as vcvt.f32.s32.  Disabled when flag_rounding_math is set.
3713 (define_insn "float<mode><V_cvtto>2"
3714 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3715 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3716 "TARGET_NEON && !flag_rounding_math"
3717 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3718 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer to floating-point conversion for the VCVTI modes,
;; output as vcvt.f32.u32.  Disabled when flag_rounding_math is set.
3721 (define_insn "floatuns<mode><V_cvtto>2"
3722 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3723 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3724 "TARGET_NEON && !flag_rounding_math"
3725 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3726 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Floating-point to signed integer conversion (RTL code fix) for the VCVTF
;; modes, output as vcvt.s32.f32.
3729 (define_insn "fix_trunc<mode><V_cvtto>2"
3730 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3731 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3733 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3734 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Floating-point to unsigned integer conversion (RTL code unsigned_fix)
;; for the VCVTF modes, output as vcvt.u32.f32.
3737 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3738 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3739 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3741 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3742 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic-level float to integer conversion for the VCVTF modes, as an
;; unspec.  The signedness letter <sup> selects the 32-bit integer type in
;; the mnemonic.
3745 (define_insn "neon_vcvt<sup><mode>"
3746 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3747 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3750 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3751 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic-level integer to float conversion for the VCVTI modes, as an
;; unspec.  The signedness letter <sup> selects the 32-bit integer source
;; type in the mnemonic.
3754 (define_insn "neon_vcvt<sup><mode>"
3755 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3756 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3759 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3760 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion from V4HF (D register, %P1) to V4SF (Q register,
;; %q0), output as vcvt.f32.f16.  Requires TARGET_NEON and TARGET_FP16.
3763 (define_insn "neon_vcvtv4sfv4hf"
3764 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3765 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3767 "TARGET_NEON && TARGET_FP16"
3768 "vcvt.f32.f16\t%q0, %P1"
3769 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion from V4SF (Q register, %q1) to V4HF (D register,
;; %P0), output as vcvt.f16.f32.  Requires TARGET_NEON and TARGET_FP16.
3772 (define_insn "neon_vcvtv4hfv4sf"
3773 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3774 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3776 "TARGET_NEON && TARGET_FP16"
3777 "vcvt.f16.f32\t%P0, %q1"
3778 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; 16-bit integer to half-precision conversion for the VCVTHI modes, output
;; as vcvt.f16.<sup>16.  Only available when TARGET_NEON_FP16INST is true.
3781 (define_insn "neon_vcvt<sup><mode>"
3783 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3785 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3787 "TARGET_NEON_FP16INST"
3788 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3789 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision to 16-bit integer conversion for the VH modes, output as
;; vcvt.<sup>16.f16.  Only available when TARGET_NEON_FP16INST is true.
3792 (define_insn "neon_vcvt<sup><mode>"
3794 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3796 [(match_operand:VH 1 "s_register_operand" "w")]
3798 "TARGET_NEON_FP16INST"
3799 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3800 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Float to integer conversion with an extra immediate (operand 2) for the
;; VCVTF modes.  The immediate is checked with arm_const_bounds (1, 33)
;; and printed as the third assembly operand.
3803 (define_insn "neon_vcvt<sup>_n<mode>"
3804 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3805 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3806 (match_operand:SI 2 "immediate_operand" "i")]
3810 arm_const_bounds (operands[2], 1, 33);
3811 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3813 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Half-precision to 16-bit integer conversion with an extra immediate
;; (operand 2) for the VH modes; requires TARGET_NEON_FP16INST.  The
;; immediate is checked with arm_const_bounds (0, 17).
;; NOTE(review): the 32-bit variant of this pattern passes 1 as the lower
;; bound whereas 0 is passed here -- confirm that 0 is intended.
3816 (define_insn "neon_vcvt<sup>_n<mode>"
3817 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3819 [(match_operand:VH 1 "s_register_operand" "w")
3820 (match_operand:SI 2 "immediate_operand" "i")]
3822 "TARGET_NEON_FP16INST"
3824 arm_const_bounds (operands[2], 0, 17);
3825 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3827 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Integer to float conversion with an extra immediate (operand 2) for the
;; VCVTI modes.  The immediate is checked with arm_const_bounds (1, 33)
;; and printed as the third assembly operand.
3830 (define_insn "neon_vcvt<sup>_n<mode>"
3831 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3832 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3833 (match_operand:SI 2 "immediate_operand" "i")]
3837 arm_const_bounds (operands[2], 1, 33);
3838 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3840 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; 16-bit integer to half-precision conversion with an extra immediate
;; (operand 2) for the VCVTHI modes; requires TARGET_NEON_FP16INST.  The
;; immediate is checked with arm_const_bounds (0, 17).
;; NOTE(review): the 32-bit variant of this pattern passes 1 as the lower
;; bound whereas 0 is passed here -- confirm that 0 is intended.
3843 (define_insn "neon_vcvt<sup>_n<mode>"
3844 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3846 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3847 (match_operand:SI 2 "immediate_operand" "i")]
3849 "TARGET_NEON_FP16INST"
3851 arm_const_bounds (operands[2], 0, 17);
3852 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3854 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision to 16-bit integer conversion for the VH modes where the
;; mnemonic carries the <vcvth_op> suffix (vcvt<vcvth_op>).  Only available
;; when TARGET_NEON_FP16INST is true.
3857 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3859 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3861 [(match_operand:VH 1 "s_register_operand" "w")]
3863 "TARGET_NEON_FP16INST"
3864 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3865 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vmovn for the VN modes: the result has mode <V_narrow> and is
;; printed as a D register (%P0), the source as a Q register (%q1).
3868 (define_insn "neon_vmovn<mode>"
3869 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3870 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3873 "vmovn.<V_if_elem>\t%P0, %q1"
3874 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<sup> for the VN modes: Q-register source, <V_narrow>
;; D-register result, with the signedness letter taken from <sup>.
3877 (define_insn "neon_vqmovn<sup><mode>"
3878 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3879 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3882 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3883 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun for the VN modes: Q-register source, <V_narrow> D-register
;; result, output as vqmovun.<V_s_elem>.
3886 (define_insn "neon_vqmovun<mode>"
3887 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3888 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3891 "vqmovun.<V_s_elem>\t%P0, %q1"
3892 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<sup> for the VW modes: D-register source (%P1), <V_widen>
;; Q-register result (%q0), with the signedness letter taken from <sup>.
3895 (define_insn "neon_vmovl<sup><mode>"
3896 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3897 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3900 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3901 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by lane for the VMD modes: operand 1 is multiplied by lane
;; operand 3 of operand 2, all printed as D registers.  The type attribute
;; distinguishes floating-point from integer modes via <Is_float_mode>.
3904 (define_insn "neon_vmul_lane<mode>"
3905 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3906 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3907 (match_operand:VMD 2 "s_register_operand"
3908 "<scalar_mul_constraint>")
3909 (match_operand:SI 3 "immediate_operand" "i")]
3913 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3916 (if_then_else (match_test "<Is_float_mode>")
3917 (const_string "neon_fp_mul_s_scalar<q>")
3918 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by lane for the VMQ modes: destination and operand 1 are Q
;; registers, while the lane vector (operand 2) has mode <V_HALF> and is
;; printed as a D register.  The type attribute distinguishes
;; floating-point from integer modes via <Is_float_mode>.
3921 (define_insn "neon_vmul_lane<mode>"
3922 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3923 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3924 (match_operand:<V_HALF> 2 "s_register_operand"
3925 "<scalar_mul_constraint>")
3926 (match_operand:SI 3 "immediate_operand" "i")]
3930 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3933 (if_then_else (match_test "<Is_float_mode>")
3934 (const_string "neon_fp_mul_s_scalar<q>")
3935 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by lane for the VH modes, output as vmul.f16.  The lane vector
;; (operand 2) is always V4HF and printed as a D register.  Only available
;; when TARGET_NEON_FP16INST is true.
3938 (define_insn "neon_vmul_lane<mode>"
3939 [(set (match_operand:VH 0 "s_register_operand" "=w")
3940 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3941 (match_operand:V4HF 2 "s_register_operand"
3942 "<scalar_mul_constraint>")
3943 (match_operand:SI 3 "immediate_operand" "i")]
3945 "TARGET_NEON_FP16INST"
3946 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3947 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening multiply by lane for the VMDI modes: D-register inputs, a
;; <V_widen> Q-register result, with the signedness letter taken from
;; <sup>.
3950 (define_insn "neon_vmull<sup>_lane<mode>"
3951 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3952 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3953 (match_operand:VMDI 2 "s_register_operand"
3954 "<scalar_mul_constraint>")
3955 (match_operand:SI 3 "immediate_operand" "i")]
3959 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3961 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by lane for the VMDI modes (UNSPEC_VQDMULL_LANE): D-register
;; inputs and a <V_widen> Q-register result.
3964 (define_insn "neon_vqdmull_lane<mode>"
3965 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3966 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3967 (match_operand:VMDI 2 "s_register_operand"
3968 "<scalar_mul_constraint>")
3969 (match_operand:SI 3 "immediate_operand" "i")]
3970 UNSPEC_VQDMULL_LANE))]
3973 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3975 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq<r>dmulh by lane for the VMQI modes: Q-register destination and first
;; source; the lane vector (operand 2) has mode <V_HALF> and is printed as
;; a D register.
3978 (define_insn "neon_vq<r>dmulh_lane<mode>"
3979 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3980 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3981 (match_operand:<V_HALF> 2 "s_register_operand"
3982 "<scalar_mul_constraint>")
3983 (match_operand:SI 3 "immediate_operand" "i")]
3987 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3989 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vq<r>dmulh by lane for the VMDI modes: all vector operands are printed
;; as D registers.
;; NOTE(review): the type attribute carries the "_q" suffix although this
;; is the D-register form -- confirm whether that is intended.
3992 (define_insn "neon_vq<r>dmulh_lane<mode>"
3993 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3994 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3995 (match_operand:VMDI 2 "s_register_operand"
3996 "<scalar_mul_constraint>")
3997 (match_operand:SI 3 "immediate_operand" "i")]
4001 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4003 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4006 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form: operand 1 is the accumulator (tied to operand 0), operand 2
;; the Q-register multiplicand, and operand 3 the <V_HALF> lane vector
;; printed as a D register.
4007 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4008 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4009 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4010 (match_operand:VMQI 2 "s_register_operand" "w")
4011 (match_operand:<V_HALF> 3 "s_register_operand"
4012 "<scalar_mul_constraint>")
4013 (match_operand:SI 4 "immediate_operand" "i")]
4018 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4020 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI form of the vqrdml{a,s}h lane pattern: operand 1 is the accumulator
;; (tied to operand 0) and all vector operands are printed as D registers.
4023 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4024 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4025 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4026 (match_operand:VMDI 2 "s_register_operand" "w")
4027 (match_operand:VMDI 3 "s_register_operand"
4028 "<scalar_mul_constraint>")
4029 (match_operand:SI 4 "immediate_operand" "i")]
4034 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4036 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by lane for the VMD modes: operand 1 is the
;; accumulator (tied to operand 0), operand 2 the multiplicand and operand
;; 3 the lane vector, all printed as D registers.  The type attribute
;; distinguishes floating-point from integer modes via <Is_float_mode>.
4039 (define_insn "neon_vmla_lane<mode>"
4040 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4041 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4042 (match_operand:VMD 2 "s_register_operand" "w")
4043 (match_operand:VMD 3 "s_register_operand"
4044 "<scalar_mul_constraint>")
4045 (match_operand:SI 4 "immediate_operand" "i")]
4049 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4052 (if_then_else (match_test "<Is_float_mode>")
4053 (const_string "neon_fp_mla_s_scalar<q>")
4054 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla by scalar lane, quad-register form (VMQ modes).
;; Operand 1 (accumulator) is tied to the output; operand 2 is the
;; Q-register multiplicand; operand 3 is a <V_HALF> register holding the
;; scalar selected by the immediate lane index in operand 4.  The "type"
;; attribute is chosen by <Is_float_mode>.
4057 (define_insn "neon_vmla_lane<mode>"
4058 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4059 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4060 (match_operand:VMQ 2 "s_register_operand" "w")
4061 (match_operand:<V_HALF> 3 "s_register_operand"
4062 "<scalar_mul_constraint>")
4063 (match_operand:SI 4 "immediate_operand" "i")]
4067 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4070 (if_then_else (match_test "<Is_float_mode>")
4071 (const_string "neon_fp_mla_s_scalar<q>")
4072 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal by scalar lane: the result and the tied accumulator (operand 1)
;; have the widened mode <V_widen> and are printed as a Q register, while
;; the multiplicand (operand 2) and the scalar source (operand 3) are
;; VMDI D registers.  Operand 4 is the immediate lane index.
4075 (define_insn "neon_vmlal<sup>_lane<mode>"
4076 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4077 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4078 (match_operand:VMDI 2 "s_register_operand" "w")
4079 (match_operand:VMDI 3 "s_register_operand"
4080 "<scalar_mul_constraint>")
4081 (match_operand:SI 4 "immediate_operand" "i")]
4085 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4087 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar lane (UNSPEC_VQDMLAL_LANE).  The result and the tied
;; accumulator (operand 1) have mode <V_widen>; operand 2 and the scalar
;; source operand 3 are VMDI D registers; operand 4 is the immediate lane
;; index.
4090 (define_insn "neon_vqdmlal_lane<mode>"
4091 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4092 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4093 (match_operand:VMDI 2 "s_register_operand" "w")
4094 (match_operand:VMDI 3 "s_register_operand"
4095 "<scalar_mul_constraint>")
4096 (match_operand:SI 4 "immediate_operand" "i")]
4097 UNSPEC_VQDMLAL_LANE))]
4100 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4102 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls by scalar lane, double-register form (VMD modes).
;; Operand 1 (accumulator) is tied to the output; operand 2 is the vector
;; multiplicand; operand 3 holds the scalar selected by the immediate
;; lane index in operand 4.  The "type" attribute is chosen by
;; <Is_float_mode>.
4105 (define_insn "neon_vmls_lane<mode>"
4106 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4107 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4108 (match_operand:VMD 2 "s_register_operand" "w")
4109 (match_operand:VMD 3 "s_register_operand"
4110 "<scalar_mul_constraint>")
4111 (match_operand:SI 4 "immediate_operand" "i")]
4115 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4118 (if_then_else (match_test "<Is_float_mode>")
4119 (const_string "neon_fp_mla_s_scalar<q>")
4120 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls by scalar lane, quad-register form (VMQ modes).
;; Operand 1 (accumulator) is tied to the output; operand 2 is the
;; Q-register multiplicand; operand 3 is a <V_HALF> register holding the
;; scalar selected by the immediate lane index in operand 4.
4123 (define_insn "neon_vmls_lane<mode>"
4124 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4125 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4126 (match_operand:VMQ 2 "s_register_operand" "w")
4127 (match_operand:<V_HALF> 3 "s_register_operand"
4128 "<scalar_mul_constraint>")
4129 (match_operand:SI 4 "immediate_operand" "i")]
4133 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4136 (if_then_else (match_test "<Is_float_mode>")
4137 (const_string "neon_fp_mla_s_scalar<q>")
4138 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl by scalar lane: the result and the tied accumulator (operand 1)
;; have the widened mode <V_widen> and are printed as a Q register; the
;; multiplicand (operand 2) and the scalar source (operand 3) are VMDI
;; D registers.  Operand 4 is the immediate lane index.
4141 (define_insn "neon_vmlsl<sup>_lane<mode>"
4142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4143 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4144 (match_operand:VMDI 2 "s_register_operand" "w")
4145 (match_operand:VMDI 3 "s_register_operand"
4146 "<scalar_mul_constraint>")
4147 (match_operand:SI 4 "immediate_operand" "i")]
4151 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4153 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar lane (UNSPEC_VQDMLSL_LANE).  The result and the tied
;; accumulator (operand 1) have mode <V_widen>; operand 2 and the scalar
;; source operand 3 are VMDI D registers; operand 4 is the immediate lane
;; index.
4156 (define_insn "neon_vqdmlsl_lane<mode>"
4157 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4158 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4159 (match_operand:VMDI 2 "s_register_operand" "w")
4160 (match_operand:VMDI 3 "s_register_operand"
4161 "<scalar_mul_constraint>")
4162 (match_operand:SI 4 "immediate_operand" "i")]
4163 UNSPEC_VQDMLSL_LANE))]
4166 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4168 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4171 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4172 ; core register into a temp register, then use a scalar taken from that. This
4173 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4174 ; or extracted from another vector. In the latter case it's currently better to
4175 ; use the "_lane" variant, and the former case can probably be implemented
4176 ; using vld1_lane, but that hasn't been done yet.
;; Expander for multiply-by-scalar on D-register modes (VMD).
;; The <V_elem> scalar in operand 2 is inserted into a fresh <MODE>
;; temporary with neon_vset_lane (const0_rtx is its last argument), and
;; that temporary is then passed to neon_vmul_lane<mode> as the scalar
;; source.
4178 (define_expand "neon_vmul_n<mode>"
4179 [(match_operand:VMD 0 "s_register_operand")
4180 (match_operand:VMD 1 "s_register_operand")
4181 (match_operand:<V_elem> 2 "s_register_operand")]
4184 rtx tmp = gen_reg_rtx (<MODE>mode);
4185 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4186 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-by-scalar on Q-register modes (VMQ).
;; The scalar in operand 2 is inserted into a fresh <V_HALF> temporary
;; with neon_vset_lane<V_half>, because the by-lane insn takes its scalar
;; from a half-width register; the temporary is then passed to
;; neon_vmul_lane<mode>.
4191 (define_expand "neon_vmul_n<mode>"
4192 [(match_operand:VMQ 0 "s_register_operand")
4193 (match_operand:VMQ 1 "s_register_operand")
4194 (match_operand:<V_elem> 2 "s_register_operand")]
4197 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4198 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4199 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-by-scalar on the VH modes, enabled only when
;; TARGET_NEON_FP16INST holds.  The scalar in operand 2 is always
;; inserted into a V4HF temporary (via neon_vset_lanev4hf), which is then
;; passed to neon_vmul_lane<mode> as the scalar source.
4204 (define_expand "neon_vmul_n<mode>"
4205 [(match_operand:VH 0 "s_register_operand")
4206 (match_operand:VH 1 "s_register_operand")
4207 (match_operand:<V_elem> 2 "s_register_operand")]
4208 "TARGET_NEON_FP16INST"
4210 rtx tmp = gen_reg_rtx (V4HFmode);
4211 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4212 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Expander for the by-scalar form of neon_vmulls_lane.  The result has
;; mode <V_widen>; the vector input is VMDI.  The scalar in operand 2 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane and that
;; temporary is passed to neon_vmulls_lane<mode>.
4217 (define_expand "neon_vmulls_n<mode>"
4218 [(match_operand:<V_widen> 0 "s_register_operand")
4219 (match_operand:VMDI 1 "s_register_operand")
4220 (match_operand:<V_elem> 2 "s_register_operand")]
4223 rtx tmp = gen_reg_rtx (<MODE>mode);
4224 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4225 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Expander for the by-scalar form of neon_vmullu_lane.  The result has
;; mode <V_widen>; the vector input is VMDI.  The scalar in operand 2 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane and that
;; temporary is passed to neon_vmullu_lane<mode>.
4230 (define_expand "neon_vmullu_n<mode>"
4231 [(match_operand:<V_widen> 0 "s_register_operand")
4232 (match_operand:VMDI 1 "s_register_operand")
4233 (match_operand:<V_elem> 2 "s_register_operand")]
4236 rtx tmp = gen_reg_rtx (<MODE>mode);
4237 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4238 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Expander for the by-scalar form of neon_vqdmull_lane.  The result has
;; mode <V_widen>; the vector input is VMDI.  The scalar in operand 2 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane and that
;; temporary is passed to neon_vqdmull_lane<mode>.
4243 (define_expand "neon_vqdmull_n<mode>"
4244 [(match_operand:<V_widen> 0 "s_register_operand")
4245 (match_operand:VMDI 1 "s_register_operand")
4246 (match_operand:<V_elem> 2 "s_register_operand")]
4249 rtx tmp = gen_reg_rtx (<MODE>mode);
4250 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4251 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmulh by scalar on D-register integer modes (VMDI).
;; The scalar in operand 2 is inserted into a fresh <MODE> temporary with
;; neon_vset_lane and that temporary is passed to
;; neon_vqdmulh_lane<mode>.
4256 (define_expand "neon_vqdmulh_n<mode>"
4257 [(match_operand:VMDI 0 "s_register_operand")
4258 (match_operand:VMDI 1 "s_register_operand")
4259 (match_operand:<V_elem> 2 "s_register_operand")]
4262 rtx tmp = gen_reg_rtx (<MODE>mode);
4263 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4264 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqrdmulh by scalar on D-register integer modes (VMDI).
;; The scalar in operand 2 is inserted into a fresh <MODE> temporary with
;; neon_vset_lane and that temporary is passed to
;; neon_vqrdmulh_lane<mode>.
4269 (define_expand "neon_vqrdmulh_n<mode>"
4270 [(match_operand:VMDI 0 "s_register_operand")
4271 (match_operand:VMDI 1 "s_register_operand")
4272 (match_operand:<V_elem> 2 "s_register_operand")]
4275 rtx tmp = gen_reg_rtx (<MODE>mode);
4276 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4277 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqdmulh by scalar on Q-register integer modes (VMQI).
;; The scalar in operand 2 is inserted into a fresh <V_HALF> temporary
;; with neon_vset_lane<V_half>, which is then passed to
;; neon_vqdmulh_lane<mode> as the scalar source.
4282 (define_expand "neon_vqdmulh_n<mode>"
4283 [(match_operand:VMQI 0 "s_register_operand")
4284 (match_operand:VMQI 1 "s_register_operand")
4285 (match_operand:<V_elem> 2 "s_register_operand")]
4288 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4289 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4290 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for vqrdmulh by scalar on Q-register integer modes (VMQI).
;; The scalar in operand 2 is inserted into a fresh <V_HALF> temporary
;; with neon_vset_lane<V_half>, which is then passed to
;; neon_vqrdmulh_lane<mode> as the scalar source.
4295 (define_expand "neon_vqrdmulh_n<mode>"
4296 [(match_operand:VMQI 0 "s_register_operand")
4297 (match_operand:VMQI 1 "s_register_operand")
4298 (match_operand:<V_elem> 2 "s_register_operand")]
4301 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4302 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4303 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Expander for multiply-accumulate by scalar on D-register modes (VMD).
;; Operands 1 and 2 are forwarded unchanged to neon_vmla_lane<mode>; the
;; scalar in operand 3 is first inserted into a fresh <MODE> temporary
;; with neon_vset_lane.
4308 (define_expand "neon_vmla_n<mode>"
4309 [(match_operand:VMD 0 "s_register_operand")
4310 (match_operand:VMD 1 "s_register_operand")
4311 (match_operand:VMD 2 "s_register_operand")
4312 (match_operand:<V_elem> 3 "s_register_operand")]
4315 rtx tmp = gen_reg_rtx (<MODE>mode);
4316 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4317 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-accumulate by scalar on Q-register modes (VMQ).
;; Operands 1 and 2 are forwarded unchanged to neon_vmla_lane<mode>; the
;; scalar in operand 3 is first inserted into a fresh <V_HALF> temporary
;; with neon_vset_lane<V_half>.
4322 (define_expand "neon_vmla_n<mode>"
4323 [(match_operand:VMQ 0 "s_register_operand")
4324 (match_operand:VMQ 1 "s_register_operand")
4325 (match_operand:VMQ 2 "s_register_operand")
4326 (match_operand:<V_elem> 3 "s_register_operand")]
4329 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4330 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4331 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vmlals_lane.  Operands 0 and 1
;; have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4336 (define_expand "neon_vmlals_n<mode>"
4337 [(match_operand:<V_widen> 0 "s_register_operand")
4338 (match_operand:<V_widen> 1 "s_register_operand")
4339 (match_operand:VMDI 2 "s_register_operand")
4340 (match_operand:<V_elem> 3 "s_register_operand")]
4343 rtx tmp = gen_reg_rtx (<MODE>mode);
4344 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4345 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vmlalu_lane.  Operands 0 and 1
;; have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4350 (define_expand "neon_vmlalu_n<mode>"
4351 [(match_operand:<V_widen> 0 "s_register_operand")
4352 (match_operand:<V_widen> 1 "s_register_operand")
4353 (match_operand:VMDI 2 "s_register_operand")
4354 (match_operand:<V_elem> 3 "s_register_operand")]
4357 rtx tmp = gen_reg_rtx (<MODE>mode);
4358 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4359 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vqdmlal_lane.  Operands 0 and
;; 1 have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4364 (define_expand "neon_vqdmlal_n<mode>"
4365 [(match_operand:<V_widen> 0 "s_register_operand")
4366 (match_operand:<V_widen> 1 "s_register_operand")
4367 (match_operand:VMDI 2 "s_register_operand")
4368 (match_operand:<V_elem> 3 "s_register_operand")]
4371 rtx tmp = gen_reg_rtx (<MODE>mode);
4372 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4373 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-subtract by scalar on D-register modes (VMD).
;; Operands 1 and 2 are forwarded unchanged to neon_vmls_lane<mode>; the
;; scalar in operand 3 is first inserted into a fresh <MODE> temporary
;; with neon_vset_lane.
4378 (define_expand "neon_vmls_n<mode>"
4379 [(match_operand:VMD 0 "s_register_operand")
4380 (match_operand:VMD 1 "s_register_operand")
4381 (match_operand:VMD 2 "s_register_operand")
4382 (match_operand:<V_elem> 3 "s_register_operand")]
4385 rtx tmp = gen_reg_rtx (<MODE>mode);
4386 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4387 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for multiply-subtract by scalar on Q-register modes (VMQ).
;; Operands 1 and 2 are forwarded unchanged to neon_vmls_lane<mode>; the
;; scalar in operand 3 is first inserted into a fresh <V_HALF> temporary
;; with neon_vset_lane<V_half>.
4392 (define_expand "neon_vmls_n<mode>"
4393 [(match_operand:VMQ 0 "s_register_operand")
4394 (match_operand:VMQ 1 "s_register_operand")
4395 (match_operand:VMQ 2 "s_register_operand")
4396 (match_operand:<V_elem> 3 "s_register_operand")]
4399 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4400 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4401 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vmlsls_lane.  Operands 0 and 1
;; have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4406 (define_expand "neon_vmlsls_n<mode>"
4407 [(match_operand:<V_widen> 0 "s_register_operand")
4408 (match_operand:<V_widen> 1 "s_register_operand")
4409 (match_operand:VMDI 2 "s_register_operand")
4410 (match_operand:<V_elem> 3 "s_register_operand")]
4413 rtx tmp = gen_reg_rtx (<MODE>mode);
4414 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4415 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vmlslu_lane.  Operands 0 and 1
;; have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4420 (define_expand "neon_vmlslu_n<mode>"
4421 [(match_operand:<V_widen> 0 "s_register_operand")
4422 (match_operand:<V_widen> 1 "s_register_operand")
4423 (match_operand:VMDI 2 "s_register_operand")
4424 (match_operand:<V_elem> 3 "s_register_operand")]
4427 rtx tmp = gen_reg_rtx (<MODE>mode);
4428 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4429 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the by-scalar form of neon_vqdmlsl_lane.  Operands 0 and
;; 1 have mode <V_widen>; operand 2 is VMDI.  The scalar in operand 3 is
;; inserted into a fresh <MODE> temporary with neon_vset_lane before the
;; by-lane pattern is emitted.
4434 (define_expand "neon_vqdmlsl_n<mode>"
4435 [(match_operand:<V_widen> 0 "s_register_operand")
4436 (match_operand:<V_widen> 1 "s_register_operand")
4437 (match_operand:VMDI 2 "s_register_operand")
4438 (match_operand:<V_elem> 3 "s_register_operand")]
4441 rtx tmp = gen_reg_rtx (<MODE>mode);
4442 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4443 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two vectors of the same VDQX mode.  Operand 3 is the immediate
;; element offset; it is range-checked at output time by
;; arm_const_bounds against 0 and the number of elements in <MODE>, and
;; is then printed as the final operand of the instruction.
4448 (define_insn "@neon_vext<mode>"
4449 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4450 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4451 (match_operand:VDQX 2 "s_register_operand" "w")
4452 (match_operand:SI 3 "immediate_operand" "i")]
4456 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4457 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4459 [(set_attr "type" "neon_ext<q>")]
;; vrev64 on a single vector input, defined for the VDQ modes.  Source
;; and destination have the same mode; the element size suffix comes
;; from <V_sz_elem>.
4462 (define_insn "@neon_vrev64<mode>"
4463 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4464 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4467 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4468 [(set_attr "type" "neon_rev<q>")]
;; vrev32 on a single vector input, defined for the VX modes.  Source
;; and destination have the same mode; the element size suffix comes
;; from <V_sz_elem>.
4471 (define_insn "@neon_vrev32<mode>"
4472 [(set (match_operand:VX 0 "s_register_operand" "=w")
4473 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4476 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4477 [(set_attr "type" "neon_rev<q>")]
;; vrev16 on a single vector input, defined for the VE modes.  Source
;; and destination have the same mode; the element size suffix comes
;; from <V_sz_elem>.
4480 (define_insn "@neon_vrev16<mode>"
4481 [(set (match_operand:VE 0 "s_register_operand" "=w")
4482 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4485 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4486 [(set_attr "type" "neon_rev<q>")]
4489 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4490 ; allocation. For an intrinsic of form:
4491 ; rD = vbsl_* (rS, rN, rM)
4492 ; We can use any of:
4493 ; vbsl rS, rN, rM (if D = S)
4494 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4495 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three constraint alternatives, one per input that
;; may be tied to the output register:
;;   alt 0: operand 1 tied to operand 0 -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied to operand 0 -> vbit %0, %2, %1
;;   alt 2: operand 2 tied to operand 0 -> vbif %0, %3, %1
;; All operands share the same VDQX mode.
4497 (define_insn "neon_vbsl<mode>_internal"
4498 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4499 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4500 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4501 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4505 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4506 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4507 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4508 [(set_attr "type" "neon_bsl<q>")]
;; Expander for bitwise select.  Operand 1 arrives in mode
;; <V_cmp_result>, whereas operands 0, 2 and 3 are VDQX; the preparation
;; code reinterprets operand 1 in <MODE> with gen_lowpart so that all
;; operands of the generated unspec share one mode.
4511 (define_expand "@neon_vbsl<mode>"
4512 [(set (match_operand:VDQX 0 "s_register_operand")
4513 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4514 (match_operand:VDQX 2 "s_register_operand")
4515 (match_operand:VDQX 3 "s_register_operand")]
4519 /* We can't alias operands together if they have different modes. */
4520 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Vector shift where the shift amounts come from a vector register
;; (operand 2), for the VDQIX modes.  The mnemonic is built from
;; <shift_op> and the signedness prefix from <sup>; this variant carries
;; the "neon_shift_imm<q>" type.
4524 (define_insn "neon_v<shift_op><sup><mode>"
4525 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4526 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4527 (match_operand:VDQIX 2 "s_register_operand" "w")]
4530 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4531 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector shift where the shift amounts come from a vector register
;; (operand 2), for the VDQIX modes.  Same operand layout and assembly
;; template as the sibling pattern of the same name, but this variant
;; carries the saturating type "neon_sat_shift_imm<q>".
4535 (define_insn "neon_v<shift_op><sup><mode>"
4536 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4537 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4538 (match_operand:VDQIX 2 "s_register_operand" "w")]
4541 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4542 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Vector shift by immediate for the VDQIX modes.  Operand 2 is the
;; immediate shift count; it is checked at output time by
;; arm_const_bounds with bounds 1 and (element bits + 1) and then printed
;; as the last operand.
4546 (define_insn "neon_v<shift_op><sup>_n<mode>"
4547 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4548 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4549 (match_operand:SI 2 "immediate_operand" "i")]
4553 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4554 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4556 [(set_attr "type" "neon_shift_imm<q>")]
4559 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN source is printed as a Q
;; register and the <V_narrow> result as a D register.  Operand 2 is the
;; immediate shift count, checked by arm_const_bounds with bounds 1 and
;; (element bits / 2 + 1).
4560 (define_insn "neon_v<shift_op>_n<mode>"
4561 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4562 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4563 (match_operand:SI 2 "immediate_operand" "i")]
4567 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4568 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4570 [(set_attr "type" "neon_shift_imm_narrow_q")]
4573 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate: the VN source is printed as
;; a Q register and the <V_narrow> result as a D register, with the
;; signedness prefix taken from <sup>.  Operand 2 is the immediate shift
;; count, checked by arm_const_bounds with bounds 1 and
;; (element bits / 2 + 1).
4574 (define_insn "neon_v<shift_op><sup>_n<mode>"
4575 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4576 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4577 (match_operand:SI 2 "immediate_operand" "i")]
4581 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4582 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4584 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4587 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing shift by immediate using the <V_s_elem> data
;; type suffix: the VN source is printed as a Q register and the
;; <V_narrow> result as a D register.  Operand 2 is the immediate shift
;; count, checked by arm_const_bounds with bounds 1 and
;; (element bits / 2 + 1).
4588 (define_insn "neon_v<shift_op>_n<mode>"
4589 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4590 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4591 (match_operand:SI 2 "immediate_operand" "i")]
4595 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4596 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate for the VDQIX modes.  Operand 2 is the immediate
;; shift count, checked at output time by arm_const_bounds with bounds 0
;; and the element width in bits.
4601 (define_insn "neon_vshl_n<mode>"
4602 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4603 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4604 (match_operand:SI 2 "immediate_operand" "i")]
4608 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4609 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4611 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate for the VDQIX modes, with the signedness prefix
;; taken from <sup>.  Operand 2 is the immediate shift count, checked by
;; arm_const_bounds with bounds 0 and the element width in bits.
4614 (define_insn "neon_vqshl_<sup>_n<mode>"
4615 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4617 (match_operand:SI 2 "immediate_operand" "i")]
4621 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4622 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4624 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate for the VDQIX modes, printed with the <V_s_elem>
;; data type suffix.  Operand 2 is the immediate shift count, checked by
;; arm_const_bounds with bounds 0 and the element width in bits.
4627 (define_insn "neon_vqshlu_n<mode>"
4628 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4629 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4630 (match_operand:SI 2 "immediate_operand" "i")]
4634 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4635 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4637 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll by immediate: the VW source is printed as a D register and the
;; <V_widen> result as a Q register.  Operand 2 is the immediate shift
;; count, checked by arm_const_bounds with bounds 0 and
;; (element bits + 1).
;; NOTE(review): the inline comment states "0 < imm", yet the lower bound
;; passed is 0, whereas other patterns in this file pass 1 when zero is
;; excluded -- confirm whether an immediate of 0 is meant to be accepted.
4640 (define_insn "neon_vshll<sup>_n<mode>"
4641 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4642 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4643 (match_operand:SI 2 "immediate_operand" "i")]
4647 /* The boundaries are: 0 < imm <= size. */
4648 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4649 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4651 [(set_attr "type" "neon_shift_imm_long")]
;; Shift by immediate and accumulate for the VDQIX modes.  Operand 1 is
;; the accumulator, tied to the output ("0"); operand 2 is the vector
;; being shifted; operand 3 is the immediate shift count, checked by
;; arm_const_bounds with bounds 1 and (element bits + 1).
4655 (define_insn "neon_v<shift_op><sup>_n<mode>"
4656 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4657 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4658 (match_operand:VDQIX 2 "s_register_operand" "w")
4659 (match_operand:SI 3 "immediate_operand" "i")]
4663 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4664 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4666 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate for the VDQIX modes.  Operand 1 supplies the
;; existing destination contents and is tied to the output ("0");
;; operand 2 is the vector source; operand 3 is the immediate shift
;; count, checked by arm_const_bounds with bounds 1 and
;; (element bits + 1).
4669 (define_insn "neon_vsri_n<mode>"
4670 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4671 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4672 (match_operand:VDQIX 2 "s_register_operand" "w")
4673 (match_operand:SI 3 "immediate_operand" "i")]
4677 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4678 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4680 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate for the VDQIX modes.  Operand 1 supplies the
;; existing destination contents and is tied to the output ("0");
;; operand 2 is the vector source; operand 3 is the immediate shift
;; count, checked by arm_const_bounds with bounds 0 and the element
;; width in bits.
4683 (define_insn "neon_vsli_n<mode>"
4684 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4685 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4686 (match_operand:VDQIX 2 "s_register_operand" "w")
4687 (match_operand:SI 3 "immediate_operand" "i")]
4691 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4692 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4694 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table.  Operand 1 is the V8QI table (printed
;; inside the {...} register list) and operand 2 is the V8QI vector
;; printed after it.
4697 (define_insn "neon_vtbl1v8qi"
4698 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4699 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4700 (match_operand:V8QI 2 "s_register_operand" "w")]
4703 "vtbl.8\t%P0, {%P1}, %P2"
4704 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in a TImode operand.  The output
;; code takes the hard register number of operand 1 and builds the two
;; V8QI table registers at tabbase and tabbase + 2 for the {...} list;
;; operand 2 is the V8QI vector printed after the list.
4707 (define_insn "neon_vtbl2v8qi"
4708 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4709 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4710 (match_operand:V8QI 2 "s_register_operand" "w")]
4715 int tabbase = REGNO (operands[1]);
4717 ops[0] = operands[0];
4718 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4719 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4720 ops[3] = operands[2];
4721 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4725 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in an EImode operand.  The
;; output code builds the three V8QI table registers at tabbase,
;; tabbase + 2 and tabbase + 4 from the hard register number of operand
;; 1; operand 2 is the V8QI vector printed after the list.
4728 (define_insn "neon_vtbl3v8qi"
4729 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4730 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4731 (match_operand:V8QI 2 "s_register_operand" "w")]
4736 int tabbase = REGNO (operands[1]);
4738 ops[0] = operands[0];
4739 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4740 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4741 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4742 ops[4] = operands[2];
4743 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4747 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in an OImode operand.  The
;; output code builds the four V8QI table registers at tabbase,
;; tabbase + 2, tabbase + 4 and tabbase + 6 from the hard register
;; number of operand 1; operand 2 is the V8QI vector printed after the
;; list.
4750 (define_insn "neon_vtbl4v8qi"
4751 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4752 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4753 (match_operand:V8QI 2 "s_register_operand" "w")]
4758 int tabbase = REGNO (operands[1]);
4760 ops[0] = operands[0];
4761 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4762 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4763 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4764 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4765 ops[5] = operands[2];
4766 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4770 [(set_attr "type" "neon_tbl4")]
4773 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  The output is earlyclobber
;; ("=&w").  After reload the pattern is split into two neon_vtbl2v8qi
;; insns: operand 1 is viewed as TImode to act as the two-register table,
;; and the low and high V8QI halves of the result and of operand 2 are
;; processed separately.
4774 (define_insn_and_split "neon_vtbl1v16qi"
4775 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4776 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4777 (match_operand:V16QI 2 "s_register_operand" "w")]
4781 "&& reload_completed"
4784 rtx op0, op1, op2, part0, part2;
4788 op1 = gen_lowpart (TImode, operands[1]);
4791 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4792 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4793 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4794 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4796 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4797 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4798 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4799 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4802 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table operand.  The output is
;; earlyclobber ("=&w").  After reload the pattern is split into two
;; table-lookup insns, one for the low and one for the high V8QI half of
;; the result and of operand 2.
;; NOTE(review): both halves are emitted through gen_neon_vtbl2v8qi; how
;; op1 is derived from the OImode operand should be confirmed against
;; the full preparation code.
4805 (define_insn_and_split "neon_vtbl2v16qi"
4806 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4807 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4808 (match_operand:V16QI 2 "s_register_operand" "w")]
4812 "&& reload_completed"
4815 rtx op0, op1, op2, part0, part2;
4822 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4823 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4824 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4825 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4827 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4828 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4829 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4830 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4833 [(set_attr "type" "multiple")]
4836 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4837 ;; handle quad-word input modes, producing octa-word output modes. But
4838 ;; that requires us to add support for octa-word vector modes in moves.
4839 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode value.  The pattern is
;; split after reload, with the actual work delegated to
;; neon_split_vcombine.
4840 (define_insn_and_split "neon_vcombinev16qi"
4841 [(set (match_operand:OI 0 "s_register_operand" "=w")
4842 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4843 (match_operand:V16QI 2 "s_register_operand" "w")]
4847 "&& reload_completed"
4850 neon_split_vcombine (operands);
4853 [(set_attr "type" "multiple")]
;; vtbx with a one-register table.  Operand 1 supplies the existing
;; destination contents and is tied to the output ("0"); operand 2 is
;; the V8QI table (printed inside {...}) and operand 3 is the V8QI vector
;; printed after it.
4856 (define_insn "neon_vtbx1v8qi"
4857 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4858 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4859 (match_operand:V8QI 2 "s_register_operand" "w")
4860 (match_operand:V8QI 3 "s_register_operand" "w")]
4863 "vtbx.8\t%P0, {%P2}, %P3"
4864 [(set_attr "type" "neon_tbl1")]
;; vtbx with a two-register table held in a TImode operand.  Operand 1 is
;; tied to the output ("0").  The output code builds the two V8QI table
;; registers at tabbase and tabbase + 2 from the hard register number of
;; operand 2; operand 3 is the V8QI vector printed after the list.
4867 (define_insn "neon_vtbx2v8qi"
4868 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4869 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4870 (match_operand:TI 2 "s_register_operand" "w")
4871 (match_operand:V8QI 3 "s_register_operand" "w")]
4876 int tabbase = REGNO (operands[2]);
4878 ops[0] = operands[0];
4879 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4880 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4881 ops[3] = operands[3];
4882 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4886 [(set_attr "type" "neon_tbl2")]
;; vtbx with a three-register table held in an EImode operand.  Operand
;; 1 is tied to the output ("0").  The output code builds the three V8QI
;; table registers at tabbase, tabbase + 2 and tabbase + 4 from the hard
;; register number of operand 2; operand 3 is the V8QI vector printed
;; after the list.
4889 (define_insn "neon_vtbx3v8qi"
4890 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4891 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4892 (match_operand:EI 2 "s_register_operand" "w")
4893 (match_operand:V8QI 3 "s_register_operand" "w")]
4898 int tabbase = REGNO (operands[2]);
4900 ops[0] = operands[0];
4901 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4902 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4903 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4904 ops[4] = operands[3];
4905 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4909 [(set_attr "type" "neon_tbl3")]
;; vtbx with a four-register table held in an OImode operand.  Operand 1
;; is tied to the output ("0").  The output code builds the four V8QI
;; table registers at tabbase, tabbase + 2, tabbase + 4 and tabbase + 6
;; from the hard register number of operand 2; operand 3 is the V8QI
;; vector printed after the list.
4912 (define_insn "neon_vtbx4v8qi"
4913 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4914 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4915 (match_operand:OI 2 "s_register_operand" "w")
4916 (match_operand:V8QI 3 "s_register_operand" "w")]
4921 int tabbase = REGNO (operands[2]);
4923 ops[0] = operands[0];
4924 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4925 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4926 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4927 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4928 ops[5] = operands[3];
4929 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4933 [(set_attr "type" "neon_tbl4")]
;; Expander producing both results of a transpose in one pattern:
;; operand 0 and operand 3 are each set from an unspec of the same two
;; inputs (operands 1 and 2); the second set uses UNSPEC_VTRN2.
4936 (define_expand "@neon_vtrn<mode>_internal"
4938 [(set (match_operand:VDQWH 0 "s_register_operand")
4939 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4940 (match_operand:VDQWH 2 "s_register_operand")]
4942 (set (match_operand:VDQWH 3 "s_register_operand")
4943 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4948 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vtrn expander.  Both outputs (operands 0 and 2)
;; are earlyclobber; input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.
4949 (define_insn "*neon_vtrn<mode>_insn"
4950 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4951 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4952 (match_operand:VDQWH 3 "s_register_operand" "2")]
4954 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4955 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4958 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4959 [(set_attr "type" "neon_permute<q>")]
;; Expander producing both results of a zip in one pattern: operand 0
;; and operand 3 are each set from an unspec of the same two inputs
;; (operands 1 and 2); the second set uses UNSPEC_VZIP2.
4962 (define_expand "@neon_vzip<mode>_internal"
4964 [(set (match_operand:VDQWH 0 "s_register_operand")
4965 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4966 (match_operand:VDQWH 2 "s_register_operand")]
4968 (set (match_operand:VDQWH 3 "s_register_operand")
4969 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4974 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vzip expander.  Both outputs (operands 0 and 2)
;; are earlyclobber; input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.
4975 (define_insn "*neon_vzip<mode>_insn"
4976 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4977 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4978 (match_operand:VDQWH 3 "s_register_operand" "2")]
4980 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4981 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4984 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4985 [(set_attr "type" "neon_zip<q>")]
;; Expander producing both results of an unzip in one pattern: operand 0
;; and operand 3 are each set from an unspec of the same two inputs
;; (operands 1 and 2); the second set uses UNSPEC_VUZP2.
4988 (define_expand "@neon_vuzp<mode>_internal"
4990 [(set (match_operand:VDQWH 0 "s_register_operand")
4991 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4992 (match_operand:VDQWH 2 "s_register_operand")]
4994 (set (match_operand:VDQWH 3 "s_register_operand")
4995 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5000 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vuzp expander.  Both outputs (operands 0 and 2)
;; are earlyclobber; input operand 1 is tied to output 0 and input
;; operand 3 is tied to output 2, so the instruction is printed with just
;; the two output registers.
5001 (define_insn "*neon_vuzp<mode>_insn"
5002 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5003 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5004 (match_operand:VDQWH 3 "s_register_operand" "2")]
5006 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5007 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5010 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5011 [(set_attr "type" "neon_zip<q>")]
;; Expander named vec_load_lanes<mode><mode>: loads a VDQX register
;; (operand 0) from a neon_struct_operand memory reference (operand 1).
5014 (define_expand "vec_load_lanes<mode><mode>"
5015 [(set (match_operand:VDQX 0 "s_register_operand")
5016 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; vld1 of a whole VDQX vector from memory.  Operand 1 is the memory
;; reference (constraint "Um", printed with %A); operand 0 is printed
;; with the %h modifier.
5020 (define_insn "neon_vld1<mode>"
5021 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5022 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5025 "vld1.<V_sz_elem>\t%h0, %A1"
5026 [(set_attr "type" "neon_load1_1reg<q>")]
5029 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5030 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; vld1 to a single lane, D-register form (VDX modes).  Operand 1 is the
;; <V_elem> memory source, operand 2 is the existing vector (tied to the
;; output), and operand 3 is the lane index.  The output code remaps the
;; lane with NEON_ENDIAN_LANE_N and stores it back into operands[3]; it
;; has two templates, one loading the whole D register and one loading
;; lane %c3 only.
;; NOTE(review): which condition selects the whole-register template
;; (it involves `max`) should be confirmed against the full output code.
5032 (define_insn "neon_vld1_lane<mode>"
5033 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5034 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5035 (match_operand:VDX 2 "s_register_operand" "0")
5036 (match_operand:SI 3 "immediate_operand" "i")]
5040 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5041 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5042 operands[3] = GEN_INT (lane);
5044 return "vld1.<V_sz_elem>\t%P0, %A1";
5046 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5048 [(set_attr "type" "neon_load1_one_lane<q>")]
5051 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5052 ;; here on big endian targets.
;; vld1 to a single lane, Q-register form (VQX modes).  Operand 2 is the
;; existing vector (tied to the output) and operand 3 is the lane index,
;; remapped with NEON_ENDIAN_LANE_N.  The instruction is printed on a D
;; register: operand 0 is replaced by a <V_HALF> register built from
;; `regno`, and lanes at or above max / 2 take a separate branch.
;; NOTE(review): that branch presumably rebases the lane and register
;; number onto the upper half -- confirm against the full output code.
5053 (define_insn "neon_vld1_lane<mode>"
5054 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5055 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5056 (match_operand:VQX 2 "s_register_operand" "0")
5057 (match_operand:SI 3 "immediate_operand" "i")]
5061 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5062 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5063 operands[3] = GEN_INT (lane);
5064 int regno = REGNO (operands[0]);
5065 if (lane >= max / 2)
5069 operands[3] = GEN_INT (lane);
5071 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5073 return "vld1.<V_sz_elem>\t%P0, %A1";
5075 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5077 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem> element from memory and duplicate it across a VD_LANE
;; vector (vec_duplicate).  Printed as vld1 with the register list {%P0[]};
;; scheduled as neon_load1_all_lanes<q>.
5080 (define_insn "neon_vld1_dup<mode>"
5081 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5082 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5084 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5085 [(set_attr "type" "neon_load1_all_lanes<q>")]
5088 ;; Special case for DImode. Treat it exactly like a simple load.
;; Expander neon_vld1_dupdi: the DImode register operand 0 is set from an
;; unspec over the DImode memory operand 1.
5089 (define_expand "neon_vld1_dupdi"
5090 [(set (match_operand:DI 0 "s_register_operand")
5091 (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
;; vec_duplicate load of one <V_elem> element for VQ2 modes.  A single vld1
;; lists both %e0[] and %f0[] of the destination in its register list.
5097 (define_insn "neon_vld1_dup<mode>"
5098 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5099 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5102 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5104 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI vec_duplicate load, implemented as an insn-and-split whose split
;; condition includes reload_completed.  The split loads the DImode low part
;; of operand 0 through gen_neon_vld1_dupdi and then copies that value into
;; the DImode high part with emit_move_insn.  The length attribute is 8.
5107 (define_insn_and_split "neon_vld1_dupv2di"
5108 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5109 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5112 "&& reload_completed"
5115 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5116 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5117 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5120 [(set_attr "length" "8")
5121 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander vec_store_lanes<mode><mode>: operand 0 (a VDQX
;; neon_struct_operand) is set from an unspec over register operand 1.
5124 (define_expand "vec_store_lanes<mode><mode>"
5125 [(set (match_operand:VDQX 0 "neon_struct_operand")
5126 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store for VDQX modes.  Emits "vst1.<V_sz_elem>" with the
;; source register printed through %h1 and the memory operand through %A0;
;; scheduled as neon_store1_1reg<q>.
5130 (define_insn "neon_vst1<mode>"
5131 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5132 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5135 "vst1.<V_sz_elem>\t%h1, %A0"
5136 [(set_attr "type" "neon_store1_1reg<q>")])
5138 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5139 ;; here on big endian targets.
;; Single-lane vst1 for VDX modes.  Operand 0 is the <V_elem> memory element,
;; operand 1 the source vector and operand 2 the lane.  The lane is mapped
;; through NEON_ENDIAN_LANE_N and written back to operands[2].  Two templates
;; are returned: {%P1} without a lane index and {%P1[%c2]} with one.
5140 (define_insn "neon_vst1_lane<mode>"
5141 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5143 [(match_operand:VDX 1 "s_register_operand" "w")
5144 (match_operand:SI 2 "immediate_operand" "i")]
5148 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5149 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5150 operands[2] = GEN_INT (lane);
5152 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5154 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5156 [(set_attr "type" "neon_store1_one_lane<q>")]
5159 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5160 ;; here on big endian targets.
;; Single-lane vst1 for VQX modes.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  Operand 1 is replaced by a <V_HALF>mode register built from regno
;; and operand 2 by the resulting lane before the template is printed.
5161 (define_insn "neon_vst1_lane<mode>"
5162 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5164 [(match_operand:VQX 1 "s_register_operand" "w")
5165 (match_operand:SI 2 "immediate_operand" "i")]
5169 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5170 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5171 int regno = REGNO (operands[1]);
5172 if (lane >= max / 2)
5177 operands[2] = GEN_INT (lane);
5178 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5180 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5182 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5184 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander vec_load_lanesti<mode>: the TI register operand 0 is set from an
;; unspec over the TI memory operand 1 and a VDX-mode UNSPEC_VSTRUCTDUMMY
;; marker.
5187 (define_expand "vec_load_lanesti<mode>"
5188 [(set (match_operand:TI 0 "s_register_operand")
5189 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5190 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load for VDXBF modes into a TI register.  64-bit
;; elements are loaded with "vld1.64"; every other element size uses
;; "vld2.<V_sz_elem>".  The type attribute makes the same distinction:
;; neon_load1_2reg<q> for 64-bit elements, neon_load2_2reg<q> otherwise.
5194 (define_insn "neon_vld2<mode>"
5195 [(set (match_operand:TI 0 "s_register_operand" "=w")
5196 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5197 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5201 if (<V_sz_elem> == 64)
5202 return "vld1.64\t%h0, %A1";
5204 return "vld2.<V_sz_elem>\t%h0, %A1";
5207 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5208 (const_string "neon_load1_2reg<q>")
5209 (const_string "neon_load2_2reg<q>")))]
;; Expander vec_load_lanesoi<mode> for VQ2 modes: the OI register operand 0
;; is set from an unspec over the OI memory operand 1 and a VQ2-mode
;; UNSPEC_VSTRUCTDUMMY marker.
5212 (define_expand "vec_load_lanesoi<mode>"
5213 [(set (match_operand:OI 0 "s_register_operand")
5214 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5215 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure load for VQ2BF modes into an OI register.  Always
;; printed as "vld2.<V_sz_elem>" and scheduled as neon_load2_2reg_q.
5219 (define_insn "neon_vld2<mode>"
5220 [(set (match_operand:OI 0 "s_register_operand" "=w")
5221 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5222 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5225 "vld2.<V_sz_elem>\t%h0, %A1"
5226 [(set_attr "type" "neon_load2_2reg_q")])
5228 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5229 ;; here on big endian targets.
;; Single-lane vld2 for VD_LANE modes, with a TI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N.  The two destination registers are the DImode
;; registers at regno and regno + 2 of operand 0; the instruction is printed
;; directly with output_asm_insn.
5230 (define_insn "neon_vld2_lane<mode>"
5231 [(set (match_operand:TI 0 "s_register_operand" "=w")
5232 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5233 (match_operand:TI 2 "s_register_operand" "0")
5234 (match_operand:SI 3 "immediate_operand" "i")
5235 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5239 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5240 int regno = REGNO (operands[0]);
5242 ops[0] = gen_rtx_REG (DImode, regno);
5243 ops[1] = gen_rtx_REG (DImode, regno + 2);
5244 ops[2] = operands[1];
5245 ops[3] = GEN_INT (lane);
5246 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5249 [(set_attr "type" "neon_load2_one_lane<q>")]
5252 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5253 ;; here on big endian targets.
;; Single-lane vld2 for VQ_HS modes, with an OI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The instruction names the DImode registers at regno and regno + 4
;; and is printed with output_asm_insn.
5254 (define_insn "neon_vld2_lane<mode>"
5255 [(set (match_operand:OI 0 "s_register_operand" "=w")
5256 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5257 (match_operand:OI 2 "s_register_operand" "0")
5258 (match_operand:SI 3 "immediate_operand" "i")
5259 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5264 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5265 int regno = REGNO (operands[0]);
5267 if (lane >= max / 2)
5272 ops[0] = gen_rtx_REG (DImode, regno);
5273 ops[1] = gen_rtx_REG (DImode, regno + 4);
5274 ops[2] = operands[1];
5275 ops[3] = GEN_INT (lane);
5276 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5279 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a <V_two_elem> structure from memory into a TI register for VDXBF
;; modes.  Modes with more than one unit print the all-lanes form
;; "vld2 {%e0[], %f0[]}"; the remaining template is a plain "vld1 %h0".
;; The type attribute follows the same test on <V_mode_nunits>.
5282 (define_insn "neon_vld2_dup<mode>"
5283 [(set (match_operand:TI 0 "s_register_operand" "=w")
5284 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5285 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5289 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5290 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5292 return "vld1.<V_sz_elem>\t%h0, %A1";
5295 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5296 (const_string "neon_load2_all_lanes<q>")
5297 (const_string "neon_load1_1reg<q>")))]
;; V8BF variant of neon_vld2_dup with an OI destination and a V2BF memory
;; operand.  Four V4BFmode registers starting at the destination register
;; number (tabbase, +2, +4, +6) are passed to output_asm_insn together with
;; the memory operand.
;; NOTE(review): unlike the neon_vld3_dupv8bf and neon_vld4_dupv8bf templates,
;; the register list here carries no "[]" all-lanes suffix -- confirm that
;; this is intended.
5300 (define_insn "neon_vld2_dupv8bf"
5301 [(set (match_operand:OI 0 "s_register_operand" "=w")
5302 (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5303 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5308 int tabbase = REGNO (operands[0]);
5310 ops[4] = operands[1];
5311 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5312 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5313 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5314 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5315 output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
5318 [(set_attr "type" "neon_load2_all_lanes_q")]
;; Expander vec_store_lanesti<mode>: the TI memory operand 0 is set from an
;; unspec over the TI register operand 1 and a VDX-mode UNSPEC_VSTRUCTDUMMY
;; marker.
5321 (define_expand "vec_store_lanesti<mode>"
5322 [(set (match_operand:TI 0 "neon_struct_operand")
5323 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5324 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store for VDXBF modes from a TI register.  64-bit
;; elements are stored with "vst1.64"; every other element size uses
;; "vst2.<V_sz_elem>".  The type attribute mirrors neon_vld2<mode>: the
;; non-64-bit case is a whole two-register store, so it is classified as
;; neon_store2_2reg<q> rather than as a single-lane store.
5328 (define_insn "neon_vst2<mode>"
5329 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5330 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5331 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335 if (<V_sz_elem> == 64)
5336 return "vst1.64\t%h1, %A0";
5338 return "vst2.<V_sz_elem>\t%h1, %A0";
5341 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5342 (const_string "neon_store1_2reg<q>")
5343 (const_string "neon_store2_2reg<q>")))]
;; Expander vec_store_lanesoi<mode> for VQ2 modes: the OI memory operand 0 is
;; set from an unspec over the OI register operand 1 and a VQ2-mode
;; UNSPEC_VSTRUCTDUMMY marker.
5346 (define_expand "vec_store_lanesoi<mode>"
5347 [(set (match_operand:OI 0 "neon_struct_operand")
5348 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5349 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Two-vector structure store for VQ2BF modes from an OI register.  Always
;; printed as "vst2.<V_sz_elem>" and scheduled as neon_store2_4reg<q>.
5353 (define_insn "neon_vst2<mode>"
5354 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5355 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5356 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5359 "vst2.<V_sz_elem>\t%h1, %A0"
5360 [(set_attr "type" "neon_store2_4reg<q>")]
5363 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5364 ;; here on big endian targets.
;; Single-lane vst2 for VD_LANE modes: stores a <V_two_elem> structure taken
;; from the TI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N.  The source registers are the DImode registers at
;; regno and regno + 2; the instruction is printed with output_asm_insn.
5365 (define_insn "neon_vst2_lane<mode>"
5366 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5367 (unspec:<V_two_elem>
5368 [(match_operand:TI 1 "s_register_operand" "w")
5369 (match_operand:SI 2 "immediate_operand" "i")
5370 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5374 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5375 int regno = REGNO (operands[1]);
5377 ops[0] = operands[0];
5378 ops[1] = gen_rtx_REG (DImode, regno);
5379 ops[2] = gen_rtx_REG (DImode, regno + 2);
5380 ops[3] = GEN_INT (lane);
5381 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5384 [(set_attr "type" "neon_store2_one_lane<q>")]
5387 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5388 ;; here on big endian targets.
;; Single-lane vst2 for VQ_HS modes: stores a <V_two_elem> structure taken
;; from the OI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The instruction names the DImode registers at regno and regno + 4
;; and is printed with output_asm_insn.
5389 (define_insn "neon_vst2_lane<mode>"
5390 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5391 (unspec:<V_two_elem>
5392 [(match_operand:OI 1 "s_register_operand" "w")
5393 (match_operand:SI 2 "immediate_operand" "i")
5394 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5398 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5399 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5400 int regno = REGNO (operands[1]);
5402 if (lane >= max / 2)
5407 ops[0] = operands[0];
5408 ops[1] = gen_rtx_REG (DImode, regno);
5409 ops[2] = gen_rtx_REG (DImode, regno + 4);
5410 ops[3] = GEN_INT (lane);
5411 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5414 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander vec_load_lanesei<mode>: the EI register operand 0 is set from an
;; unspec over the EI memory operand 1 and a VDX-mode UNSPEC_VSTRUCTDUMMY
;; marker.
5417 (define_expand "vec_load_lanesei<mode>"
5418 [(set (match_operand:EI 0 "s_register_operand")
5419 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5420 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure load for VDXBF modes into an EI register.  64-bit
;; elements are loaded with "vld1.64"; every other element size uses
;; "vld3.<V_sz_elem>".  The type attribute makes the same distinction:
;; neon_load1_3reg<q> for 64-bit elements, neon_load3_3reg<q> otherwise.
5424 (define_insn "neon_vld3<mode>"
5425 [(set (match_operand:EI 0 "s_register_operand" "=w")
5426 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5427 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5431 if (<V_sz_elem> == 64)
5432 return "vld1.64\t%h0, %A1";
5434 return "vld3.<V_sz_elem>\t%h0, %A1";
5437 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5438 (const_string "neon_load1_3reg<q>")
5439 (const_string "neon_load3_3reg<q>")))]
;; Expander vec_load_lanesci<mode> for VQ2 modes: forwards the CI register
;; operand 0 and the CI memory operand 1 to gen_neon_vld3<mode>.
5442 (define_expand "vec_load_lanesci<mode>"
5443 [(match_operand:CI 0 "s_register_operand")
5444 (match_operand:CI 1 "neon_struct_operand")
5445 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5448 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Expander neon_vld3<mode> for VQ2BF modes: the CI load is emitted as two
;; EImode accesses.  The first, at offset 0, goes through gen_neon_vld3qa.
;; The second, GET_MODE_SIZE (EImode) bytes further on, goes through
;; gen_neon_vld3qb, which also receives operands[0] as an input operand.
5452 (define_expand "neon_vld3<mode>"
5453 [(match_operand:CI 0 "s_register_operand")
5454 (match_operand:CI 1 "neon_struct_operand")
5455 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5460 mem = adjust_address (operands[1], EImode, 0);
5461 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5462 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5463 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ2BF vld3 sequence: loads an EI memory block into the
;; CI destination.  The vld3 names the DImode registers at regno, regno + 4
;; and regno + 8 of operand 0 and is printed with output_asm_insn.
5467 (define_insn "neon_vld3qa<mode>"
5468 [(set (match_operand:CI 0 "s_register_operand" "=w")
5469 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5470 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474 int regno = REGNO (operands[0]);
5476 ops[0] = gen_rtx_REG (DImode, regno);
5477 ops[1] = gen_rtx_REG (DImode, regno + 4);
5478 ops[2] = gen_rtx_REG (DImode, regno + 8);
5479 ops[3] = operands[1];
5480 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5483 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of the VQ2BF vld3 sequence.  Operand 2 is the previous value
;; of the CI destination, tied to operand 0 by the "0" constraint.  The vld3
;; names the DImode registers at regno + 2, regno + 6 and regno + 10 of
;; operand 0 and is printed with output_asm_insn.
5486 (define_insn "neon_vld3qb<mode>"
5487 [(set (match_operand:CI 0 "s_register_operand" "=w")
5488 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5489 (match_operand:CI 2 "s_register_operand" "0")
5490 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5494 int regno = REGNO (operands[0]);
5496 ops[0] = gen_rtx_REG (DImode, regno + 2);
5497 ops[1] = gen_rtx_REG (DImode, regno + 6);
5498 ops[2] = gen_rtx_REG (DImode, regno + 10);
5499 ops[3] = operands[1];
5500 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5503 [(set_attr "type" "neon_load3_3reg<q>")]
5506 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5507 ;; here on big endian targets.
;; Single-lane vld3 for VD_LANE modes, with an EI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N.  The vld3 names the DImode registers at regno,
;; regno + 2 and regno + 4; the memory operand is printed with plain %3.
5508 (define_insn "neon_vld3_lane<mode>"
5509 [(set (match_operand:EI 0 "s_register_operand" "=w")
5510 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5511 (match_operand:EI 2 "s_register_operand" "0")
5512 (match_operand:SI 3 "immediate_operand" "i")
5513 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5517 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5518 int regno = REGNO (operands[0]);
5520 ops[0] = gen_rtx_REG (DImode, regno);
5521 ops[1] = gen_rtx_REG (DImode, regno + 2);
5522 ops[2] = gen_rtx_REG (DImode, regno + 4);
5523 ops[3] = operands[1];
5524 ops[4] = GEN_INT (lane);
5525 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5529 [(set_attr "type" "neon_load3_one_lane<q>")]
5532 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5533 ;; here on big endian targets.
;; Single-lane vld3 for VQ_HS modes, with a CI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The vld3 names the DImode registers at regno, regno + 4 and
;; regno + 8; the memory operand is printed with plain %3.
5534 (define_insn "neon_vld3_lane<mode>"
5535 [(set (match_operand:CI 0 "s_register_operand" "=w")
5536 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5537 (match_operand:CI 2 "s_register_operand" "0")
5538 (match_operand:SI 3 "immediate_operand" "i")
5539 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5543 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5544 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5545 int regno = REGNO (operands[0]);
5547 if (lane >= max / 2)
5552 ops[0] = gen_rtx_REG (DImode, regno);
5553 ops[1] = gen_rtx_REG (DImode, regno + 4);
5554 ops[2] = gen_rtx_REG (DImode, regno + 8);
5555 ops[3] = operands[1];
5556 ops[4] = GEN_INT (lane);
5557 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5561 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a <V_three_elem> structure from memory into an EI register for VDXBF
;; modes.  Modes with more than one unit print the all-lanes vld3 on the
;; DImode registers at regno, regno + 2 and regno + 4 through
;; output_asm_insn; the remaining template is a plain "vld1 %h0".  The type
;; attribute follows the same test on <V_mode_nunits>.
5564 (define_insn "neon_vld3_dup<mode>"
5565 [(set (match_operand:EI 0 "s_register_operand" "=w")
5566 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5567 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5571 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5573 int regno = REGNO (operands[0]);
5575 ops[0] = gen_rtx_REG (DImode, regno);
5576 ops[1] = gen_rtx_REG (DImode, regno + 2);
5577 ops[2] = gen_rtx_REG (DImode, regno + 4);
5578 ops[3] = operands[1];
5579 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5583 return "vld1.<V_sz_elem>\t%h0, %A1";
5586 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5587 (const_string "neon_load3_all_lanes<q>")
5588 (const_string "neon_load1_1reg<q>")))])
;; V8BF variant of neon_vld3_dup with a CI destination and a V2BF memory
;; operand.  Three V4BFmode registers (tabbase, tabbase + 2, tabbase + 4) are
;; printed with the "[]" all-lanes suffix in a "vld3.16" through
;; output_asm_insn.
5590 (define_insn "neon_vld3_dupv8bf"
5591 [(set (match_operand:CI 0 "s_register_operand" "=w")
5592 (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5593 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5598 int tabbase = REGNO (operands[0]);
5600 ops[3] = operands[1];
5601 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5602 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5603 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5604 output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
5607 [(set_attr "type" "neon_load3_all_lanes_q")]
;; Expander vec_store_lanesei<mode>: the EI memory operand 0 is set from an
;; unspec over the EI register operand 1 and a VDX-mode UNSPEC_VSTRUCTDUMMY
;; marker.
5610 (define_expand "vec_store_lanesei<mode>"
5611 [(set (match_operand:EI 0 "neon_struct_operand")
5612 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5613 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Three-vector structure store for VDXBF modes from an EI register.  64-bit
;; elements are stored with "vst1.64"; every other element size uses
;; "vst3.<V_sz_elem>".  The type attribute mirrors neon_vld3<mode> and the
;; neon_vst3qa/qb patterns: the non-64-bit case is a whole three-register
;; store, so it is classified as neon_store3_3reg<q> rather than as a
;; single-lane store.
5617 (define_insn "neon_vst3<mode>"
5618 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5619 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5620 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5624 if (<V_sz_elem> == 64)
5625 return "vst1.64\t%h1, %A0";
5627 return "vst3.<V_sz_elem>\t%h1, %A0";
5630 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5631 (const_string "neon_store1_3reg<q>")
5632 (const_string "neon_store3_3reg<q>")))])
;; Expander vec_store_lanesci<mode> for VQ2 modes: forwards the CI memory
;; operand 0 and the CI register operand 1 to gen_neon_vst3<mode>.
5634 (define_expand "vec_store_lanesci<mode>"
5635 [(match_operand:CI 0 "neon_struct_operand")
5636 (match_operand:CI 1 "s_register_operand")
5637 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5640 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Expander neon_vst3<mode> for VQ2BF modes: the CI store is emitted as two
;; EImode accesses.  The first, at offset 0, goes through gen_neon_vst3qa;
;; the second, GET_MODE_SIZE (EImode) bytes further on, goes through
;; gen_neon_vst3qb.  Both receive the full CI register operand 1.
5644 (define_expand "neon_vst3<mode>"
5645 [(match_operand:CI 0 "neon_struct_operand")
5646 (match_operand:CI 1 "s_register_operand")
5647 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5652 mem = adjust_address (operands[0], EImode, 0);
5653 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5654 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5655 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of the VQ2BF vst3 sequence: stores an EI memory block from the
;; CI source.  The vst3 names the DImode registers at regno, regno + 4 and
;; regno + 8 of operand 1 and is printed with output_asm_insn.
5659 (define_insn "neon_vst3qa<mode>"
5660 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5661 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5662 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5666 int regno = REGNO (operands[1]);
5668 ops[0] = operands[0];
5669 ops[1] = gen_rtx_REG (DImode, regno);
5670 ops[2] = gen_rtx_REG (DImode, regno + 4);
5671 ops[3] = gen_rtx_REG (DImode, regno + 8);
5672 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5675 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of the VQ2BF vst3 sequence: stores an EI memory block from the
;; CI source.  The vst3 names the DImode registers at regno + 2, regno + 6
;; and regno + 10 of operand 1 and is printed with output_asm_insn.
5678 (define_insn "neon_vst3qb<mode>"
5679 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5680 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5681 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5685 int regno = REGNO (operands[1]);
5687 ops[0] = operands[0];
5688 ops[1] = gen_rtx_REG (DImode, regno + 2);
5689 ops[2] = gen_rtx_REG (DImode, regno + 6);
5690 ops[3] = gen_rtx_REG (DImode, regno + 10);
5691 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5694 [(set_attr "type" "neon_store3_3reg<q>")]
5697 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5698 ;; here on big endian targets.
;; Single-lane vst3 for VD_LANE modes: stores a <V_three_elem> structure taken
;; from the EI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N.  The vst3 names the DImode registers at regno,
;; regno + 2 and regno + 4; the memory operand is printed with plain %0.
5699 (define_insn "neon_vst3_lane<mode>"
5700 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5701 (unspec:<V_three_elem>
5702 [(match_operand:EI 1 "s_register_operand" "w")
5703 (match_operand:SI 2 "immediate_operand" "i")
5704 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5708 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5709 int regno = REGNO (operands[1]);
5711 ops[0] = operands[0];
5712 ops[1] = gen_rtx_REG (DImode, regno);
5713 ops[2] = gen_rtx_REG (DImode, regno + 2);
5714 ops[3] = gen_rtx_REG (DImode, regno + 4);
5715 ops[4] = GEN_INT (lane);
5716 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5720 [(set_attr "type" "neon_store3_one_lane<q>")]
5723 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5724 ;; here on big endian targets.
;; Single-lane vst3 for VQ_HS modes: stores a <V_three_elem> structure taken
;; from the CI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The vst3 names the DImode registers at regno, regno + 4 and
;; regno + 8; the memory operand is printed with plain %0.
5725 (define_insn "neon_vst3_lane<mode>"
5726 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5727 (unspec:<V_three_elem>
5728 [(match_operand:CI 1 "s_register_operand" "w")
5729 (match_operand:SI 2 "immediate_operand" "i")
5730 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5734 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5735 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5736 int regno = REGNO (operands[1]);
5738 if (lane >= max / 2)
5743 ops[0] = operands[0];
5744 ops[1] = gen_rtx_REG (DImode, regno);
5745 ops[2] = gen_rtx_REG (DImode, regno + 4);
5746 ops[3] = gen_rtx_REG (DImode, regno + 8);
5747 ops[4] = GEN_INT (lane);
5748 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5752 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander vec_load_lanesoi<mode> for VDX modes: the OI register operand 0
;; is set from an unspec over the OI memory operand 1 and a VDX-mode
;; UNSPEC_VSTRUCTDUMMY marker.
5755 (define_expand "vec_load_lanesoi<mode>"
5756 [(set (match_operand:OI 0 "s_register_operand")
5757 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5758 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure load for VDXBF modes into an OI register.  64-bit
;; elements are loaded with "vld1.64"; every other element size uses
;; "vld4.<V_sz_elem>".  The type attribute makes the same distinction:
;; neon_load1_4reg<q> for 64-bit elements, neon_load4_4reg<q> otherwise.
5762 (define_insn "neon_vld4<mode>"
5763 [(set (match_operand:OI 0 "s_register_operand" "=w")
5764 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5765 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5769 if (<V_sz_elem> == 64)
5770 return "vld1.64\t%h0, %A1";
5772 return "vld4.<V_sz_elem>\t%h0, %A1";
5775 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5776 (const_string "neon_load1_4reg<q>")
5777 (const_string "neon_load4_4reg<q>")))]
;; Expander vec_load_lanesxi<mode> for VQ2 modes: forwards the XI register
;; operand 0 and the XI memory operand 1 to gen_neon_vld4<mode>.
5780 (define_expand "vec_load_lanesxi<mode>"
5781 [(match_operand:XI 0 "s_register_operand")
5782 (match_operand:XI 1 "neon_struct_operand")
5783 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5786 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Expander neon_vld4<mode> for VQ2BF modes: the XI load is emitted as two
;; OImode accesses.  The first, at offset 0, goes through gen_neon_vld4qa.
;; The second, GET_MODE_SIZE (OImode) bytes further on, goes through
;; gen_neon_vld4qb, which also receives operands[0] as an input operand.
5790 (define_expand "neon_vld4<mode>"
5791 [(match_operand:XI 0 "s_register_operand")
5792 (match_operand:XI 1 "neon_struct_operand")
5793 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5798 mem = adjust_address (operands[1], OImode, 0);
5799 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5800 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5801 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of the VQ2BF vld4 sequence: loads an OI memory block into the
;; XI destination.  The vld4 names the DImode registers at regno, regno + 4,
;; regno + 8 and regno + 12 of operand 0 and is printed with output_asm_insn.
5805 (define_insn "neon_vld4qa<mode>"
5806 [(set (match_operand:XI 0 "s_register_operand" "=w")
5807 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5808 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5812 int regno = REGNO (operands[0]);
5814 ops[0] = gen_rtx_REG (DImode, regno);
5815 ops[1] = gen_rtx_REG (DImode, regno + 4);
5816 ops[2] = gen_rtx_REG (DImode, regno + 8);
5817 ops[3] = gen_rtx_REG (DImode, regno + 12);
5818 ops[4] = operands[1];
5819 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5822 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of the VQ2BF vld4 sequence.  Operand 2 is the previous value
;; of the XI destination, tied to operand 0 by the "0" constraint.  The vld4
;; names the DImode registers at regno + 2, regno + 6, regno + 10 and
;; regno + 14 of operand 0 and is printed with output_asm_insn.
5825 (define_insn "neon_vld4qb<mode>"
5826 [(set (match_operand:XI 0 "s_register_operand" "=w")
5827 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5828 (match_operand:XI 2 "s_register_operand" "0")
5829 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5833 int regno = REGNO (operands[0]);
5835 ops[0] = gen_rtx_REG (DImode, regno + 2);
5836 ops[1] = gen_rtx_REG (DImode, regno + 6);
5837 ops[2] = gen_rtx_REG (DImode, regno + 10);
5838 ops[3] = gen_rtx_REG (DImode, regno + 14);
5839 ops[4] = operands[1];
5840 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5843 [(set_attr "type" "neon_load4_4reg<q>")]
5846 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5847 ;; here on big endian targets.
;; Single-lane vld4 for VD_LANE modes, with an OI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N.  The vld4 names the DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6 and is printed with output_asm_insn.
5848 (define_insn "neon_vld4_lane<mode>"
5849 [(set (match_operand:OI 0 "s_register_operand" "=w")
5850 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5851 (match_operand:OI 2 "s_register_operand" "0")
5852 (match_operand:SI 3 "immediate_operand" "i")
5853 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5857 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5858 int regno = REGNO (operands[0]);
5860 ops[0] = gen_rtx_REG (DImode, regno);
5861 ops[1] = gen_rtx_REG (DImode, regno + 2);
5862 ops[2] = gen_rtx_REG (DImode, regno + 4);
5863 ops[3] = gen_rtx_REG (DImode, regno + 6);
5864 ops[4] = operands[1];
5865 ops[5] = GEN_INT (lane);
5866 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5870 [(set_attr "type" "neon_load4_one_lane<q>")]
5873 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5874 ;; here on big endian targets.
;; Single-lane vld4 for VQ_HS modes, with an XI destination tied to operand 2
;; by the "0" constraint.  The lane (operand 3) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The vld4 names the DImode registers at regno, regno + 4, regno + 8
;; and regno + 12 and is printed with output_asm_insn.
5875 (define_insn "neon_vld4_lane<mode>"
5876 [(set (match_operand:XI 0 "s_register_operand" "=w")
5877 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5878 (match_operand:XI 2 "s_register_operand" "0")
5879 (match_operand:SI 3 "immediate_operand" "i")
5880 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5884 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5885 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5886 int regno = REGNO (operands[0]);
5888 if (lane >= max / 2)
5893 ops[0] = gen_rtx_REG (DImode, regno);
5894 ops[1] = gen_rtx_REG (DImode, regno + 4);
5895 ops[2] = gen_rtx_REG (DImode, regno + 8);
5896 ops[3] = gen_rtx_REG (DImode, regno + 12);
5897 ops[4] = operands[1];
5898 ops[5] = GEN_INT (lane);
5899 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5903 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a <V_four_elem> structure from memory into an OI register for VDXBF
;; modes.  Modes with more than one unit print the all-lanes vld4 on the
;; DImode registers at regno, regno + 2, regno + 4 and regno + 6 through
;; output_asm_insn; the remaining template is a plain "vld1 %h0".  The type
;; attribute follows the same test on <V_mode_nunits>.
5906 (define_insn "neon_vld4_dup<mode>"
5907 [(set (match_operand:OI 0 "s_register_operand" "=w")
5908 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5909 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5913 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5915 int regno = REGNO (operands[0]);
5917 ops[0] = gen_rtx_REG (DImode, regno);
5918 ops[1] = gen_rtx_REG (DImode, regno + 2);
5919 ops[2] = gen_rtx_REG (DImode, regno + 4);
5920 ops[3] = gen_rtx_REG (DImode, regno + 6);
5921 ops[4] = operands[1];
5922 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5927 return "vld1.<V_sz_elem>\t%h0, %A1";
5930 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5931 (const_string "neon_load4_all_lanes<q>")
5932 (const_string "neon_load1_1reg<q>")))]
;; V8BF variant of neon_vld4_dup with an XI destination and a V2BF memory
;; operand.  Four V4BFmode registers (tabbase, tabbase + 2, tabbase + 4,
;; tabbase + 6) are printed with the "[]" all-lanes suffix in a "vld4.16"
;; through output_asm_insn.
5935 (define_insn "neon_vld4_dupv8bf"
5936 [(set (match_operand:XI 0 "s_register_operand" "=w")
5937 (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5938 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5943 int tabbase = REGNO (operands[0]);
5945 ops[4] = operands[1];
5946 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5947 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5948 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5949 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5950 output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
5953 [(set_attr "type" "neon_load4_all_lanes_q")]
;; Expander vec_store_lanesoi<mode> for VDX modes: the OI memory operand 0 is
;; set from an unspec over the OI register operand 1 and a VDX-mode
;; UNSPEC_VSTRUCTDUMMY marker.
5956 (define_expand "vec_store_lanesoi<mode>"
5957 [(set (match_operand:OI 0 "neon_struct_operand")
5958 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5959 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Four-vector structure store for VDXBF modes from an OI register.  64-bit
;; elements are stored with "vst1.64"; every other element size uses
;; "vst4.<V_sz_elem>".  The type attribute makes the same distinction:
;; neon_store1_4reg<q> for 64-bit elements, neon_store4_4reg<q> otherwise.
5963 (define_insn "neon_vst4<mode>"
5964 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5965 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5966 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5970 if (<V_sz_elem> == 64)
5971 return "vst1.64\t%h1, %A0";
5973 return "vst4.<V_sz_elem>\t%h1, %A0";
5976 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5977 (const_string "neon_store1_4reg<q>")
5978 (const_string "neon_store4_4reg<q>")))]
;; Expander vec_store_lanesxi<mode> for VQ2 modes: forwards the XI memory
;; operand 0 and the XI register operand 1 to gen_neon_vst4<mode>.
5981 (define_expand "vec_store_lanesxi<mode>"
5982 [(match_operand:XI 0 "neon_struct_operand")
5983 (match_operand:XI 1 "s_register_operand")
5984 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5987 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Expander neon_vst4<mode> for VQ2BF modes: the XI store is emitted as two
;; OImode accesses.  The first, at offset 0, goes through gen_neon_vst4qa;
;; the second, GET_MODE_SIZE (OImode) bytes further on, goes through
;; gen_neon_vst4qb.  Both receive the full XI register operand 1.
5991 (define_expand "neon_vst4<mode>"
5992 [(match_operand:XI 0 "neon_struct_operand")
5993 (match_operand:XI 1 "s_register_operand")
5994 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5999 mem = adjust_address (operands[0], OImode, 0);
6000 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6001 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6002 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the VQ2BF vst4 sequence: stores an OI memory block from the
;; XI source.  The vst4 names the DImode registers at regno, regno + 4,
;; regno + 8 and regno + 12 of operand 1 and is printed with output_asm_insn.
6006 (define_insn "neon_vst4qa<mode>"
6007 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6008 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6009 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6013 int regno = REGNO (operands[1]);
6015 ops[0] = operands[0];
6016 ops[1] = gen_rtx_REG (DImode, regno);
6017 ops[2] = gen_rtx_REG (DImode, regno + 4);
6018 ops[3] = gen_rtx_REG (DImode, regno + 8);
6019 ops[4] = gen_rtx_REG (DImode, regno + 12);
6020 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6023 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the VQ2BF vst4 sequence: stores an OI memory block from the
;; XI source.  The vst4 names the DImode registers at regno + 2, regno + 6,
;; regno + 10 and regno + 14 of operand 1 and is printed with
;; output_asm_insn.
6026 (define_insn "neon_vst4qb<mode>"
6027 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6028 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6029 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6033 int regno = REGNO (operands[1]);
6035 ops[0] = operands[0];
6036 ops[1] = gen_rtx_REG (DImode, regno + 2);
6037 ops[2] = gen_rtx_REG (DImode, regno + 6);
6038 ops[3] = gen_rtx_REG (DImode, regno + 10);
6039 ops[4] = gen_rtx_REG (DImode, regno + 14);
6040 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6043 [(set_attr "type" "neon_store4_4reg<q>")]
6046 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6047 ;; here on big endian targets.
;; Single-lane vst4 for VD_LANE modes: stores a <V_four_elem> structure taken
;; from the OI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N.  The vst4 names the DImode registers at regno,
;; regno + 2, regno + 4 and regno + 6 and is printed with output_asm_insn.
6048 (define_insn "neon_vst4_lane<mode>"
6049 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6050 (unspec:<V_four_elem>
6051 [(match_operand:OI 1 "s_register_operand" "w")
6052 (match_operand:SI 2 "immediate_operand" "i")
6053 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6057 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6058 int regno = REGNO (operands[1]);
6060 ops[0] = operands[0];
6061 ops[1] = gen_rtx_REG (DImode, regno);
6062 ops[2] = gen_rtx_REG (DImode, regno + 2);
6063 ops[3] = gen_rtx_REG (DImode, regno + 4);
6064 ops[4] = gen_rtx_REG (DImode, regno + 6);
6065 ops[5] = GEN_INT (lane);
6066 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6070 [(set_attr "type" "neon_store4_one_lane<q>")]
6073 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6074 ;; here on big endian targets.
;; Single-lane vst4 for VQ_HS modes: stores a <V_four_elem> structure taken
;; from the XI register operand 1.  The lane (operand 2) is mapped through
;; NEON_ENDIAN_LANE_N and compared against half the number of units in the
;; mode.  The vst4 names the DImode registers at regno, regno + 4, regno + 8
;; and regno + 12 and is printed with output_asm_insn.  Because only one lane
;; per register is written, the insn is typed neon_store4_one_lane<q>,
;; matching the VD_LANE vst4_lane pattern and the neon_vld4_lane patterns.
6075 (define_insn "neon_vst4_lane<mode>"
6076 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6077 (unspec:<V_four_elem>
6078 [(match_operand:XI 1 "s_register_operand" "w")
6079 (match_operand:SI 2 "immediate_operand" "i")
6080 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6084 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6085 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6086 int regno = REGNO (operands[1]);
6088 if (lane >= max / 2)
6093 ops[0] = operands[0];
6094 ops[1] = gen_rtx_REG (DImode, regno);
6095 ops[2] = gen_rtx_REG (DImode, regno + 4);
6096 ops[3] = gen_rtx_REG (DImode, regno + 8);
6097 ops[4] = gen_rtx_REG (DImode, regno + 12);
6098 ops[5] = GEN_INT (lane);
6099 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6103 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Extend (SE) the <V_HALF> part of operand 1 selected by a
;; vect_par_constant_low parallel into a <V_unpack> register.  Printed as
;; "vmovl.<US><V_sz_elem> %q0, %e1"; only enabled when
;; TARGET_NEON && !BYTES_BIG_ENDIAN.
6106 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6107 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6108 (SE:<V_unpack> (vec_select:<V_HALF>
6109 (match_operand:VU 1 "register_operand" "w")
6110 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6111 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6112 "vmovl.<US><V_sz_elem> %q0, %e1"
6113 [(set_attr "type" "neon_shift_imm_long")]
;; Extend (SE) the <V_HALF> part of operand 1 selected by a
;; vect_par_constant_high parallel into a <V_unpack> register.  Printed as
;; "vmovl.<US><V_sz_elem> %q0, %f1"; only enabled when
;; TARGET_NEON && !BYTES_BIG_ENDIAN.
6116 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6117 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6118 (SE:<V_unpack> (vec_select:<V_HALF>
6119 (match_operand:VU 1 "register_operand" "w")
6120 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6121 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6122 "vmovl.<US><V_sz_elem> %q0, %f1"
6123 [(set_attr "type" "neon_shift_imm_long")]
;; Expander vec_unpack<US>_hi_<mode>.  Builds a PARALLEL t1 whose
;; <V_mode_nunits>/2 elements are the indices <V_mode_nunits>/2 + i, then
;; emits gen_neon_vec_unpack<US>_hi_<mode> with operands[0] as destination.
;; Only enabled when TARGET_NEON && !BYTES_BIG_ENDIAN.
6126 (define_expand "vec_unpack<US>_hi_<mode>"
6127 [(match_operand:<V_unpack> 0 "register_operand")
6128 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6129 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6131 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6134 for (i = 0; i < (<V_mode_nunits>/2); i++)
6135 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6137 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6138 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander vec_unpack<US>_lo_<mode>.  Builds a PARALLEL t1 whose
;; <V_mode_nunits>/2 elements are the indices 0 .. <V_mode_nunits>/2 - 1,
;; then emits gen_neon_vec_unpack<US>_lo_<mode> with operands[0] as
;; destination.  Only enabled when TARGET_NEON && !BYTES_BIG_ENDIAN.
6145 (define_expand "vec_unpack<US>_lo_<mode>"
6146 [(match_operand:<V_unpack> 0 "register_operand")
6147 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6148 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6150 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6153 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6154 RTVEC_ELT (v, i) = GEN_INT (i);
6155 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6156 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply: operands 1 and 3 are each reduced to a <V_HALF> by
;; vec_select (operand 1 through the vect_par_constant_low selector in
;; operand 2), extended with SE to <V_unpack>, and multiplied.  Emitted as
;; VMULL on %e1 and %e3.  Little-endian NEON only.
6163 (define_insn "neon_vec_<US>mult_lo_<mode>"
6164 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6165 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6166 (match_operand:VU 1 "register_operand" "w")
6167 (match_operand:VU 2 "vect_par_constant_low" "")))
6168 (SE:<V_unpack> (vec_select:<V_HALF>
6169 (match_operand:VU 3 "register_operand" "w")
6171 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6172 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6173 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VU modes: builds a PARALLEL of the lane indices
;; 0 .. <V_mode_nunits>/2-1 and emits neon_vec_<US>mult_lo_<mode> with
;; operand 0 as the destination.  Little-endian NEON only.
6176 (define_expand "vec_widen_<US>mult_lo_<mode>"
6177 [(match_operand:<V_unpack> 0 "register_operand")
6178 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6179 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6180 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6182 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6185 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6186 RTVEC_ELT (v, i) = GEN_INT (i);
6187 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6189 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply: operands 1 and 3 are each reduced to a <V_HALF> by
;; vec_select (operand 1 through the vect_par_constant_high selector in
;; operand 2), extended with SE to <V_unpack>, and multiplied.  Emitted as
;; VMULL on %f1 and %f3.  Little-endian NEON only.
6197 (define_insn "neon_vec_<US>mult_hi_<mode>"
6198 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6199 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6200 (match_operand:VU 1 "register_operand" "w")
6201 (match_operand:VU 2 "vect_par_constant_high" "")))
6202 (SE:<V_unpack> (vec_select:<V_HALF>
6203 (match_operand:VU 3 "register_operand" "w")
6205 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6206 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6207 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VU modes: builds a PARALLEL of the lane indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and emits
;; neon_vec_<US>mult_hi_<mode> with operand 0 as the destination.
;; Little-endian NEON only.
6210 (define_expand "vec_widen_<US>mult_hi_<mode>"
6211 [(match_operand:<V_unpack> 0 "register_operand")
6212 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6213 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6214 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6216 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6219 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6220 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6221 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6223 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift left: operand 1 (mode VW) is shifted left by operand 2,
;; which must satisfy const_neon_scalar_shift_amount_operand, and the result
;; is extended with SE to <V_widen>.  Emitted as VSHLL.
6232 (define_insn "neon_vec_<US>shiftl_<mode>"
6233 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6234 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6235 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6238 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6240 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the VU modes: takes the <V_HALF>mode subreg of operand 1 at
;; byte offset 0 and feeds it to neon_vec_<US>shiftl_<V_half>, with operand 0
;; as the destination.  Little-endian NEON only.
6243 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6244 [(match_operand:<V_unpack> 0 "register_operand")
6245 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6246 (match_operand:SI 2 "immediate_operand")]
6247 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6249 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6250 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the VU modes: takes the <V_HALF>mode subreg of operand 1 at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) and feeds it to
;; neon_vec_<US>shiftl_<V_half>, with operand 0 as the destination.
;; Little-endian NEON only.
6256 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6257 [(match_operand:<V_unpack> 0 "register_operand")
6258 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6259 (match_operand:SI 2 "immediate_operand")]
6260 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6262 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6263 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6264 GET_MODE_SIZE (<V_HALF>mode)),
6270 ;; Vectorize for the non-NEON-quad case.
;; Extends the whole VDI operand 1 with SE to <V_widen>; emitted as VMOVL
;; from %P1 into %q0.
6271 (define_insn "neon_unpack<US>_<mode>"
6272 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6273 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6275 "vmovl.<US><V_sz_elem> %q0, %P1"
6276 [(set_attr "type" "neon_move")]
;; Expander for the VDI modes: widens operand 1 into a fresh <V_widen>mode
;; temporary with neon_unpack<US>_<mode>, then copies the low part of that
;; temporary into operand 0 with neon_vget_low<V_widen_l>.
6279 (define_expand "vec_unpack<US>_lo_<mode>"
6280 [(match_operand:<V_double_width> 0 "register_operand")
6281 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6284 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6285 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6286 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: widens operand 1 into a fresh <V_widen>mode
;; temporary with neon_unpack<US>_<mode>, then copies the high part of that
;; temporary into operand 0 with neon_vget_high<V_widen_l>.
6292 (define_expand "vec_unpack<US>_hi_<mode>"
6293 [(match_operand:<V_double_width> 0 "register_operand")
6294 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6297 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6298 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6299 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two full VDI operands: operand 1 is extended with SE
;; to <V_widen> and multiplied by operand 2; emitted as VMULL from %P1 and
;; %P2 into %q0.
6305 (define_insn "neon_vec_<US>mult_<mode>"
6306 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6307 (mult:<V_widen> (SE:<V_widen>
6308 (match_operand:VDI 1 "register_operand" "w"))
6310 (match_operand:VDI 2 "register_operand" "w"))))]
6312 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6313 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VDI modes: multiplies operands 1 and 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then copies the
;; high part of the temporary into operand 0 with neon_vget_high<V_widen_l>.
6316 (define_expand "vec_widen_<US>mult_hi_<mode>"
6317 [(match_operand:<V_double_width> 0 "register_operand")
6318 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6319 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6322 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6323 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6324 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: multiplies operands 1 and 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then copies the
;; low part of the temporary into operand 0 with neon_vget_low<V_widen_l>.
6331 (define_expand "vec_widen_<US>mult_lo_<mode>"
6332 [(match_operand:<V_double_width> 0 "register_operand")
6333 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6334 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6337 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6338 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6339 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: shifts operand 1 by operand 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>shiftl_<mode>, then copies the
;; high part of the temporary into operand 0 with neon_vget_high<V_widen_l>.
6346 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6347 [(match_operand:<V_double_width> 0 "register_operand")
6348 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6349 (match_operand:SI 2 "immediate_operand")]
6352 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6353 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6354 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: shifts operand 1 by operand 2 into a fresh
;; <V_widen>mode temporary with neon_vec_<US>shiftl_<mode>, then copies the
;; low part of the temporary into operand 0 with neon_vget_low<V_widen_l>.
6360 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6361 [(match_operand:<V_double_width> 0 "register_operand")
6362 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6363 (match_operand:SI 2 "immediate_operand")]
6366 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6367 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6368 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6374 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6375 ; because the ordering of vector elements in Q registers is different from what
6376 ; the semantics of the instructions require.
;; Narrowing pack: truncates operands 1 and 2 (mode VN) to <V_narrow> and
;; concatenates them into a <V_narrow_pack> result.  Emitted as two VMOVN
;; instructions (length 8), the first writing %e0 and the second %f0.
;; The output uses the early-clobber constraint "=&w", presumably because
;; %e0 is written before %q2 is read.  Little-endian NEON only.
6378 (define_insn "vec_pack_trunc_<mode>"
6379 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6380 (vec_concat:<V_narrow_pack>
6381 (truncate:<V_narrow>
6382 (match_operand:VN 1 "register_operand" "w"))
6383 (truncate:<V_narrow>
6384 (match_operand:VN 2 "register_operand" "w"))))]
6385 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6386 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6387 [(set_attr "type" "multiple")
6388 (set_attr "length" "8")]
6391 ;; For the non-quad case.
;; Truncates the single VN operand 1 to <V_narrow>; emitted as one VMOVN
;; from %q1 into %P0.  Little-endian NEON only.
6392 (define_insn "neon_vec_pack_trunc_<mode>"
6393 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6394 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6395 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6396 "vmovn.i<V_sz_elem>\t%P0, %q1"
6397 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for the VSHFT modes: places operand 1 in the low half and
;; operand 2 in the high half of a fresh <V_DOUBLE>mode temporary
;; (move_lo_quad / move_hi_quad), then narrows the temporary into operand 0
;; with neon_vec_pack_trunc_<V_double>.  Little-endian NEON only.
6400 (define_expand "vec_pack_trunc_<mode>"
6401 [(match_operand:<V_narrow_pack> 0 "register_operand")
6402 (match_operand:VSHFT 1 "register_operand")
6403 (match_operand:VSHFT 2 "register_operand")]
6404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6406 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6408 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6409 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6410 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Floating-point absolute difference written as (abs (minus op1 op2)) in
;; the VF modes; emitted as VABD.<V_s_elem>.  Enabled when
;; ARM_HAVE_NEON_<MODE>_ARITH holds.
6414 (define_insn "neon_vabd<mode>_2"
6415 [(set (match_operand:VF 0 "s_register_operand" "=w")
6416 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6417 (match_operand:VF 2 "s_register_operand" "w"))))]
6418 "ARM_HAVE_NEON_<MODE>_ARITH"
6419 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6420 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Variant of the absolute-difference pattern in which the inner operation
;; on operands 1 and 2 is an unspec rather than a minus; emitted as
;; VABD.<V_if_elem>.  Enabled when ARM_HAVE_NEON_<MODE>_ARITH holds.
6423 (define_insn "neon_vabd<mode>_3"
6424 [(set (match_operand:VF 0 "s_register_operand" "=w")
6425 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6426 (match_operand:VF 2 "s_register_operand" "w")]
6428 "ARM_HAVE_NEON_<MODE>_ARITH"
6429 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6430 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Integer matrix multiply-accumulate: a MATMUL unspec over the V16QI
;; operands 2 and 3 is combined with the V4SI accumulator in operand 1,
;; which is tied to the output register ("0").  Emitted as
;; V<sup>MMLA.<mmla_sfx> on Q registers.
6433 (define_insn "neon_<sup>mmlav16qi"
6434 [(set (match_operand:V4SI 0 "register_operand" "=w")
6436 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6437 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6438 (match_operand:V4SI 1 "register_operand" "0")))]
6440 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6441 [(set_attr "type" "neon_mla_s_q")]
;; BFloat16 dot product with accumulate: the accumulator (operand 1, tied to
;; the output) is added to an operation over the <VSF2BF> operands 2 and 3.
;; Emitted as VDOT.BF16.
6444 (define_insn "neon_vbfdot<VCVTF:mode>"
6445 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6446 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6448 (match_operand:<VSF2BF> 2 "register_operand" "w")
6449 (match_operand:<VSF2BF> 3 "register_operand" "w")]
6452 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6453 [(set_attr "type" "neon_dot<q>")]
;; By-lane form of the BFloat16 dot product: operand 3 is a V4BF register
;; (constraint "x") and operand 4 is the immediate lane index, printed as
;; %P3[%c4].  The accumulator (operand 1) is tied to the output.
6456 (define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6457 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6458 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6460 (match_operand:<VSF2BF> 2 "register_operand" "w")
6461 (match_operand:V4BF 3 "register_operand" "x")
6462 (match_operand:SI 4 "immediate_operand" "i")]
6465 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6466 [(set_attr "type" "neon_dot<q>")]
;; By-lane form of the BFloat16 dot product with a V8BF lane operand.
;; `half' is a quarter of operand 3's element count.  One output template
;; indexes %e3 with the lane unchanged; the other rewrites operand 4 to
;; lane - half and indexes %f3.
;; NOTE(review): the test selecting between the two templates is not
;; visible here; presumably lane < half picks the %e3 form -- confirm.
6469 (define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6470 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6471 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6473 (match_operand:<VSF2BF> 2 "register_operand" "w")
6474 (match_operand:V8BF 3 "register_operand" "x")
6475 (match_operand:SI 4 "immediate_operand" "i")]
6479 int lane = INTVAL (operands[4]);
6480 int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6482 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6485 operands[4] = GEN_INT (lane - half);
6486 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6489 [(set_attr "type" "neon_dot<q>")]
;; Converts the V4SF operand 1 to BFloat16, writing the %<V_bf_low>0 part
;; of the VBFCVT destination; emitted as VCVT.BF16.F32.
6492 (define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6493 [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6494 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6497 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6498 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Converts the V4SF operand 2 to BFloat16 and writes it to %f0 of the V8BF
;; destination (UNSPEC_BFCVT_HIGH).  Operand 1 is tied to the output,
;; presumably so the remaining half of the register is preserved.
6501 (define_insn "neon_vbfcvtv4sf_highv8bf"
6502 [(set (match_operand:V8BF 0 "register_operand" "=w")
6503 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6504 (match_operand:V4SF 2 "register_operand" "w")]
6505 UNSPEC_BFCVT_HIGH))]
6507 "vcvt.bf16.f32\\t%f0, %q2"
6508 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Scalar conversion of the SF operand 1 to a BF result, both in "t"
;; registers; emitted as VCVTB.BF16.F32.
6511 (define_insn "neon_vbfcvtsf"
6512 [(set (match_operand:BF 0 "register_operand" "=t")
6513 (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6516 "vcvtb.bf16.f32\\t%0, %1"
6517 [(set_attr "type" "f_cvt")]
;; Converts a VBFCVT operand to V4SF.  No conversion instruction is used:
;; the %<V_bf_low>1 part of the source is shifted left by 16 into %q0 with
;; VSHLL.U32.
6520 (define_insn "neon_vbfcvt<VBFCVT:mode>"
6521 [(set (match_operand:V4SF 0 "register_operand" "=w")
6522 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6525 "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6526 [(set_attr "type" "neon_shift_imm_q")]
;; Converts the %f1 part of the V8BF operand 1 to V4SF
;; (UNSPEC_BFCVT_HIGH) by shifting it left by 16 into %q0 with VSHLL.U32.
6529 (define_insn "neon_vbfcvt_highv8bf"
6530 [(set (match_operand:V4SF 0 "register_operand" "=w")
6531 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6532 UNSPEC_BFCVT_HIGH))]
6534 "vshll.u32\\t%q0, %f1, #16"
6535 [(set_attr "type" "neon_shift_imm_q")]
6538 ;; Convert a BF scalar operand to SF via VSHL.
6539 ;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
6540 ;; would be allocated, therefore the operands must be converted to intermediate
6541 ;; vectors (i.e. V2SI) in order to apply 64-bit registers.
;; Expansion steps, using two fresh V2SI temporaries:
;;   1. neon_vbfcvtbf_cvtmodev2si moves the BF operand 1 into op1;
;;   2. neon_vshl_nv2si shifts op1 left by 16 into op0;
;;   3. neon_vbfcvtbf_cvtmodesf moves op0 into the SF operand 0.
6542 (define_expand "neon_vbfcvtbf"
6543 [(match_operand:SF 0 "register_operand")
6544 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6547 rtx op0 = gen_reg_rtx (V2SImode);
6548 rtx op1 = gen_reg_rtx (V2SImode);
6549 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
6550 emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
6551 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
6555 ;; Convert BF mode to V2SI and V2SI to SF.
6556 ;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6557 ;; register indexed by a 32-bit sub-register number.
6558 ;; This will generate reloads, but the compiler can optimize out the moves.
6559 ;; Use the 'x' constraint to keep the 32-bit sub-registers within an indexable
6560 ;; range, so as to avoid extra moves.
;; Mode-changing helper over the VBFCVTM modes: the <V_bf_cvt_m> input
;; (operand 1) is tied to the "x"-constrained output through "0", so source
;; and destination share a register.
6561 (define_insn "neon_vbfcvtbf_cvtmode<mode>"
6562 [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6563 (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
;; BFloat16 matrix multiply-accumulate: an unspec over the V8BF operands 2
;; and 3 is added to the V4SF accumulator in operand 1, which is tied to the
;; output.  Emitted as VMMLA.BF16 on Q registers.
6569 (define_insn "neon_vmmlav8bf"
6570 [(set (match_operand:V4SF 0 "register_operand" "=w")
6571 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6572 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6573 (match_operand:V8BF 3 "register_operand" "w")]
6576 "vmmla.bf16\\t%q0, %q2, %q3"
6577 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFloat16 fused multiply-accumulate: an unspec over the V8BF operands 2
;; and 3 is added to the V4SF accumulator in operand 1, which is tied to the
;; output.  Emitted as VFMA<bt>.BF16 on Q registers.
6580 (define_insn "neon_vfma<bt>v8bf"
6581 [(set (match_operand:V4SF 0 "register_operand" "=w")
6582 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6583 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6584 (match_operand:V8BF 3 "register_operand" "w")]
6587 "vfma<bt>.bf16\\t%q0, %q2, %q3"
6588 [(set_attr "type" "neon_fp_mla_s_q")]
;; By-lane form of VFMA<bt>.BF16: operand 3 is a V4BF register (constraint
;; "x") and operand 4 is the constant lane index, printed as %P3[%c4].
;; The V4SF accumulator (operand 1) is tied to the output.
6591 (define_insn "neon_vfma<bt>_lanev8bf"
6592 [(set (match_operand:V4SF 0 "register_operand" "=w")
6593 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6594 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6595 (match_operand:V4BF 3 "register_operand" "x")
6596 (match_operand:SI 4 "const_int_operand" "n")]
6599 "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6600 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
;; By-lane VFMA<bt>.BF16 with a V8BF lane operand, expanded through
;; neon_vfma<bt>_lanev8bf.  The lane (operand 4) is asserted to be in 0..7.
;; One path forwards operands 3 and 4 unchanged; the other copies the high
;; part of operand 3 into a fresh V4BF register with neon_vget_highv8bf and
;; rebases the lane to lane - 4.
;; NOTE(review): the test selecting between the two paths is not visible
;; here; presumably lane < 4 takes the direct path -- confirm.
6603 (define_expand "neon_vfma<bt>_laneqv8bf"
6604 [(set (match_operand:V4SF 0 "register_operand" "=w")
6605 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6606 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6607 (match_operand:V8BF 3 "register_operand" "x")
6608 (match_operand:SI 4 "const_int_operand" "n")]
6612 int lane = INTVAL (operands[4]);
6613 gcc_assert (IN_RANGE(lane, 0, 7));
6616 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6620 rtx op_highpart = gen_reg_rtx (V4BFmode);
6621 emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6622 operands[4] = GEN_INT (lane - 4);
6623 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6627 [(set_attr "type" "neon_fp_mla_s_scalar_q")]