]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/arm/neon.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / arm / neon.md
1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Its permitted values are "vadd", "vmin" and "vmax"; the default is "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Store the V8QI register in operand 1 to the memory in operand 0.  The
;; source is wrapped in an UNSPEC_UNALIGNED_STORE unspec, and the assembly
;; text is whatever output_move_neon returns for the operands.
26 (define_insn "unaligned_storev8qi"
27 [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29 UNSPEC_UNALIGNED_STORE))]
30 "TARGET_NEON"
31 "*
32 return output_move_neon (operands);
33 "
34 [(set_attr "type" "neon_store1_1reg")])
35
;; Move pattern for the VDXMOV modes.  The condition requires TARGET_NEON and
;; that at least one of the two operands is a register.  There are ten
;; constraint alternatives; the C body dispatches on which_alternative, and the
;; "type", "length" and pool-range attributes below list one value per
;; alternative in the same order.
36 (define_insn "*neon_mov<mode>"
37 [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
38 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
39 (match_operand:VDXMOV 1 "general_operand"
40 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
41 "TARGET_NEON
42 && (register_operand (operands[0], <MODE>mode)
43 || register_operand (operands[1], <MODE>mode))"
44 {
/* Alternatives 2 and 3 (source constraints Dm and Dn) are handled first.  */
45 if (which_alternative == 2 || which_alternative == 3)
46 {
47 int width, is_valid;
/* Static storage, because the buffer itself is returned as the template.  */
48 static char templ[40];
49
/* operands[1] and width are passed by address and are both used below.  */
50 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
51 &operands[1], &width);
52
53 gcc_assert (is_valid != 0);
54
/* A width of zero selects the fixed vmov.f32 form; otherwise the width
   becomes the vmov.i<width> suffix.  */
55 if (width == 0)
56 return "vmov.f32\t%P0, %1 @ <mode>";
57 else
58 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
59
60 return templ;
61 }
62
63 switch (which_alternative)
64 {
65 case 0: return "vmov\t%P0, %P1 @ <mode>";
66 case 1: case 4: return output_move_neon (operands);
/* Already returned above.  */
67 case 2: case 3: gcc_unreachable ();
68 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
69 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
/* "#" is GCC's output-template marker for an insn that must be split.  */
70 case 9: return "#";
/* Alternatives 7 and 8.  */
71 default: return output_move_double (operands, true, NULL);
72 }
73 }
74 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
75 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
77 multiple")
78 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
80 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
81 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
82
;; Move pattern for the VQXMOV modes.  It has the same structure as the VDXMOV
;; pattern: TARGET_NEON with at least one register operand, ten constraint
;; alternatives dispatched on which_alternative, and per-alternative "type",
;; "length" and pool-range attributes in the same order.
83 (define_insn "*neon_mov<mode>"
84 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
85 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
86 (match_operand:VQXMOV 1 "general_operand"
87 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
88 "TARGET_NEON
89 && (register_operand (operands[0], <MODE>mode)
90 || register_operand (operands[1], <MODE>mode))"
91 {
/* Alternatives 2 and 3 (source constraints Dm and DN) are handled first.  */
92 if (which_alternative == 2 || which_alternative == 3)
93 {
94 int width, is_valid;
/* Static storage, because the buffer itself is returned as the template.  */
95 static char templ[40];
96
/* operands[1] and width are passed by address and are both used below.  */
97 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
98 &operands[1], &width);
99
100 gcc_assert (is_valid != 0);
101
/* A width of zero selects the fixed vmov.f32 form; otherwise the width
   becomes the vmov.i<width> suffix.  */
102 if (width == 0)
103 return "vmov.f32\t%q0, %1 @ <mode>";
104 else
105 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
106
107 return templ;
108 }
109
110 switch (which_alternative)
111 {
112 case 0: return "vmov\t%q0, %q1 @ <mode>";
113 case 1: case 4: return output_move_neon (operands);
/* Already returned above.  */
114 case 2: case 3: gcc_unreachable ();
/* Alternatives 5 and 6 each emit two vmov instructions separated by "\;".  */
115 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
116 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
/* Alternatives 7, 8 and 9.  */
117 default: return output_move_quad (operands);
118 }
119 }
120 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
121 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
127
128 /* We define these mov expanders to match the standard mov$a optab to prevent
129 the mid-end from trying to do a subreg for these modes, which is the most
130 inefficient way to expand the move. Also, big-endian subregs aren't
131 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132 Without these RTL generation patterns the mid-end would attempt to take a
133 subreg and may ICE if it can't. */
134
;; TImode move expander, available under TARGET_NEON.  Both operands are
;; checked with aligned_operand via gcc_checking_assert.  While new pseudos
;; can still be created, a source whose destination is not a REG is first
;; forced into a register.  The body does not end in DONE, so the set in the
;; pattern above is then emitted with the (possibly updated) operands.
135 (define_expand "movti"
136 [(set (match_operand:TI 0 "nonimmediate_operand")
137 (match_operand:TI 1 "general_operand"))]
138 "TARGET_NEON"
139 {
140 gcc_checking_assert (aligned_operand (operands[0], TImode));
141 gcc_checking_assert (aligned_operand (operands[1], TImode));
142 if (can_create_pseudo_p ())
143 {
144 if (!REG_P (operands[0]))
145 operands[1] = force_reg (TImode, operands[1]);
146 }
147 })
148
;; Move expander for the VSTRUCT modes, available under TARGET_NEON or
;; TARGET_HAVE_MVE.  Both operands are checked with aligned_operand via
;; gcc_checking_assert.  While new pseudos can still be created, a source
;; whose destination is not a REG is first forced into a register.
149 (define_expand "mov<mode>"
150 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151 (match_operand:VSTRUCT 1 "general_operand"))]
152 "TARGET_NEON || TARGET_HAVE_MVE"
153 {
154 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
156 if (can_create_pseudo_p ())
157 {
158 if (!REG_P (operands[0]))
159 operands[1] = force_reg (<MODE>mode, operands[1]);
160 }
161 })
162
163 ;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split
164 ;; into two groups. The pattern movv8hf is common to MVE and NEON, so it has
165 ;; been moved into the vec-common.md file. The remaining mov expand patterns
166 ;; for half-float and bfloat modes are implemented below.
;; Move expander for the VHFBF_split modes, available under TARGET_NEON.  Both
;; operand predicates are s_register_operand.  The body mirrors the other mov
;; expanders in this file: alignment is checked with gcc_checking_assert, and
;; while pseudos can be created a source whose destination is not a REG is
;; forced into a register.
167 (define_expand "mov<mode>"
168 [(set (match_operand:VHFBF_split 0 "s_register_operand")
169 (match_operand:VHFBF_split 1 "s_register_operand"))]
170 "TARGET_NEON"
171 {
172 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
174 if (can_create_pseudo_p ())
175 {
176 if (!REG_P (operands[0]))
177 operands[1] = force_reg (<MODE>mode, operands[1]);
178 }
179 })
180
;; Move pattern for the VSTRUCT modes, with three alternatives:
;; register-to-register, register-to-memory and memory-to-register.  At least
;; one operand must be a register.  The "length" attribute is computed by
;; arm_attr_length_move_neon rather than listed per alternative.
181 (define_insn "*neon_mov<mode>"
182 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
183 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
184 "(TARGET_NEON || TARGET_HAVE_MVE)
185 && (register_operand (operands[0], <MODE>mode)
186 || register_operand (operands[1], <MODE>mode))"
187 {
188 switch (which_alternative)
189 {
/* "#" is GCC's output-template marker for an insn that must be split; see
   the define_splits for the individual structure modes.  */
190 case 0: return "#";
191 case 1: case 2: return output_move_neon (operands);
192 default: gcc_unreachable ();
193 }
194 }
195 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
196 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
197
;; After reload, split an EI-mode register-to-register copy into two sets:
;; a TImode piece at the base register number and a DImode piece at base + 4.
;; neon_disambiguate_copy receives the operands array together with the
;; dest/src pieces; the split pattern then uses operands 0-3.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination ranges are copied safely -- confirm in arm.c.
198 (define_split
199 [(set (match_operand:EI 0 "s_register_operand" "")
200 (match_operand:EI 1 "s_register_operand" ""))]
201 "TARGET_NEON && reload_completed"
202 [(set (match_dup 0) (match_dup 1))
203 (set (match_dup 2) (match_dup 3))]
204 {
205 int rdest = REGNO (operands[0]);
206 int rsrc = REGNO (operands[1]);
207 rtx dest[2], src[2];
208
209 dest[0] = gen_rtx_REG (TImode, rdest);
210 src[0] = gen_rtx_REG (TImode, rsrc);
211 dest[1] = gen_rtx_REG (DImode, rdest + 4);
212 src[1] = gen_rtx_REG (DImode, rsrc + 4);
213
214 neon_disambiguate_copy (operands, dest, src, 2);
215 })
216
;; After reload, split an OI-mode register-to-register copy into two TImode
;; sets, at the base register number and at base + 4.  Enabled for
;; TARGET_NEON or TARGET_HAVE_MVE.  neon_disambiguate_copy receives the
;; operands array together with the dest/src pieces; the split pattern then
;; uses operands 0-3.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination ranges are copied safely -- confirm in arm.c.
217 (define_split
218 [(set (match_operand:OI 0 "s_register_operand" "")
219 (match_operand:OI 1 "s_register_operand" ""))]
220 "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))]
223 {
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
226 rtx dest[2], src[2];
227
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
232
233 neon_disambiguate_copy (operands, dest, src, 2);
234 })
235
;; After reload, split a CI-mode register-to-register copy into three TImode
;; sets, at the base register number, base + 4 and base + 8.
;; neon_disambiguate_copy receives the operands array together with the
;; dest/src pieces; the split pattern then uses operands 0-5.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination ranges are copied safely -- confirm in arm.c.
236 (define_split
237 [(set (match_operand:CI 0 "s_register_operand" "")
238 (match_operand:CI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))]
243 {
244 int rdest = REGNO (operands[0]);
245 int rsrc = REGNO (operands[1]);
246 rtx dest[3], src[3];
247
248 dest[0] = gen_rtx_REG (TImode, rdest);
249 src[0] = gen_rtx_REG (TImode, rsrc);
250 dest[1] = gen_rtx_REG (TImode, rdest + 4);
251 src[1] = gen_rtx_REG (TImode, rsrc + 4);
252 dest[2] = gen_rtx_REG (TImode, rdest + 8);
253 src[2] = gen_rtx_REG (TImode, rsrc + 8);
254
255 neon_disambiguate_copy (operands, dest, src, 3);
256 })
257
;; After reload, split an XI-mode register-to-register copy into four TImode
;; sets, at the base register number, base + 4, base + 8 and base + 12.
;; Enabled for TARGET_NEON or TARGET_HAVE_MVE.  neon_disambiguate_copy
;; receives the operands array together with the dest/src pieces; the split
;; pattern then uses operands 0-7.
;; NOTE(review): presumably it orders the pieces so that overlapping source
;; and destination ranges are copied safely -- confirm in arm.c.
258 (define_split
259 [(set (match_operand:XI 0 "s_register_operand" "")
260 (match_operand:XI 1 "s_register_operand" ""))]
261 "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
262 [(set (match_dup 0) (match_dup 1))
263 (set (match_dup 2) (match_dup 3))
264 (set (match_dup 4) (match_dup 5))
265 (set (match_dup 6) (match_dup 7))]
266 {
267 int rdest = REGNO (operands[0]);
268 int rsrc = REGNO (operands[1]);
269 rtx dest[4], src[4];
270
271 dest[0] = gen_rtx_REG (TImode, rdest);
272 src[0] = gen_rtx_REG (TImode, rsrc);
273 dest[1] = gen_rtx_REG (TImode, rdest + 4);
274 src[1] = gen_rtx_REG (TImode, rsrc + 4);
275 dest[2] = gen_rtx_REG (TImode, rdest + 8);
276 src[2] = gen_rtx_REG (TImode, rsrc + 8);
277 dest[3] = gen_rtx_REG (TImode, rdest + 12);
278 src[3] = gen_rtx_REG (TImode, rsrc + 12);
279
280 neon_disambiguate_copy (operands, dest, src, 4);
281 })
282
;; Misaligned move expander for the VDQX modes.  It is enabled only for
;; little-endian NEON targets with unaligned_access set.  The source is
;; wrapped in an UNSPEC_MISALIGNED_ACCESS unspec so that the insns below,
;; which use the same unspec, match the result.
283 (define_expand "movmisalign<mode>"
284 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
285 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
286 UNSPEC_MISALIGNED_ACCESS))]
287 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
288 {
289 rtx adjust_mem;
290 /* This pattern is not permitted to fail during expansion: if both arguments
291 are non-registers (e.g. memory := constant, which can be created by the
292 auto-vectorizer), force operand 1 into a register. */
293 if (!s_register_operand (operands[0], <MODE>mode)
294 && !s_register_operand (operands[1], <MODE>mode))
295 operands[1] = force_reg (<MODE>mode, operands[1]);
296
/* Pick the operand whose address may need adjusting: the source when the
   destination is a register, otherwise the destination.  */
297 if (s_register_operand (operands[0], <MODE>mode))
298 adjust_mem = operands[1];
299 else
300 adjust_mem = operands[0];
301
302 /* Legitimize address. */
303 if (!neon_vector_mem_operand (adjust_mem, 2, true))
304 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
305
306 })
307
;; Misaligned store of a VDX-mode register (operand 1) to memory (operand 0),
;; emitted as a single vst1 with element size <V_sz_elem>.
308 (define_insn "*movmisalign<mode>_neon_store"
309 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
310 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
311 UNSPEC_MISALIGNED_ACCESS))]
312 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
313 "vst1.<V_sz_elem>\t{%P1}, %A0"
314 [(set_attr "type" "neon_store1_1reg<q>")])
315
;; Misaligned load of a VDX-mode register (operand 0) from memory (operand 1),
;; emitted as a single vld1 with element size <V_sz_elem>.
316 (define_insn "*movmisalign<mode>_neon_load"
317 [(set (match_operand:VDX 0 "s_register_operand" "=w")
318 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
319 " Um")]
320 UNSPEC_MISALIGNED_ACCESS))]
321 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
322 "vld1.<V_sz_elem>\t{%P0}, %A1"
323 [(set_attr "type" "neon_load1_1reg<q>")])
324
;; Misaligned store of a VQX-mode register (operand 1) to memory (operand 0).
;; Same as the VDX store except that the register is printed with %q.
325 (define_insn "*movmisalign<mode>_neon_store"
326 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
327 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
328 UNSPEC_MISALIGNED_ACCESS))]
329 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
330 "vst1.<V_sz_elem>\t{%q1}, %A0"
331 [(set_attr "type" "neon_store1_1reg<q>")])
332
;; Misaligned load of a VQX-mode register (operand 0) from memory (operand 1).
;; Same as the VDX load except that the register is printed with %q.
333 (define_insn "*movmisalign<mode>_neon_load"
334 [(set (match_operand:VQX 0 "s_register_operand" "=w")
335 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
336 " Um")]
337 UNSPEC_MISALIGNED_ACCESS))]
338 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
339 "vld1.<V_sz_elem>\t{%q0}, %A1"
340 [(set_attr "type" "neon_load1_1reg<q>")])
341
;; Insert the scalar in operand 1 into one lane of a VD_LANE-mode vector.
;; Operand 3 is the incoming vector and is tied to the output by the "0"
;; constraint.  Operand 2 is the vec_merge selector; the body converts it to
;; a lane index.  Alternative 0 emits a single-lane vld1 from memory and
;; alternative 1 emits a vmov to the lane.
342 (define_insn "@vec_set<mode>_internal"
343 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
344 (vec_merge:VD_LANE
345 (vec_duplicate:VD_LANE
346 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
347 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
348 (match_operand:SI 2 "immediate_operand" "i,i")))]
349 "TARGET_NEON"
350 {
/* ffs returns the 1-based position of the lowest set bit, so subtracting
   one gives the lane index selected by the mask.  */
351 int elt = ffs ((int) INTVAL (operands[2])) - 1;
/* Lane numbering is mirrored for big-endian.  */
352 if (BYTES_BIG_ENDIAN)
353 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
354 operands[2] = GEN_INT (elt);
355
356 if (which_alternative == 0)
357 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
358 else
359 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
360 }
361 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
362
;; Insert the scalar in operand 1 into one lane of a VQ2-mode vector.  The
;; body locates the half of the register that holds the lane, rewrites
;; operand 0 to a <V_HALF>mode register for that half, and then emits the
;; same vld1/vmov lane templates as the VD_LANE variant.
363 (define_insn "@vec_set<mode>_internal"
364 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
365 (vec_merge:VQ2
366 (vec_duplicate:VQ2
367 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
368 (match_operand:VQ2 3 "s_register_operand" "0,0")
369 (match_operand:SI 2 "immediate_operand" "i,i")))]
370 "TARGET_NEON"
371 {
/* ffs returns the 1-based position of the lowest set bit, so subtracting
   one gives the lane index selected by the mask.  */
372 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
373 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
/* elt is the lane within its half; hi is the register-number offset of
   that half (0 for the first half, 2 for the second).  */
374 int elt = elem % half_elts;
375 int hi = (elem / half_elts) * 2;
376 int regno = REGNO (operands[0]);
377
/* Lane numbering within the half is mirrored for big-endian.  */
378 if (BYTES_BIG_ENDIAN)
379 elt = half_elts - 1 - elt;
380
381 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
382 operands[2] = GEN_INT (elt);
383
384 if (which_alternative == 0)
385 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
386 else
387 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
388 }
389 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
390 )
391
;; Insert the DImode scalar in operand 1 into one element of a V2DI_ONLY-mode
;; vector.  The body rewrites operand 0 to the DImode register that holds the
;; selected element and then emits a whole-register vld1.64 (alternative 0)
;; or vmov (alternative 1).
392 (define_insn "@vec_set<mode>_internal"
393 [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
394 (vec_merge:V2DI_ONLY
395 (vec_duplicate:V2DI_ONLY
396 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
397 (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
398 (match_operand:SI 2 "immediate_operand" "i,i")))]
399 "TARGET_NEON"
400 {
/* ffs returns the 1-based position of the lowest set bit, so subtracting
   one gives the element index selected by the mask.  */
401 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
/* Each element is two register numbers further on.  */
402 int regno = REGNO (operands[0]) + 2 * elem;
403
404 operands[0] = gen_rtx_REG (DImode, regno);
405
406 if (which_alternative == 0)
407 return "vld1.64\t%P0, %A1";
408 else
409 return "vmov\t%P0, %Q1, %R1";
410 }
411 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
412 )
413
;; Extract one lane of a VD_LANE-mode vector (operand 1) into operand 0.
;; Operand 2 is the lane index itself.  Alternative 0 emits a single-lane
;; vst1 to memory and alternative 1 emits a vmov from the lane.
414 (define_insn "vec_extract<mode><V_elem_l>"
415 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
416 (vec_select:<V_elem>
417 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
418 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
419 "TARGET_NEON"
420 {
/* Lane numbering is mirrored for big-endian.  */
421 if (BYTES_BIG_ENDIAN)
422 {
423 int elt = INTVAL (operands[2]);
424 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
425 operands[2] = GEN_INT (elt);
426 }
427
428 if (which_alternative == 0)
429 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
430 else
431 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
432 }
433 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
434 )
435
436 ;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
437 ;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
438 ;; by define_expand in vec-common.md file.
;; Extract one lane of a VQ2-mode vector (operand 1) into operand 0.  The
;; body locates the half of the register that holds the lane, rewrites
;; operand 1 to a <V_HALF>mode register for that half, and then emits the
;; same vst1/vmov lane templates as the VD_LANE variant.
439 (define_insn "neon_vec_extract<mode><V_elem_l>"
440 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
441 (vec_select:<V_elem>
442 (match_operand:VQ2 1 "s_register_operand" "w,w")
443 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 "TARGET_NEON"
445 {
446 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
/* elt is the lane within its half; hi is the register-number offset of
   that half (0 for the first half, 2 for the second).  */
447 int elt = INTVAL (operands[2]) % half_elts;
448 int hi = (INTVAL (operands[2]) / half_elts) * 2;
449 int regno = REGNO (operands[1]);
450
/* Lane numbering within the half is mirrored for big-endian.  */
451 if (BYTES_BIG_ENDIAN)
452 elt = half_elts - 1 - elt;
453
454 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
455 operands[2] = GEN_INT (elt);
456
457 if (which_alternative == 0)
458 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
459 else
460 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
461 }
462 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
463 )
464
465 ;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
466 ;; and this pattern is called by define_expand in vec-common.md file.
;; Extract one DImode element of a V2DI vector (operand 1) into operand 0.
;; The body rewrites operand 1 to the DImode register that holds the selected
;; element and then emits a whole-register vst1.64 (alternative 0) or vmov
;; (alternative 1).
467 (define_insn "neon_vec_extractv2didi"
468 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
469 (vec_select:DI
470 (match_operand:V2DI 1 "s_register_operand" "w,w")
471 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
472 "TARGET_NEON"
473 {
/* Each element is two register numbers further on.  */
474 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
475
476 operands[1] = gen_rtx_REG (DImode, regno);
477
478 if (which_alternative == 0)
479 return "vst1.64\t{%P1}, %A0 @ v2di";
480 else
481 return "vmov\t%Q0, %R0, %P1 @ v2di";
482 }
483 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
484 )
485
;; Vector initialisation expander for the VDQ modes, available under
;; TARGET_NEON or TARGET_HAVE_MVE.  All the work is delegated to
;; neon_expand_vector_init; DONE means the pattern itself emits nothing more.
486 (define_expand "vec_init<mode><V_elem_l>"
487 [(match_operand:VDQ 0 "s_register_operand")
488 (match_operand 1 "" "")]
489 "TARGET_NEON || TARGET_HAVE_MVE"
490 {
491 neon_expand_vector_init (operands[0], operands[1]);
492 DONE;
493 })
494
495 ;; Doubleword and quadword arithmetic.
496
497 ;; NOTE: some other instructions also support 64-bit integer
498 ;; element size, which we could potentially use for "long long" operations.
499
;; Vector addition for the VDQ modes, emitted as vadd.  The "type" attribute
;; is neon_fp_addsub_s<q> when <Is_float_mode> tests true and neon_add<q>
;; otherwise.
500 (define_insn "*add<mode>3_neon"
501 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
502 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
503 (match_operand:VDQ 2 "s_register_operand" "w")))]
504 "ARM_HAVE_NEON_<MODE>_ARITH"
505 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
506 [(set (attr "type")
507 (if_then_else (match_test "<Is_float_mode>")
508 (const_string "neon_fp_addsub_s<q>")
509 (const_string "neon_add<q>")))]
510 )
511
;; Vector subtraction (operand 1 minus operand 2) for the VDQ modes, emitted
;; as vsub.  The "type" attribute is neon_fp_addsub_s<q> when <Is_float_mode>
;; tests true and neon_sub<q> otherwise.
512 (define_insn "*sub<mode>3_neon"
513 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
514 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
515 (match_operand:VDQ 2 "s_register_operand" "w")))]
516 "ARM_HAVE_NEON_<MODE>_ARITH"
517 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
518 [(set (attr "type")
519 (if_then_else (match_test "<Is_float_mode>")
520 (const_string "neon_fp_addsub_s<q>")
521 (const_string "neon_sub<q>")))]
522 )
523
;; Vector multiplication for the VDQW modes, emitted as vmul.  The "type"
;; attribute is neon_fp_mul_s<q> when <Is_float_mode> tests true and
;; neon_mul_<V_elem_ch><q> otherwise.
524 (define_insn "*mul<mode>3_neon"
525 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
526 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
527 (match_operand:VDQW 2 "s_register_operand" "w")))]
528 "ARM_HAVE_NEON_<MODE>_ARITH"
529 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
530 [(set (attr "type")
531 (if_then_else (match_test "<Is_float_mode>")
532 (const_string "neon_fp_mul_s<q>")
533 (const_string "neon_mul_<V_elem_ch><q>")))]
534 )
535
536 /* Perform division using multiply-by-reciprocal.
537 The reciprocal is calculated using the Newton-Raphson method.
538 Enabled by -freciprocal-math (which -funsafe-math-optimizations implies)
539 and disabled for -Os since it increases code size. */
540
;; Division expander for the VCVTF modes.  It is enabled only when
;; TARGET_NEON holds, optimize_size is clear and flag_reciprocal_math is set.
;; No divide instruction is emitted: the body emits a vrecpe estimate of
;; 1/operands[2], refines it with two vrecps + multiply steps, multiplies
;; operands[1] by the result and finishes with DONE.
541 (define_expand "div<mode>3"
542 [(set (match_operand:VCVTF 0 "s_register_operand")
543 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
544 (match_operand:VCVTF 2 "s_register_operand")))]
545 "TARGET_NEON && !optimize_size
546 && flag_reciprocal_math"
547 {
548 rtx rec = gen_reg_rtx (<MODE>mode);
549 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
550
551 /* Reciprocal estimate. */
552 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
553
554 /* Perform 2 iterations of newton-raphson method. */
555 for (int i = 0; i < 2; i++)
556 {
/* Each step replaces rec with rec * vrecps (rec, operands[2]).  */
557 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
558 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
559 }
560
561 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
562 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
563 DONE;
564 }
565 )
566
567
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, emitted as vmla.  The addend (operand 1) is tied to the
;; output by the "0" constraint, so only operands 0, 2 and 3 are printed.
568 (define_insn "mul<mode>3add<mode>_neon"
569 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
570 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
571 (match_operand:VDQW 3 "s_register_operand" "w"))
572 (match_operand:VDQW 1 "s_register_operand" "0")))]
573 "ARM_HAVE_NEON_<MODE>_ARITH"
574 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
575 [(set (attr "type")
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mla_s<q>")
578 (const_string "neon_mla_<V_elem_ch><q>")))]
579 )
580
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3
;; + operand 1, emitted as vmla.f16.  The addend (operand 1) is tied to the
;; output by the "0" constraint.
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VH 0 "s_register_operand" "=w")
583 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
584 (match_operand:VH 3 "s_register_operand" "w"))
585 (match_operand:VH 1 "s_register_operand" "0")))]
586 "ARM_HAVE_NEON_<MODE>_ARITH"
587 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588 [(set_attr "type" "neon_fp_mla_s<q>")]
589 )
590
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, emitted as vmls.  Operand 1 is tied to the output by the "0"
;; constraint, so only operands 0, 2 and 3 are printed.
591 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
592 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
593 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
594 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
595 (match_operand:VDQW 3 "s_register_operand" "w"))))]
596 "ARM_HAVE_NEON_<MODE>_ARITH"
597 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
598 [(set (attr "type")
599 (if_then_else (match_test "<Is_float_mode>")
600 (const_string "neon_fp_mla_s<q>")
601 (const_string "neon_mla_<V_elem_ch><q>")))]
602 )
603
604 ;; Fused multiply-accumulate
605 ;; We define each insn twice here:
606 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
607 ;; to be able to use when converting to FMA.
608 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: operand 0 = operand 1 * operand 2
;; + operand 3, emitted as vfma.  The addend (operand 3) is tied to the
;; output by the "0" constraint.  Gated on ARM_HAVE_NEON_<MODE>_ARITH and
;; TARGET_FMA.
609 (define_insn "fma<VCVTF:mode>4"
610 [(set (match_operand:VCVTF 0 "register_operand" "=w")
611 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
612 (match_operand:VCVTF 2 "register_operand" "w")
613 (match_operand:VCVTF 3 "register_operand" "0")))]
614 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
615 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
616 [(set_attr "type" "neon_fp_mla_s<q>")]
617 )
618
;; Same RTL and vfma template as fma<VCVTF:mode>4, but gated on TARGET_NEON
;; and TARGET_FMA instead of ARM_HAVE_NEON_<MODE>_ARITH.
619 (define_insn "fma<VCVTF:mode>4_intrinsic"
620 [(set (match_operand:VCVTF 0 "register_operand" "=w")
621 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
622 (match_operand:VCVTF 2 "register_operand" "w")
623 (match_operand:VCVTF 3 "register_operand" "0")))]
624 "TARGET_NEON && TARGET_FMA"
625 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
626 [(set_attr "type" "neon_fp_mla_s<q>")]
627 )
628
;; Fused multiply-add for the VH modes: operand 0 = operand 1 * operand 2
;; + operand 3, emitted as vfma.  The addend (operand 3) is tied to the
;; output by the "0" constraint.  Gated on ARM_HAVE_NEON_<MODE>_ARITH only.
629 (define_insn "fma<VH:mode>4"
630 [(set (match_operand:VH 0 "register_operand" "=w")
631 (fma:VH
632 (match_operand:VH 1 "register_operand" "w")
633 (match_operand:VH 2 "register_operand" "w")
634 (match_operand:VH 3 "register_operand" "0")))]
635 "ARM_HAVE_NEON_<MODE>_ARITH"
636 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
637 [(set_attr "type" "neon_fp_mla_s<q>")]
638 )
639
;; Fused multiply-subtract for the VCVTF modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, emitted as vfms.  Operand 3 is tied to the output
;; by the "0" constraint.  Gated on ARM_HAVE_NEON_<MODE>_ARITH and TARGET_FMA.
640 (define_insn "*fmsub<VCVTF:mode>4"
641 [(set (match_operand:VCVTF 0 "register_operand" "=w")
642 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
643 (match_operand:VCVTF 2 "register_operand" "w")
644 (match_operand:VCVTF 3 "register_operand" "0")))]
645 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
646 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
647 [(set_attr "type" "neon_fp_mla_s<q>")]
648 )
649
;; Same RTL and vfms template as *fmsub<VCVTF:mode>4, but gated on
;; TARGET_NEON and TARGET_FMA instead of ARM_HAVE_NEON_<MODE>_ARITH.
650 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
651 [(set (match_operand:VCVTF 0 "register_operand" "=w")
652 (fma:VCVTF
653 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
654 (match_operand:VCVTF 2 "register_operand" "w")
655 (match_operand:VCVTF 3 "register_operand" "0")))]
656 "TARGET_NEON && TARGET_FMA"
657 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
658 [(set_attr "type" "neon_fp_mla_s<q>")]
659 )
660
;; Fused multiply-subtract for the VH modes: operand 0 = (-operand 1) *
;; operand 2 + operand 3, emitted as vfms.  Operand 3 is tied to the output
;; by the "0" constraint.  Gated on TARGET_NEON_FP16INST.
661 (define_insn "fmsub<VH:mode>4_intrinsic"
662 [(set (match_operand:VH 0 "register_operand" "=w")
663 (fma:VH
664 (neg:VH (match_operand:VH 1 "register_operand" "w"))
665 (match_operand:VH 2 "register_operand" "w")
666 (match_operand:VH 3 "register_operand" "0")))]
667 "TARGET_NEON_FP16INST"
668 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
669 [(set_attr "type" "neon_fp_mla_s<q>")]
670 )
671
;; One vrint<variant> instruction per NEON_VRINT unspec, for the VCVTF modes.
;; The element suffix in the template is fixed at .f32.  Gated on TARGET_NEON
;; and TARGET_VFP5.
672 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
673 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
674 (unspec:VCVTF [(match_operand:VCVTF 1
675 "s_register_operand" "w")]
676 NEON_VRINT))]
677 "TARGET_NEON && TARGET_VFP5"
678 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
679 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
680 )
681
;; Float-to-integer conversion: a FIXUORS code applied to a NEON_VCVT unspec
;; of the VCVTF-mode input, producing a <V_cmp_result>-mode value.  Emitted
;; as vcvt<variant>.<su>32.f32.  Gated on TARGET_NEON and TARGET_VFP5, and
;; marked as not predicable.
682 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
683 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
684 (FIXUORS:<V_cmp_result> (unspec:VCVTF
685 [(match_operand:VCVTF 1 "register_operand" "w")]
686 NEON_VCVT)))]
687 "TARGET_NEON && TARGET_VFP5"
688 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
689 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
690 (set_attr "predicable" "no")]
691 )
692
;; Bitwise OR for the VDQ modes.  Alternative 0 is register-register and is
;; emitted as a three-operand vorr.  Alternative 1 takes an operand 2 matching
;; the Dl constraint, ties operand 1 to the output, and obtains its text from
;; neon_output_logic_immediate.
693 (define_insn "ior<mode>3_neon"
694 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
695 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
696 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
697 "TARGET_NEON"
698 {
699 switch (which_alternative)
700 {
701 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
702 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
703 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
704 default: gcc_unreachable ();
705 }
706 }
707 [(set_attr "type" "neon_logic<q>")]
708 )
709
710 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
711 ;; vorr. We support the pseudo-instruction vand instead, because that
712 ;; corresponds to the canonical form the middle-end expects to use for
713 ;; immediate bitwise-ANDs.
714
;; Bitwise AND for the VDQ modes.  Alternative 0 is register-register and is
;; emitted as a three-operand vand.  Alternative 1 takes an operand 2 matching
;; the DL constraint, ties operand 1 to the output, and obtains its text from
;; neon_output_logic_immediate.  That call passes 1 as its fourth argument,
;; where the ior pattern passes 0.
715 (define_insn "and<mode>3_neon"
716 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
717 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
718 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
719 "TARGET_NEON"
720 {
721 switch (which_alternative)
722 {
723 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
724 case 1: return neon_output_logic_immediate ("vand", &operands[2],
725 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
726 default: gcc_unreachable ();
727 }
728 }
729 [(set_attr "type" "neon_logic<q>")]
730 )
731
;; OR-NOT for the VDQ modes: operand 0 = (~operand 2) | operand 1, emitted as
;; vorn.  Note that the inverted input is operand 2, which comes first in the
;; RTL but is printed last in the template.
732 (define_insn "orn<mode>3_neon"
733 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
734 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
735 (match_operand:VDQ 1 "s_register_operand" "w")))]
736 "TARGET_NEON"
737 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
738 [(set_attr "type" "neon_logic<q>")]
739 )
740
;; Bit-clear for the VDQ modes: operand 0 = (~operand 2) & operand 1, emitted
;; as vbic.  Note that the inverted input is operand 2, which comes first in
;; the RTL but is printed last in the template.
741 (define_insn "bic<mode>3_neon"
742 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
743 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
744 (match_operand:VDQ 1 "s_register_operand" "w")))]
745 "TARGET_NEON"
746 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
747 [(set_attr "type" "neon_logic<q>")]
748 )
749
;; Bitwise exclusive-OR for the VDQ modes, emitted as veor.  Register
;; operands only.
750 (define_insn "xor<mode>3_neon"
751 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
752 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
753 (match_operand:VDQ 2 "s_register_operand" "w")))]
754 "TARGET_NEON"
755 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
756 [(set_attr "type" "neon_logic<q>")]
757 )
758
;; Bitwise NOT for the VDQ modes, emitted as vmvn.  Its "type" attribute is
;; neon_move<q>, unlike the neon_logic<q> used by the other logic patterns.
759 (define_insn "one_cmpl<mode>2_neon"
760 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
761 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
762 "TARGET_NEON"
763 "vmvn\t%<V_reg>0, %<V_reg>1"
764 [(set_attr "type" "neon_move<q>")]
765 )
766
;; Absolute value for the VDQW modes, emitted as vabs with the <V_s_elem>
;; suffix.  The "type" attribute is neon_fp_abs_s<q> when <Is_float_mode>
;; tests true and neon_abs<q> otherwise.
767 (define_insn "abs<mode>2"
768 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
769 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
770 "TARGET_NEON"
771 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
772 [(set (attr "type")
773 (if_then_else (match_test "<Is_float_mode>")
774 (const_string "neon_fp_abs_s<q>")
775 (const_string "neon_abs<q>")))]
776 )
777
;; Negation for the VDQW modes, emitted as vneg with the <V_s_elem> suffix.
;; The "type" attribute is neon_fp_neg_s<q> when <Is_float_mode> tests true
;; and neon_neg<q> otherwise.
778 (define_insn "neon_neg<mode>2"
779 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
780 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
781 "TARGET_NEON"
782 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
783 [(set (attr "type")
784 (if_then_else (match_test "<Is_float_mode>")
785 (const_string "neon_fp_neg_s<q>")
786 (const_string "neon_neg<q>")))]
787 )
788
;; One pattern per ABSNEG code for the VH modes; the mnemonic is "v" followed
;; by <absneg_str>.  Gated on TARGET_NEON_FP16INST.  The "type" attribute is
;; neon_abs<q> for every ABSNEG code.
789 (define_insn "neon_<absneg_str><mode>2"
790 [(set (match_operand:VH 0 "s_register_operand" "=w")
791 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
792 "TARGET_NEON_FP16INST"
793 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
794 [(set_attr "type" "neon_abs<q>")]
795 )
796
;; Expander that exposes the VH-mode abs/neg insn under a neon_v<absneg_str>
;; name.  It emits neon_<absneg_str><mode>2 on the same two operands and
;; finishes with DONE.
797 (define_expand "neon_v<absneg_str><mode>"
798 [(set
799 (match_operand:VH 0 "s_register_operand")
800 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
801 "TARGET_NEON_FP16INST"
802 {
803 emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
804 DONE;
805 })
806
;; One pattern per FP16_RND unspec for the VH modes; the mnemonic comes from
;; <fp16_rnd_insn>.  Gated on TARGET_NEON_FP16INST.
807 (define_insn "neon_v<fp16_rnd_str><mode>"
808 [(set (match_operand:VH 0 "s_register_operand" "=w")
809 (unspec:VH
810 [(match_operand:VH 1 "s_register_operand" "w")]
811 FP16_RND))]
812 "TARGET_NEON_FP16INST"
813 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
814 [(set_attr "type" "neon_fp_round_s<q>")]
815 )
816
;; UNSPEC_VRSQRTE for the VH modes, emitted as vrsqrte.f16.  Gated on
;; TARGET_NEON_FP16INST.
817 (define_insn "neon_vrsqrte<mode>"
818 [(set (match_operand:VH 0 "s_register_operand" "=w")
819 (unspec:VH
820 [(match_operand:VH 1 "s_register_operand" "w")]
821 UNSPEC_VRSQRTE))]
822 "TARGET_NEON_FP16INST"
823 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
824 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
825 )
826
;; Unsigned minimum for the VDQIW modes, emitted as vmin with the <V_u_elem>
;; suffix.
827 (define_insn "*umin<mode>3_neon"
828 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
829 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
830 (match_operand:VDQIW 2 "s_register_operand" "w")))]
831 "TARGET_NEON"
832 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
833 [(set_attr "type" "neon_minmax<q>")]
834 )
835
;; Unsigned maximum for the VDQIW modes, emitted as vmax with the <V_u_elem>
;; suffix.
836 (define_insn "*umax<mode>3_neon"
837 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
838 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
839 (match_operand:VDQIW 2 "s_register_operand" "w")))]
840 "TARGET_NEON"
841 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
842 [(set_attr "type" "neon_minmax<q>")]
843 )
844
;; Signed minimum for the VDQW modes, emitted as vmin with the <V_s_elem>
;; suffix.  The "type" attribute is neon_fp_minmax_s<q> when <Is_float_mode>
;; tests true and neon_minmax<q> otherwise.
845 (define_insn "*smin<mode>3_neon"
846 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
847 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
848 (match_operand:VDQW 2 "s_register_operand" "w")))]
849 "TARGET_NEON"
850 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
851 [(set (attr "type")
852 (if_then_else (match_test "<Is_float_mode>")
853 (const_string "neon_fp_minmax_s<q>")
854 (const_string "neon_minmax<q>")))]
855 )
856
;; Signed maximum for the VDQW modes, emitted as vmax with the <V_s_elem>
;; suffix.  The "type" attribute is neon_fp_minmax_s<q> when <Is_float_mode>
;; tests true and neon_minmax<q> otherwise.
857 (define_insn "*smax<mode>3_neon"
858 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
859 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
860 (match_operand:VDQW 2 "s_register_operand" "w")))]
861 "TARGET_NEON"
862 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
863 [(set (attr "type")
864 (if_then_else (match_test "<Is_float_mode>")
865 (const_string "neon_fp_minmax_s<q>")
866 (const_string "neon_minmax<q>")))]
867 )
868
869 ; TODO: V2DI shifts are currently disabled because there are bugs in the
870 ; generic vectorizer code. It ends up creating a V2DI constructor with
871 ; SImode elements.
872
;; Left shift for the VDQIW modes.  Alternative 0 takes the shift amount in
;; a register and emits a three-operand vshl.  Alternative 1 takes an operand
;; 2 matching the Dm constraint and obtains its text from
;; neon_output_shift_immediate, called with sign character 'i' and a final
;; argument of true.
873 (define_insn "vashl<mode>3"
874 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
875 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
876 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
877 "TARGET_NEON"
878 {
879 switch (which_alternative)
880 {
881 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
882 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
883 <MODE>mode,
884 VALID_NEON_QREG_MODE (<MODE>mode),
885 true);
886 default: gcc_unreachable ();
887 }
888 }
889 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
890 )
891
;; Arithmetic shift right (ashiftrt) by a constant for the VDQIW modes.
;; Operand 2 must satisfy imm_for_neon_rshift_operand.  The output template
;; is produced by neon_output_shift_immediate using mnemonic "vshr" and the
;; signedness character 's'.
892 (define_insn "vashr<mode>3_imm"
893 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
894 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
895 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
896 "TARGET_NEON"
897 {
898 return neon_output_shift_immediate ("vshr", 's', &operands[2],
899 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
900 false);
901 }
902 [(set_attr "type" "neon_shift_imm<q>")]
903 )
904
;; Logical shift right (lshiftrt) by a constant for the VDQIW modes.
;; Operand 2 must satisfy imm_for_neon_rshift_operand.  The output template
;; is produced by neon_output_shift_immediate using mnemonic "vshr" and the
;; signedness character 'u'.
905 (define_insn "vlshr<mode>3_imm"
906 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
907 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
908 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
909 "TARGET_NEON"
910 {
911 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
912 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
913 false);
914 }
915 [(set_attr "type" "neon_shift_imm<q>")]
916 )
917
918 ; Used for implementing arithmetic shift-right, which is a left-shift by a
919 ; negative amount, with signed operands. This is essentially the same as
920 ; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
921 ; with negative shift amounts.
922
;; Register-count vshl with the signed element suffix <V_s_elem>, expressed
;; as UNSPEC_ASHIFT_SIGNED rather than as an ashift.  The vashr<mode>3
;; expander emits this pattern with a negated shift count.
923 (define_insn "ashl<mode>3_signed"
924 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
925 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
926 (match_operand:VDQI 2 "s_register_operand" "w")]
927 UNSPEC_ASHIFT_SIGNED))]
928 "TARGET_NEON"
929 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
930 [(set_attr "type" "neon_shift_reg<q>")]
931 )
932
933 ; Used for implementing logical shift-right, which is a left-shift by a negative
934 ; amount, with unsigned operands.
935
;; Register-count vshl with the unsigned element suffix <V_u_elem>, expressed
;; as UNSPEC_ASHIFT_UNSIGNED rather than as an ashift.  The vlshr<mode>3
;; expander emits this pattern with a negated shift count.
936 (define_insn "ashl<mode>3_unsigned"
937 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
938 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
939 (match_operand:VDQI 2 "s_register_operand" "w")]
940 UNSPEC_ASHIFT_UNSIGNED))]
941 "TARGET_NEON"
942 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
943 [(set_attr "type" "neon_shift_reg<q>")]
944 )
945
;; Expander for vector arithmetic shift right (VDQIW modes).
;;   - Register shift count: negate it into a fresh pseudo and emit
;;     ashl<mode>3_signed (a left shift by the negated amount).
;;   - Otherwise (constant count): emit vashr<mode>3_imm directly.
946 (define_expand "vashr<mode>3"
947 [(set (match_operand:VDQIW 0 "s_register_operand")
948 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
949 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
950 "TARGET_NEON"
951 {
952 if (s_register_operand (operands[2], <MODE>mode))
953 {
954 rtx neg = gen_reg_rtx (<MODE>mode);
955 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
956 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
957 }
958 else
959 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
960 DONE;
961 })
962
;; Expander for vector logical shift right (VDQIW modes).
;;   - Register shift count: negate it into a fresh pseudo and emit
;;     ashl<mode>3_unsigned (a left shift by the negated amount).
;;   - Otherwise (constant count): emit vlshr<mode>3_imm directly.
963 (define_expand "vlshr<mode>3"
964 [(set (match_operand:VDQIW 0 "s_register_operand")
965 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
966 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
967 "TARGET_NEON"
968 {
969 if (s_register_operand (operands[2], <MODE>mode))
970 {
971 rtx neg = gen_reg_rtx (<MODE>mode);
972 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
973 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
974 }
975 else
976 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
977 DONE;
978 })
979
980 ;; 64-bit shifts
981
982 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
983 ;; leaving the upper half uninitialized. This is OK since the shift
984 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
985 ;; data flow analysis however, we pretend the full register is set
986 ;; using an unspec.
;; Two alternatives, both writing lane 0 of the destination D register:
;;   0: SImode source in memory ("Um") -> vld1.32
;;   1: SImode source in a core register ("r") -> vmov.32
987 (define_insn "neon_load_count"
988 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
989 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
990 UNSPEC_LOAD_COUNT))]
991 "TARGET_NEON"
992 "@
993 vld1.32\t{%P0[0]}, %A1
994 vmov.32\t%P0[0], %1"
995 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
996 )
997
998 ;; Widening operations
999
;; Expander for the signed widening sum on the VQI modes:
;;   operand 0 = sign_extend (operand 1) + operand 2
;; with the result and accumulator in <V_double_width>.  The accumulator is
;; first copied into operand 0 (if it is a different rtx), then the two halves
;; of operand 1 are added in with vec_sel_widen_ssum_lo/hi, each of which ties
;; its accumulator input to its output.
1000 (define_expand "widen_ssum<mode>3"
1001 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1002 (plus:<V_double_width>
1003 (sign_extend:<V_double_width>
1004 (match_operand:VQI 1 "s_register_operand"))
1005 (match_operand:<V_double_width> 2 "s_register_operand")))]
1006 "TARGET_NEON"
1007 {
1008 machine_mode mode = GET_MODE (operands[1]);
1009 rtx p1, p2;
1010
1011 p1 = arm_simd_vect_par_cnst_half (mode, false);
1012 p2 = arm_simd_vect_par_cnst_half (mode, true);
1013
1014 if (operands[0] != operands[2])
1015 emit_move_insn (operands[0], operands[2]);
1016
1017 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1018 operands[1],
1019 p1,
1020 operands[0]));
1021 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1022 operands[1],
1023 p2,
1024 operands[0]));
1025 DONE;
1026 }
1027 )
1028
;; Signed widening add of the half of operand 1 selected by operand 2 (which
;; must satisfy vect_par_constant_low) into the accumulator operand 3, which
;; is tied to the output by the "0" constraint.  Emits vaddw; the D-register
;; modifier applied to operand 1 is %f1 when BYTES_BIG_ENDIAN and %e1
;; otherwise.
1029 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1030 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1031 (plus:<V_double_width>
1032 (sign_extend:<V_double_width>
1033 (vec_select:<V_HALF>
1034 (match_operand:VQI 1 "s_register_operand" "%w")
1035 (match_operand:VQI 2 "vect_par_constant_low" "")))
1036 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1037 "TARGET_NEON"
1038 {
1039 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1040 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1041 }
1042 [(set_attr "type" "neon_add_widen")])
1043
;; Signed widening add of the half of operand 1 selected by operand 2 (which
;; must satisfy vect_par_constant_high) into the accumulator operand 3, which
;; is tied to the output by the "0" constraint.  Emits vaddw; the D-register
;; modifier applied to operand 1 is %e1 when BYTES_BIG_ENDIAN and %f1
;; otherwise (the mirror image of the _lo pattern).
1044 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1045 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1046 (plus:<V_double_width>
1047 (sign_extend:<V_double_width>
1048 (vec_select:<V_HALF>
1049 (match_operand:VQI 1 "s_register_operand" "%w")
1050 (match_operand:VQI 2 "vect_par_constant_high" "")))
1051 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1052 "TARGET_NEON"
1053 {
1054 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1055 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1056 }
1057 [(set_attr "type" "neon_add_widen")])
1058
;; Signed widening sum for the VW modes in a single instruction:
;;   operand 0 = sign_extend (operand 1) + operand 2
;; emitted as vaddw with the wide accumulator (%q2) first and the narrow
;; source (%P1) second.
;; NOTE(review): operand 1 carries the '%' (commutative) modifier although
;; operands 1 and 2 have different modes (VW vs <V_widen>) - confirm that this
;; is intended.
1059 (define_insn "widen_ssum<mode>3"
1060 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1061 (plus:<V_widen>
1062 (sign_extend:<V_widen>
1063 (match_operand:VW 1 "s_register_operand" "%w"))
1064 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1065 "TARGET_NEON"
1066 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1067 [(set_attr "type" "neon_add_widen")]
1068 )
1069
;; Expander for the unsigned widening sum on the VQI modes:
;;   operand 0 = zero_extend (operand 1) + operand 2
;; with the result and accumulator in <V_double_width>.  The accumulator is
;; first copied into operand 0 (if it is a different rtx), then the two halves
;; of operand 1 are added in with vec_sel_widen_usum_lo/hi, each of which ties
;; its accumulator input to its output.
1070 (define_expand "widen_usum<mode>3"
1071 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1072 (plus:<V_double_width>
1073 (zero_extend:<V_double_width>
1074 (match_operand:VQI 1 "s_register_operand"))
1075 (match_operand:<V_double_width> 2 "s_register_operand")))]
1076 "TARGET_NEON"
1077 {
1078 machine_mode mode = GET_MODE (operands[1]);
1079 rtx p1, p2;
1080
1081 p1 = arm_simd_vect_par_cnst_half (mode, false);
1082 p2 = arm_simd_vect_par_cnst_half (mode, true);
1083
1084 if (operands[0] != operands[2])
1085 emit_move_insn (operands[0], operands[2]);
1086
1087 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1088 operands[1],
1089 p1,
1090 operands[0]));
1091 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1092 operands[1],
1093 p2,
1094 operands[0]));
1095 DONE;
1096 }
1097 )
1098
;; Unsigned widening add of the half of operand 1 selected by operand 2 (which
;; must satisfy vect_par_constant_low) into the accumulator operand 3, which
;; is tied to the output by the "0" constraint.  Emits vaddw; the D-register
;; modifier applied to operand 1 is %f1 when BYTES_BIG_ENDIAN and %e1
;; otherwise.
1099 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1100 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1101 (plus:<V_double_width>
1102 (zero_extend:<V_double_width>
1103 (vec_select:<V_HALF>
1104 (match_operand:VQI 1 "s_register_operand" "%w")
1105 (match_operand:VQI 2 "vect_par_constant_low" "")))
1106 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1107 "TARGET_NEON"
1108 {
1109 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1110 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1111 }
1112 [(set_attr "type" "neon_add_widen")])
1113
;; Unsigned widening add of the half of operand 1 selected by operand 2 (which
;; must satisfy vect_par_constant_high) into the accumulator operand 3, which
;; is tied to the output by the "0" constraint.  Emits vaddw; the D-register
;; modifier applied to operand 1 is %e1 when BYTES_BIG_ENDIAN and %f1
;; otherwise (the mirror image of the _lo pattern).
1114 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1115 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1116 (plus:<V_double_width>
1117 (zero_extend:<V_double_width>
1118 (vec_select:<V_HALF>
1119 (match_operand:VQI 1 "s_register_operand" "%w")
1120 (match_operand:VQI 2 "vect_par_constant_high" "")))
1121 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1122 "TARGET_NEON"
1123 {
1124 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1125 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1126 }
1127 [(set_attr "type" "neon_add_widen")])
1128
;; Unsigned widening sum for the VW modes in a single instruction:
;;   operand 0 = zero_extend (operand 1) + operand 2
;; emitted as vaddw with the wide accumulator (%q2) first and the narrow
;; source (%P1) second.
;; NOTE(review): operand 1 carries the '%' (commutative) modifier although
;; operands 1 and 2 have different modes (VW vs <V_widen>) - confirm that this
;; is intended.
1129 (define_insn "widen_usum<mode>3"
1130 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1131 (plus:<V_widen> (zero_extend:<V_widen>
1132 (match_operand:VW 1 "s_register_operand" "%w"))
1133 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1134 "TARGET_NEON"
1135 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1136 [(set_attr "type" "neon_add_widen")]
1137 )
1138
1139 ;; Helpers for quad-word reduction operations
1140
1141 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1142 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1143 ; N/2-element vector.
1144
;; Combine the two halves of a V4SI register with each operation in VQH_OPS:
;; elements {0,1} of operand 1 are paired with elements {2,3} of the same
;; register (match_dup 1), giving a V2SI result.  The mnemonic and signedness
;; come from <VQH_mnem> and <VQH_sign>; the vqh_mnem attribute is set so that
;; type definitions can compare against it.
1145 (define_insn "quad_halves_<code>v4si"
1146 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1147 (VQH_OPS:V2SI
1148 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1149 (parallel [(const_int 0) (const_int 1)]))
1150 (vec_select:V2SI (match_dup 1)
1151 (parallel [(const_int 2) (const_int 3)]))))]
1152 "TARGET_NEON"
1153 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1154 [(set_attr "vqh_mnem" "<VQH_mnem>")
1155 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1156 )
1157
;; Floating-point counterpart of the V4SI pattern: combines elements {0,1} of
;; operand 1 with elements {2,3} of the same register for each operation in
;; VQHS_OPS, giving a V2SF result.  Always uses the .f32 suffix and is only
;; enabled when ARM_HAVE_NEON_V4SF_ARITH holds.
1158 (define_insn "quad_halves_<code>v4sf"
1159 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1160 (VQHS_OPS:V2SF
1161 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1162 (parallel [(const_int 0) (const_int 1)]))
1163 (vec_select:V2SF (match_dup 1)
1164 (parallel [(const_int 2) (const_int 3)]))))]
1165 "ARM_HAVE_NEON_V4SF_ARITH"
1166 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1167 [(set_attr "vqh_mnem" "<VQH_mnem>")
1168 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1169 )
1170
;; Combine the two halves of a V8HI register with each operation in VQH_OPS:
;; elements 0-3 of operand 1 are paired with elements 4-7 of the same register
;; (match_dup 1), giving a V4HI result.
;; Operand 0 is only written, never read, so it uses the plain output
;; constraint "=w" (as quad_halves_<code>v4si does) rather than the
;; read-write modifier "+w".
1171 (define_insn "quad_halves_<code>v8hi"
1172 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1173 (VQH_OPS:V4HI
1174 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1175 (parallel [(const_int 0) (const_int 1)
1176 (const_int 2) (const_int 3)]))
1177 (vec_select:V4HI (match_dup 1)
1178 (parallel [(const_int 4) (const_int 5)
1179 (const_int 6) (const_int 7)]))))]
1180 "TARGET_NEON"
1181 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1182 [(set_attr "vqh_mnem" "<VQH_mnem>")
1183 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1184 )
1185
;; Combine the two halves of a V16QI register with each operation in VQH_OPS:
;; elements 0-7 of operand 1 are paired with elements 8-15 of the same
;; register (match_dup 1), giving a V8QI result.
;; Operand 0 is only written, never read, so it uses the plain output
;; constraint "=w" (as quad_halves_<code>v4si does) rather than the
;; read-write modifier "+w".
1186 (define_insn "quad_halves_<code>v16qi"
1187 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1188 (VQH_OPS:V8QI
1189 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1190 (parallel [(const_int 0) (const_int 1)
1191 (const_int 2) (const_int 3)
1192 (const_int 4) (const_int 5)
1193 (const_int 6) (const_int 7)]))
1194 (vec_select:V8QI (match_dup 1)
1195 (parallel [(const_int 8) (const_int 9)
1196 (const_int 10) (const_int 11)
1197 (const_int 12) (const_int 13)
1198 (const_int 14) (const_int 15)]))))]
1199 "TARGET_NEON"
1200 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1201 [(set_attr "vqh_mnem" "<VQH_mnem>")
1202 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1203 )
1204
;; Store the <V_HALF> value in operand 1 into the part of the 128-bit operand 0
;; that starts at byte offset GET_MODE_SIZE (<V_HALF>mode), using a subreg of
;; operand 0 as the move destination.  The rest of operand 0 is not written.
1205 (define_expand "move_hi_quad_<mode>"
1206 [(match_operand:ANY128 0 "s_register_operand")
1207 (match_operand:<V_HALF> 1 "s_register_operand")]
1208 "TARGET_NEON"
1209 {
1210 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1211 GET_MODE_SIZE (<V_HALF>mode)),
1212 operands[1]);
1213 DONE;
1214 })
1215
;; Store the <V_HALF> value in operand 1 into the part of the 128-bit operand 0
;; that starts at byte offset 0, using a subreg of operand 0 as the move
;; destination.  The rest of operand 0 is not written.
1216 (define_expand "move_lo_quad_<mode>"
1217 [(match_operand:ANY128 0 "s_register_operand")
1218 (match_operand:<V_HALF> 1 "s_register_operand")]
1219 "TARGET_NEON"
1220 {
1221 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1222 <MODE>mode, 0),
1223 operands[1]);
1224 DONE;
1225 })
1226
1227 ;; Reduction operations
1228
;; Sum-reduce a double-word vector (VD modes) to a scalar in operand 0.
;; neon_pairwise_reduce is driven with the vpadd generator, after which
;; element 0 of the temporary vector is extracted as the result.
1229 (define_expand "reduc_plus_scal_<mode>"
1230 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1231 (match_operand:VD 1 "s_register_operand")]
1232 "ARM_HAVE_NEON_<MODE>_ARITH"
1233 {
1234 rtx vec = gen_reg_rtx (<MODE>mode);
1235 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1236 &gen_neon_vpadd_internal<mode>);
1237 /* The same result is actually computed into every element. */
1238 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1239 DONE;
1240 })
1241
;; Sum-reduce a quad-word vector (VQ modes) to a scalar in operand 0.
;; Not available when BYTES_BIG_ENDIAN.  The two halves are first added with
;; quad_halves_plus<mode> into a <V_HALF> temporary, which is then reduced by
;; the <V_half> variant of reduc_plus_scal.
1242 (define_expand "reduc_plus_scal_<mode>"
1243 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1244 (match_operand:VQ 1 "s_register_operand")]
1245 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1246 {
1247 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1248
1249 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1250 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1251
1252 DONE;
1253 })
1254
;; Sum-reduce a V2DI vector to a DImode scalar in operand 0.  Not available
;; when BYTES_BIG_ENDIAN.  Emits arm_reduc_plus_internal_v2di into a V2DI
;; temporary and then extracts element 0 of that temporary.
1255 (define_expand "reduc_plus_scal_v2di"
1256 [(match_operand:DI 0 "nonimmediate_operand")
1257 (match_operand:V2DI 1 "s_register_operand")]
1258 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1259 {
1260 rtx vec = gen_reg_rtx (V2DImode);
1261
1262 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1263 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1264
1265 DONE;
1266 })
1267
;; Helper insn for reduc_plus_scal_v2di, modelled as UNSPEC_VPADD on a V2DI
;; operand.  The template is a single vadd.i64 whose destination is %e0 and
;; whose sources are %e1 and %f1, so only the %e0 part of operand 0 is named
;; as a destination.
1268 (define_insn "arm_reduc_plus_internal_v2di"
1269 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1270 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1271 UNSPEC_VPADD))]
1272 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1273 "vadd.i64\t%e0, %e1, %f1"
1274 [(set_attr "type" "neon_add_q")]
1275 )
1276
;; Signed-minimum reduction of a double-word vector (VD modes) to a scalar in
;; operand 0.  neon_pairwise_reduce is driven with the vpsmin generator, after
;; which element 0 of the temporary vector is extracted as the result.
1277 (define_expand "reduc_smin_scal_<mode>"
1278 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1279 (match_operand:VD 1 "s_register_operand")]
1280 "ARM_HAVE_NEON_<MODE>_ARITH"
1281 {
1282 rtx vec = gen_reg_rtx (<MODE>mode);
1283
1284 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1285 &gen_neon_vpsmin<mode>);
1286 /* The result is computed into every element of the vector. */
1287 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1288 DONE;
1289 })
1290
;; Signed-minimum reduction of a quad-word vector (VQ modes) to a scalar in
;; operand 0.  Not available when BYTES_BIG_ENDIAN.  The two halves are first
;; combined with quad_halves_smin<mode> into a <V_HALF> temporary, which is
;; then reduced by the <V_half> variant of reduc_smin_scal.
1291 (define_expand "reduc_smin_scal_<mode>"
1292 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1293 (match_operand:VQ 1 "s_register_operand")]
1294 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1295 {
1296 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1297
1298 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1299 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1300
1301 DONE;
1302 })
1303
;; Signed-maximum reduction of a double-word vector (VD modes) to a scalar in
;; operand 0.  neon_pairwise_reduce is driven with the vpsmax generator, after
;; which element 0 of the temporary vector is extracted as the result.
1304 (define_expand "reduc_smax_scal_<mode>"
1305 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1306 (match_operand:VD 1 "s_register_operand")]
1307 "ARM_HAVE_NEON_<MODE>_ARITH"
1308 {
1309 rtx vec = gen_reg_rtx (<MODE>mode);
1310 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1311 &gen_neon_vpsmax<mode>);
1312 /* The result is computed into every element of the vector. */
1313 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1314 DONE;
1315 })
1316
;; Signed-maximum reduction of a quad-word vector (VQ modes) to a scalar in
;; operand 0.  Not available when BYTES_BIG_ENDIAN.  The two halves are first
;; combined with quad_halves_smax<mode> into a <V_HALF> temporary, which is
;; then reduced by the <V_half> variant of reduc_smax_scal.
1317 (define_expand "reduc_smax_scal_<mode>"
1318 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1319 (match_operand:VQ 1 "s_register_operand")]
1320 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
1321 {
1322 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1323
1324 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1325 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1326
1327 DONE;
1328 })
1329
;; Unsigned-minimum reduction of a double-word integer vector (VDI modes) to a
;; scalar in operand 0.  neon_pairwise_reduce is driven with the vpumin
;; generator, after which element 0 of the temporary vector is extracted.
1330 (define_expand "reduc_umin_scal_<mode>"
1331 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1332 (match_operand:VDI 1 "s_register_operand")]
1333 "TARGET_NEON"
1334 {
1335 rtx vec = gen_reg_rtx (<MODE>mode);
1336 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1337 &gen_neon_vpumin<mode>);
1338 /* The result is computed into every element of the vector. */
1339 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1340 DONE;
1341 })
1342
;; Unsigned-minimum reduction of a quad-word integer vector (VQI modes) to a
;; scalar in operand 0.  Not available when BYTES_BIG_ENDIAN.  The two halves
;; are first combined with quad_halves_umin<mode> into a <V_HALF> temporary,
;; which is then reduced by the <V_half> variant of reduc_umin_scal.
1343 (define_expand "reduc_umin_scal_<mode>"
1344 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1345 (match_operand:VQI 1 "s_register_operand")]
1346 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1347 {
1348 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1349
1350 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1351 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1352
1353 DONE;
1354 })
1355
;; Unsigned-maximum reduction of a double-word integer vector (VDI modes) to a
;; scalar in operand 0.  neon_pairwise_reduce is driven with the vpumax
;; generator, after which element 0 of the temporary vector is extracted.
1356 (define_expand "reduc_umax_scal_<mode>"
1357 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1358 (match_operand:VDI 1 "s_register_operand")]
1359 "TARGET_NEON"
1360 {
1361 rtx vec = gen_reg_rtx (<MODE>mode);
1362 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1363 &gen_neon_vpumax<mode>);
1364 /* The result is computed into every element of the vector. */
1365 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1366 DONE;
1367 })
1368
;; Unsigned-maximum reduction of a quad-word integer vector (VQI modes) to a
;; scalar in operand 0.  Not available when BYTES_BIG_ENDIAN.  The two halves
;; are first combined with quad_halves_umax<mode> into a <V_HALF> temporary,
;; which is then reduced by the <V_half> variant of reduc_umax_scal.
1369 (define_expand "reduc_umax_scal_<mode>"
1370 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371 (match_operand:VQI 1 "s_register_operand")]
1372 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1373 {
1374 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1375
1376 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1377 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1378
1379 DONE;
1380 })
1381
;; Pairwise add (vpadd) on double-word vectors (VD modes), modelled as
;; UNSPEC_VPADD.  This is the generator handed to neon_pairwise_reduce by the
;; VD reduc_plus_scal expander.  The scheduling type depends on
;; <Is_float_mode>.
1382 (define_insn "neon_vpadd_internal<mode>"
1383 [(set (match_operand:VD 0 "s_register_operand" "=w")
1384 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1385 (match_operand:VD 2 "s_register_operand" "w")]
1386 UNSPEC_VPADD))]
1387 "TARGET_NEON"
1388 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1389 ;; Assume this schedules like vadd.
1390 [(set (attr "type")
1391 (if_then_else (match_test "<Is_float_mode>")
1392 (const_string "neon_fp_reduc_add_s<q>")
1393 (const_string "neon_reduc_add<q>")))]
1394 )
1395
;; Pairwise add on V4HF (vpadd.f16), modelled as UNSPEC_VPADD.  Only enabled
;; under TARGET_NEON_FP16INST.
1396 (define_insn "neon_vpaddv4hf"
1397 [(set
1398 (match_operand:V4HF 0 "s_register_operand" "=w")
1399 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1400 (match_operand:V4HF 2 "s_register_operand" "w")]
1401 UNSPEC_VPADD))]
1402 "TARGET_NEON_FP16INST"
1403 "vpadd.f16\t%P0, %P1, %P2"
1404 [(set_attr "type" "neon_reduc_add")]
1405 )
1406
;; Pairwise signed minimum (vpmin with <V_s_elem>) on double-word vectors
;; (VD modes), modelled as UNSPEC_VPSMIN.  The scheduling type depends on
;; <Is_float_mode>.
1407 (define_insn "neon_vpsmin<mode>"
1408 [(set (match_operand:VD 0 "s_register_operand" "=w")
1409 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1410 (match_operand:VD 2 "s_register_operand" "w")]
1411 UNSPEC_VPSMIN))]
1412 "TARGET_NEON"
1413 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1414 [(set (attr "type")
1415 (if_then_else (match_test "<Is_float_mode>")
1416 (const_string "neon_fp_reduc_minmax_s<q>")
1417 (const_string "neon_reduc_minmax<q>")))]
1418 )
1419
;; Pairwise signed maximum (vpmax with <V_s_elem>) on double-word vectors
;; (VD modes), modelled as UNSPEC_VPSMAX.  The scheduling type depends on
;; <Is_float_mode>.
1420 (define_insn "neon_vpsmax<mode>"
1421 [(set (match_operand:VD 0 "s_register_operand" "=w")
1422 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1423 (match_operand:VD 2 "s_register_operand" "w")]
1424 UNSPEC_VPSMAX))]
1425 "TARGET_NEON"
1426 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1427 [(set (attr "type")
1428 (if_then_else (match_test "<Is_float_mode>")
1429 (const_string "neon_fp_reduc_minmax_s<q>")
1430 (const_string "neon_reduc_minmax<q>")))]
1431 )
1432
;; Pairwise unsigned minimum (vpmin with <V_u_elem>) on double-word integer
;; vectors (VDI modes), modelled as UNSPEC_VPUMIN.
1433 (define_insn "neon_vpumin<mode>"
1434 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1435 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1436 (match_operand:VDI 2 "s_register_operand" "w")]
1437 UNSPEC_VPUMIN))]
1438 "TARGET_NEON"
1439 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1440 [(set_attr "type" "neon_reduc_minmax<q>")]
1441 )
1442
;; Pairwise unsigned maximum (vpmax with <V_u_elem>) on double-word integer
;; vectors (VDI modes), modelled as UNSPEC_VPUMAX.
1443 (define_insn "neon_vpumax<mode>"
1444 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1445 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1446 (match_operand:VDI 2 "s_register_operand" "w")]
1447 UNSPEC_VPUMAX))]
1448 "TARGET_NEON"
1449 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1450 [(set_attr "type" "neon_reduc_minmax<q>")]
1451 )
1452
1453 ;; Saturating arithmetic
1454
1455 ; NOTE: Neon supports many more saturating variants of instructions than the
1456 ; following, but these are all GCC currently understands.
1457 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1458 ; yet either, although these patterns may be used by intrinsics when they're
1459 ; added.
1460
;; Signed saturating add (ss_plus) on double-word vectors (VD modes), emitted
;; as vqadd with the signed element suffix <V_s_elem>.
1461 (define_insn "*ss_add<mode>_neon"
1462 [(set (match_operand:VD 0 "s_register_operand" "=w")
1463 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1464 (match_operand:VD 2 "s_register_operand" "w")))]
1465 "TARGET_NEON"
1466 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1467 [(set_attr "type" "neon_qadd<q>")]
1468 )
1469
;; Unsigned saturating add (us_plus) on double-word vectors (VD modes),
;; emitted as vqadd with the unsigned element suffix <V_u_elem>.
1470 (define_insn "*us_add<mode>_neon"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")))]
1474 "TARGET_NEON"
1475 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1476 [(set_attr "type" "neon_qadd<q>")]
1477 )
1478
;; Signed saturating subtract (ss_minus) on double-word vectors (VD modes),
;; emitted as vqsub with the signed element suffix <V_s_elem>.
1479 (define_insn "*ss_sub<mode>_neon"
1480 [(set (match_operand:VD 0 "s_register_operand" "=w")
1481 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1482 (match_operand:VD 2 "s_register_operand" "w")))]
1483 "TARGET_NEON"
1484 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1485 [(set_attr "type" "neon_qsub<q>")]
1486 )
1487
;; Unsigned saturating subtract (us_minus) on double-word vectors (VD modes),
;; emitted as vqsub with the unsigned element suffix <V_u_elem>.
1488 (define_insn "*us_sub<mode>_neon"
1489 [(set (match_operand:VD 0 "s_register_operand" "=w")
1490 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1491 (match_operand:VD 2 "s_register_operand" "w")))]
1492 "TARGET_NEON"
1493 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1494 [(set_attr "type" "neon_qsub<q>")]
1495 )
1496
;; Vector comparison for the VDQW modes, producing a <V_cmp_result> mask in
;; operand 0.  Operand 3 may be a register or zero.  Floating-point modes are
;; only enabled with flag_unsafe_math_optimizations.  All the work is done by
;; arm_expand_vector_compare, called with its last argument false.
1497 (define_expand "vec_cmp<mode><v_cmp_result>"
1498 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
1499 (match_operator:<V_cmp_result> 1 "comparison_operator"
1500 [(match_operand:VDQW 2 "s_register_operand")
1501 (match_operand:VDQW 3 "reg_or_zero_operand")]))]
1502 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1503 {
1504 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1505 operands[2], operands[3], false);
1506 DONE;
1507 })
1508
;; Vector comparison for the integer VDQIW modes where the result has the same
;; mode as the compared operands.  Operand 3 may be a register or zero.  All
;; the work is done by arm_expand_vector_compare, called with its last
;; argument false.
1509 (define_expand "vec_cmpu<mode><mode>"
1510 [(set (match_operand:VDQIW 0 "s_register_operand")
1511 (match_operator:VDQIW 1 "comparison_operator"
1512 [(match_operand:VDQIW 2 "s_register_operand")
1513 (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
1514 "TARGET_NEON"
1515 {
1516 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1517 operands[2], operands[3], false);
1518 DONE;
1519 })
1520
1521 ;; Conditional instructions. These are comparisons with conditional moves for
1522 ;; vectors. They perform the assignment:
1523 ;;
1524 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1525 ;;
1526 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1527 ;; element-wise.
1528
;; Conditional select where the compared operands (4, 5) and the selected
;; values (1, 2) all share the same VDQW mode.  Floating-point modes are only
;; enabled with flag_unsafe_math_optimizations.  Expansion is delegated to
;; arm_expand_vcond, which is passed the comparison-result mode.
1529 (define_expand "vcond<mode><mode>"
1530 [(set (match_operand:VDQW 0 "s_register_operand")
1531 (if_then_else:VDQW
1532 (match_operator 3 "comparison_operator"
1533 [(match_operand:VDQW 4 "s_register_operand")
1534 (match_operand:VDQW 5 "reg_or_zero_operand")])
1535 (match_operand:VDQW 1 "s_register_operand")
1536 (match_operand:VDQW 2 "s_register_operand")))]
1537 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1538 {
1539 arm_expand_vcond (operands, <V_cmp_result>mode);
1540 DONE;
1541 })
1542
;; Conditional select where the compared operands (4, 5) are in a V32 mode
;; and the selected values (1, 2) and the result are in the corresponding
;; <V_CVTTO> mode.  Floating-point comparison modes are only enabled with
;; flag_unsafe_math_optimizations.  Expansion is delegated to
;; arm_expand_vcond.
1543 (define_expand "vcond<V_cvtto><mode>"
1544 [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
1545 (if_then_else:<V_CVTTO>
1546 (match_operator 3 "comparison_operator"
1547 [(match_operand:V32 4 "s_register_operand")
1548 (match_operand:V32 5 "reg_or_zero_operand")])
1549 (match_operand:<V_CVTTO> 1 "s_register_operand")
1550 (match_operand:<V_CVTTO> 2 "s_register_operand")))]
1551 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1552 {
1553 arm_expand_vcond (operands, <V_cmp_result>mode);
1554 DONE;
1555 })
1556
;; Conditional select for the VDQW modes where the compared operands (4, 5)
;; are in the <V_cmp_result> mode and the comparison must satisfy
;; arm_comparison_operator.  Expansion is delegated to arm_expand_vcond.
1557 (define_expand "vcondu<mode><v_cmp_result>"
1558 [(set (match_operand:VDQW 0 "s_register_operand")
1559 (if_then_else:VDQW
1560 (match_operator 3 "arm_comparison_operator"
1561 [(match_operand:<V_cmp_result> 4 "s_register_operand")
1562 (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
1563 (match_operand:VDQW 1 "s_register_operand")
1564 (match_operand:VDQW 2 "s_register_operand")))]
1565 "TARGET_NEON"
1566 {
1567 arm_expand_vcond (operands, <V_cmp_result>mode);
1568 DONE;
1569 })
1570
;; Select between operands 1 and 2 under control of a precomputed
;; <V_cmp_result> mask in operand 3.  Implemented with neon_vbsl<mode>; note
;; the argument order: the mask is passed second, followed by operand 1 and
;; operand 2.
1571 (define_expand "vcond_mask_<mode><v_cmp_result>"
1572 [(set (match_operand:VDQW 0 "s_register_operand")
1573 (if_then_else:VDQW
1574 (match_operand:<V_cmp_result> 3 "s_register_operand")
1575 (match_operand:VDQW 1 "s_register_operand")
1576 (match_operand:VDQW 2 "s_register_operand")))]
1577 "TARGET_NEON"
1578 {
1579 emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
1580 operands[2]));
1581 DONE;
1582 })
1583
1584 ;; Patterns for builtins.
1585
1586 ; good for plain vadd, vaddq.
1587
;; Builtin expander for vadd on the VCVTF modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds, the generic add<mode>3 pattern is used;
;; otherwise the addition is emitted through neon_vadd<mode>_unspec.
1588 (define_expand "neon_vadd<mode>"
1589 [(match_operand:VCVTF 0 "s_register_operand")
1590 (match_operand:VCVTF 1 "s_register_operand")
1591 (match_operand:VCVTF 2 "s_register_operand")]
1592 "TARGET_NEON"
1593 {
1594 if (ARM_HAVE_NEON_<MODE>_ARITH)
1595 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1596 else
1597 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1598 operands[2]));
1599 DONE;
1600 })
1601
;; Builtin expander for vadd on the half-precision VH modes.  Enabled under
;; TARGET_NEON_FP16INST; always forwards to add<mode>3.
1602 (define_expand "neon_vadd<mode>"
1603 [(match_operand:VH 0 "s_register_operand")
1604 (match_operand:VH 1 "s_register_operand")
1605 (match_operand:VH 2 "s_register_operand")]
1606 "TARGET_NEON_FP16INST"
1607 {
1608 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1609 DONE;
1610 })
1611
;; Builtin expander for vsub on the half-precision VH modes.  Enabled under
;; TARGET_NEON_FP16INST; always forwards to sub<mode>3.
1612 (define_expand "neon_vsub<mode>"
1613 [(match_operand:VH 0 "s_register_operand")
1614 (match_operand:VH 1 "s_register_operand")
1615 (match_operand:VH 2 "s_register_operand")]
1616 "TARGET_NEON_FP16INST"
1617 {
1618 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
1619 DONE;
1620 })
1621
1622 ; Note that NEON operations don't support the full IEEE 754 standard: in
1623 ; particular, denormal values are flushed to zero. This means that GCC cannot
1624 ; use those instructions for autovectorization, etc. unless
1625 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1626 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1627 ; header) must work in either case: if -funsafe-math-optimizations is given,
1628 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1629 ; expand to unspecs (which may potentially limit the extent to which they might
1630 ; be optimized by generic code).
1631
1632 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1633
;; vadd on the VCVTF modes expressed as UNSPEC_VADD instead of a plus rtx.
;; This is the fallback emitted by the neon_vadd<mode> expander when
;; ARM_HAVE_NEON_<MODE>_ARITH does not hold.
1634 (define_insn "neon_vadd<mode>_unspec"
1635 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1636 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1637 (match_operand:VCVTF 2 "s_register_operand" "w")]
1638 UNSPEC_VADD))]
1639 "TARGET_NEON"
1640 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1641 [(set (attr "type")
1642 (if_then_else (match_test "<Is_float_mode>")
1643 (const_string "neon_fp_addsub_s<q>")
1644 (const_string "neon_add<q>")))]
1645 )
1646
;; Long add (vaddl): two VDI operands produce a <V_widen> result.  Iterated
;; over the VADDL unspecs; <sup> supplies both the pattern-name infix and the
;; signedness letter in the mnemonic suffix.
1647 (define_insn "neon_vaddl<sup><mode>"
1648 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1649 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1650 (match_operand:VDI 2 "s_register_operand" "w")]
1651 VADDL))]
1652 "TARGET_NEON"
1653 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1654 [(set_attr "type" "neon_add_long")]
1655 )
1656
;; Wide add (vaddw): a <V_widen> operand 1 plus a VDI operand 2 produce a
;; <V_widen> result.  Iterated over the VADDW unspecs; <sup> supplies both the
;; pattern-name infix and the signedness letter in the mnemonic suffix.
1657 (define_insn "neon_vaddw<sup><mode>"
1658 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1659 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1660 (match_operand:VDI 2 "s_register_operand" "w")]
1661 VADDW))]
1662 "TARGET_NEON"
1663 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1664 [(set_attr "type" "neon_add_widen")]
1665 )
1666
1667 ; vhadd and vrhadd.
1668
;; Halving add, with or without rounding (<r>), signed or unsigned (<sup>),
;; on the VDQIW modes.  Iterated over the VHADD unspecs.
;; NOTE(review): the type attribute is the fixed "_q" variant for both D and Q
;; modes, whereas neighbouring patterns (e.g. neon_vqadd) use the <q> mode
;; attribute - confirm whether "neon_add_halve<q>" was intended.
1669 (define_insn "neon_v<r>hadd<sup><mode>"
1670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1672 (match_operand:VDQIW 2 "s_register_operand" "w")]
1673 VHADD))]
1674 "TARGET_NEON"
1675 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1676 [(set_attr "type" "neon_add_halve_q")]
1677 )
1678
;; Saturating add builtin (vqadd), signed or unsigned (<sup>), on the VDQIX
;; modes.  Iterated over the VQADD unspecs.
1679 (define_insn "neon_vqadd<sup><mode>"
1680 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1681 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1682 (match_operand:VDQIX 2 "s_register_operand" "w")]
1683 VQADD))]
1684 "TARGET_NEON"
1685 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1686 [(set_attr "type" "neon_qadd<q>")]
1687 )
1688
;; Add and narrow (vaddhn / vraddhn, selected by <r>): two VN quad-register
;; operands produce a <V_narrow> result in a D register.  Iterated over the
;; VADDHN unspecs.
1689 (define_insn "neon_v<r>addhn<mode>"
1690 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1691 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1692 (match_operand:VN 2 "s_register_operand" "w")]
1693 VADDHN))]
1694 "TARGET_NEON"
1695 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1696 [(set_attr "type" "neon_add_halve_narrow_q")]
1697 )
1698
1699 ;; Polynomial and Float multiplication.
;; vmul builtin for the VPF modes, modelled as UNSPEC_VMUL.  <pf> supplies the
;; type letter placed before the element size in the mnemonic suffix.  The
;; scheduling type is the FP multiply class when <Is_float_mode> holds and the
;; integer multiply class keyed by <V_elem_ch> otherwise.
1700 (define_insn "neon_vmul<pf><mode>"
1701 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1702 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1703 (match_operand:VPF 2 "s_register_operand" "w")]
1704 UNSPEC_VMUL))]
1705 "TARGET_NEON"
1706 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1707 [(set (attr "type")
1708 (if_then_else (match_test "<Is_float_mode>")
1709 (const_string "neon_fp_mul_s<q>")
1710 (const_string "neon_mul_<V_elem_ch><q>")))]
1711 )
1712
;; Half-precision vector multiply (vmul.f16) on the VH modes, expressed as a
;; plain mult rtx.  Only enabled under TARGET_NEON_FP16INST.
1713 (define_insn "neon_vmulf<mode>"
1714 [(set
1715 (match_operand:VH 0 "s_register_operand" "=w")
1716 (mult:VH
1717 (match_operand:VH 1 "s_register_operand" "w")
1718 (match_operand:VH 2 "s_register_operand" "w")))]
1719 "TARGET_NEON_FP16INST"
1720 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1721 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1722 )
1723
;; Builtin expander for vmla on the VDQW modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds, the mul<mode>3add<mode>_neon pattern is
;; used; otherwise the operation is emitted through neon_vmla<mode>_unspec.
;; The four operands are forwarded in the same order in both cases.
1724 (define_expand "neon_vmla<mode>"
1725 [(match_operand:VDQW 0 "s_register_operand")
1726 (match_operand:VDQW 1 "s_register_operand")
1727 (match_operand:VDQW 2 "s_register_operand")
1728 (match_operand:VDQW 3 "s_register_operand")]
1729 "TARGET_NEON"
1730 {
1731 if (ARM_HAVE_NEON_<MODE>_ARITH)
1732 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1733 operands[2], operands[3]));
1734 else
1735 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1736 operands[2], operands[3]));
1737 DONE;
1738 })
1739
;; Builtin expander for vfma on the VCVTF modes (requires TARGET_FMA).
;; Forwards to fma<mode>4_intrinsic with the operands reordered: operands 2
;; and 3 are passed first and operand 1 is passed last.
1740 (define_expand "neon_vfma<VCVTF:mode>"
1741 [(match_operand:VCVTF 0 "s_register_operand")
1742 (match_operand:VCVTF 1 "s_register_operand")
1743 (match_operand:VCVTF 2 "s_register_operand")
1744 (match_operand:VCVTF 3 "s_register_operand")]
1745 "TARGET_NEON && TARGET_FMA"
1746 {
1747 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1748 operands[1]));
1749 DONE;
1750 })
1751
;; Builtin expander for vfma on the half-precision VH modes (requires
;; TARGET_NEON_FP16INST).  Forwards to fma<mode>4 with the operands reordered:
;; operands 2 and 3 are passed first and operand 1 is passed last.
1752 (define_expand "neon_vfma<VH:mode>"
1753 [(match_operand:VH 0 "s_register_operand")
1754 (match_operand:VH 1 "s_register_operand")
1755 (match_operand:VH 2 "s_register_operand")
1756 (match_operand:VH 3 "s_register_operand")]
1757 "TARGET_NEON_FP16INST"
1758 {
1759 emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
1760 operands[1]));
1761 DONE;
1762 })
1763
;; Builtin expander for vfms on the VCVTF modes (requires TARGET_FMA).
;; Forwards to fmsub<mode>4_intrinsic with the operands reordered: operands 2
;; and 3 are passed first and operand 1 is passed last.
1764 (define_expand "neon_vfms<VCVTF:mode>"
1765 [(match_operand:VCVTF 0 "s_register_operand")
1766 (match_operand:VCVTF 1 "s_register_operand")
1767 (match_operand:VCVTF 2 "s_register_operand")
1768 (match_operand:VCVTF 3 "s_register_operand")]
1769 "TARGET_NEON && TARGET_FMA"
1770 {
1771 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1772 operands[1]));
1773 DONE;
1774 })
1775
;; Builtin expander for vfms on the half-precision VH modes (requires
;; TARGET_NEON_FP16INST).  Forwards to fmsub<mode>4_intrinsic with the
;; operands reordered: operands 2 and 3 are passed first and operand 1 is
;; passed last.
1776 (define_expand "neon_vfms<VH:mode>"
1777 [(match_operand:VH 0 "s_register_operand")
1778 (match_operand:VH 1 "s_register_operand")
1779 (match_operand:VH 2 "s_register_operand")
1780 (match_operand:VH 3 "s_register_operand")]
1781 "TARGET_NEON_FP16INST"
1782 {
1783 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1784 operands[1]));
1785 DONE;
1786 })
1787
;; The RTL template of this expander is not important: the body always
;; goes through the gen_* function and ends with DONE.  The template only
;; exists so that the iterators have something to wrap around.

(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (unspec:VCVTF
          [(match_operand:VCVTF 1 "s_register_operand")
           (PLUSMINUS:<VFML>
             (match_operand:<VFML> 2 "s_register_operand")
             (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
  "TARGET_FP16FML"
{
  /* The value built here is passed twice, as the last two arguments of
     the _intrinsic generator.  */
  rtx sel = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx pat = gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           sel, sel);
  emit_insn (pat);
  DONE;
})
1809
;; vfmal.f16, low halves.  An fma whose two multiplicands are the
;; float-extended vec_selects of operands 2 and 3 (selectors 4 and 5 must
;; satisfy vect_par_constant_low) and whose third input is operand 1, which
;; is tied to the destination.
(define_insn "vfmal_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1826
;; vfmsl.f16, high halves.  Same shape as the vfmal patterns, except that
;; the vec_select of operand 2 is negated before being float-extended.
;; Selectors 4 and 5 must satisfy vect_par_constant_high; operand 1 is tied
;; to the destination.
(define_insn "vfmsl_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1844
;; vfmal.f16, high halves.  An fma whose two multiplicands are the
;; float-extended vec_selects of operands 2 and 3 (selectors 4 and 5 must
;; satisfy vect_par_constant_high) and whose third input is operand 1,
;; which is tied to the destination.
(define_insn "vfmal_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_high" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1861
;; vfmsl.f16, low halves.  Same shape as the vfmal patterns, except that
;; the vec_select of operand 2 is negated before being float-extended.
;; Selectors 4 and 5 must satisfy vect_par_constant_low; operand 1 is tied
;; to the destination.
(define_insn "vfmsl_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 5 "vect_par_constant_low" "")))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1879
;; Expander for the by-lane vfmal/vfmsl forms where operands 2 and 3 share
;; the <VFML> mode.  Operand 4 is the constant lane number; it is passed
;; through NEON_ENDIAN_LANE_N before being handed, together with the
;; half selector, to the matching _intrinsic generator.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFML> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx sel = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
  rtx pat = gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
              (operands[0], operands[1], operands[2], operands[3],
               sel, lane_rtx);
  emit_insn (pat);
  DONE;
})
1898
;; vfmal.f16 by lane, low half of operand 2.  The second multiplicand is a
;; single HF element of operand 3 (lane number in operand 5) duplicated
;; across <VFMLSEL>.  Operand 1 is tied to the destination.
(define_insn "vfmal_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* A lane that fits in the first <VFMLSEL> worth of elements is printed
     against the %<V_lo> form of operand 3 unchanged.  */
  if (idx < half_nunits)
    {
      operands[5] = GEN_INT (idx);
      return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the lane and print the %<V_hi> form.  */
  operands[5] = GEN_INT (idx - half_nunits);
  return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1928
;; Expander for the by-lane vfmal/vfmsl forms where operand 3 has the
;; <VFMLSEL2> mode rather than the <VFML> mode of operand 2.  The lane
;; number (operand 4) is passed through NEON_ENDIAN_LANE_N using
;; <VFMLSEL2>mode; the half selector is built for <VFML>mode.
(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
  [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
     (unspec:VCVTF
       [(match_operand:VCVTF 1 "s_register_operand")
        (PLUSMINUS:<VFML>
          (match_operand:<VFML> 2 "s_register_operand")
          (match_operand:<VFMLSEL2> 3 "s_register_operand"))
        (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
  "TARGET_FP16FML"
{
  rtx sel = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
  rtx lane_rtx
    = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
  rtx pat = gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
              (operands[0], operands[1], operands[2], operands[3],
               sel, lane_rtx);
  emit_insn (pat);
  DONE;
})
1947
;; Implements the intrinsics:
;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane has to be selected from the right S or D
;; subregister.

(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_low" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* The remainder is the lane printed in the instruction; the quotient is
     added to operand 3's register number below.  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
     print_operand emits the appropriate S or D register prefix.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
  return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
1984
;; Implements the intrinsics:
;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane has to be selected from the right S or D
;; subregister.

(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* The remainder is the lane printed in the instruction.  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Operand 3 is rebuilt in the halved VFMLSEL mode at the register that
     holds the requested lane, so that this subregister is the one that
     gets printed.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2020
;; vfmal.f16 by lane, high half of operand 2.  The second multiplicand is a
;; single HF element of operand 3 (lane number in operand 5) duplicated
;; across <VFMLSEL>.  Operand 1 is tied to the destination.
(define_insn "vfmal_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (vec_select:<VFMLSEL>
              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
              (match_operand:<VFML> 4 "vect_par_constant_high" "")))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* A lane that fits in the first <VFMLSEL> worth of elements is printed
     against the %<V_lo> form of operand 3 unchanged.  */
  if (idx < half_nunits)
    {
      operands[5] = GEN_INT (idx);
      return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the lane and print the %<V_hi> form.  */
  operands[5] = GEN_INT (idx - half_nunits);
  return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2050
;; vfmsl.f16 by lane, low half of operand 2.  The vec_select of operand 2
;; is negated; the second multiplicand is a single HF element of operand 3
;; (lane number in operand 5) duplicated across <VFMLSEL>.  Operand 1 is
;; tied to the destination.
(define_insn "vfmsl_lane_low<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* A lane that fits in the first <VFMLSEL> worth of elements is printed
     against the %<V_lo> form of operand 3 unchanged.  */
  if (idx < half_nunits)
    {
      operands[5] = GEN_INT (idx);
      return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the lane and print the %<V_hi> form.  */
  operands[5] = GEN_INT (idx - half_nunits);
  return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2081
;; Implements the intrinsics:
;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane has to be selected from the right S or D
;; subregister.

(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* The remainder is the lane printed in the instruction; the quotient is
     added to operand 3's register number below.  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Operands 2 and 3 are rebuilt in the halved VFMLSEL mode so that
     print_operand emits the appropriate S or D register prefix.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
  return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2119
;; Implements the intrinsics:
;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
;; 'a' and 'b' have different sizes, so the modes of the sub-expressions
;; need care, and the lane has to be selected from the right S or D
;; subregister.

(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int lanes_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));

  /* The remainder is the lane printed in the instruction.  */
  operands[5] = GEN_INT (lane % lanes_per_reg);

  /* Operand 3 is rebuilt in the halved VFMLSEL mode at the register that
     holds the requested lane, so that this subregister is the one that
     gets printed.  */
  operands[3] = gen_rtx_REG (<VFMLSEL>mode,
                             REGNO (operands[3]) + lane / lanes_per_reg);
  return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2156
;; vfmsl.f16 by lane, high half of operand 2.  The vec_select of operand 2
;; is negated; the second multiplicand is a single HF element of operand 3
;; (lane number in operand 5) duplicated across <VFMLSEL>.  Operand 1 is
;; tied to the destination.
(define_insn "vfmsl_lane_high<mode>_intrinsic"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (fma:VCVTF
          (float_extend:VCVTF
            (neg:<VFMLSEL>
              (vec_select:<VFMLSEL>
                (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
                (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
          (float_extend:VCVTF
            (vec_duplicate:<VFMLSEL>
              (vec_select:HF
                (match_operand:<VFML> 3 "s_register_operand" "x")
                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
          (match_operand:VCVTF 1 "s_register_operand" "0")))]
  "TARGET_FP16FML"
{
  const int half_nunits = GET_MODE_NUNITS (<VFMLSEL>mode);
  const int idx = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));

  /* A lane that fits in the first <VFMLSEL> worth of elements is printed
     against the %<V_lo> form of operand 3 unchanged.  */
  if (idx < half_nunits)
    {
      operands[5] = GEN_INT (idx);
      return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
    }

  /* Otherwise rebase the lane and print the %<V_hi> form.  */
  operands[5] = GEN_INT (idx - half_nunits);
  return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
}
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
2187
;; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The multiply-accumulate is kept as UNSPEC_VMLA; operand 1 is tied to the
;; destination and the "type" attribute depends on <Is_float_mode>.

(define_insn "neon_vmla<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 2 "s_register_operand" "w")
                      (match_operand:VDQW 3 "s_register_operand" "w")]
                     UNSPEC_VMLA))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2203
;; vmlal, expressed as an unspec drawn from the VMLAL iterator.  Operand 1
;; has the <V_widen> mode and is tied to the destination; operands 2 and 3
;; use the VW mode.
(define_insn "neon_vmlal<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLAL))]
  "TARGET_NEON"
  "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2214
;; Expander behind neon_vmls.  When ARM_HAVE_NEON_<MODE>_ARITH holds the
;; mul<mode>3neg<mode>add<mode>_neon pattern is used; otherwise the
;; operation is emitted through neon_vmls<mode>_unspec.  Both generators
;; receive the operands in their original order.
(define_expand "neon_vmls<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:VDQW 2 "s_register_operand")
   (match_operand:VDQW 3 "s_register_operand")]
  "TARGET_NEON"
{
  rtx pat;

  if (ARM_HAVE_NEON_<MODE>_ARITH)
    pat = gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], operands[1],
                                                 operands[2], operands[3]);
  else
    pat = gen_neon_vmls<mode>_unspec (operands[0], operands[1],
                                      operands[2], operands[3]);
  emit_insn (pat);
  DONE;
})
2230
;; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The multiply-subtract is kept as UNSPEC_VMLS; operand 1 is tied to the
;; destination and the "type" attribute depends on <Is_float_mode>.

(define_insn "neon_vmls<mode>_unspec"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
                      (match_operand:VDQW 2 "s_register_operand" "w")
                      (match_operand:VDQW 3 "s_register_operand" "w")]
                     UNSPEC_VMLS))]
  "TARGET_NEON"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s<q>")
                      (const_string "neon_mla_<V_elem_ch><q>")))]
)
2246
;; vmlsl, expressed as an unspec drawn from the VMLSL iterator.  Operand 1
;; has the <V_widen> mode and is tied to the destination; operands 2 and 3
;; use the VW mode.
(define_insn "neon_vmlsl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VW 2 "s_register_operand" "w")
           (match_operand:VW 3 "s_register_operand" "w")]
          VMLSL))]
  "TARGET_NEON"
  "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
)
2257
;; vqdmulh and vqrdmulh: one pattern, iterated over VQDMULH, with two
;; VMDQI register inputs and a VMDQI result.
(define_insn "neon_vq<r>dmulh<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
                       (match_operand:VMDQI 2 "s_register_operand" "w")]
                      VQDMULH))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
)
2268
;; vqrdmlah and vqrdmlsh: one pattern, iterated over VQRDMLH_AS.  Operand 1
;; is tied to the destination; the pattern is only available under
;; TARGET_NEON_RDMA.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
  [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
        (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
                       (match_operand:VMDQI 2 "s_register_operand" "w")
                       (match_operand:VMDQI 3 "s_register_operand" "w")]
                      VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2280
;; vqdmlal as UNSPEC_VQDMLAL.  Operand 1 has the <V_widen> mode and is tied
;; to the destination; operands 2 and 3 use the VMDI mode.
(define_insn "neon_vqdmlal<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLAL))]
  "TARGET_NEON"
  "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2291
;; vqdmlsl as UNSPEC_VQDMLSL.  Operand 1 has the <V_widen> mode and is tied
;; to the destination; operands 2 and 3 use the VMDI mode.
(define_insn "neon_vqdmlsl<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand" "w")]
          UNSPEC_VQDMLSL))]
  "TARGET_NEON"
  "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
  [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
)
2302
;; vmull, expressed as an unspec drawn from the VMULL iterator: two VW
;; register inputs and a <V_widen> result.
(define_insn "neon_vmull<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
                           (match_operand:VW 2 "s_register_operand" "w")]
                          VMULL))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
2312
;; vqdmull as UNSPEC_VQDMULL: two VMDI register inputs and a <V_widen>
;; result.
(define_insn "neon_vqdmull<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
                           (match_operand:VMDI 2 "s_register_operand" "w")]
                          UNSPEC_VQDMULL))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
)
2322
;; Expander behind neon_vsub for the VCVTF modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH holds the generic sub<mode>3 pattern is used;
;; otherwise the subtraction is emitted through neon_vsub<mode>_unspec.
(define_expand "neon_vsub<mode>"
  [(match_operand:VCVTF 0 "s_register_operand")
   (match_operand:VCVTF 1 "s_register_operand")
   (match_operand:VCVTF 2 "s_register_operand")]
  "TARGET_NEON"
{
  rtx pat;

  if (ARM_HAVE_NEON_<MODE>_ARITH)
    pat = gen_sub<mode>3 (operands[0], operands[1], operands[2]);
  else
    pat = gen_neon_vsub<mode>_unspec (operands[0], operands[1],
                                      operands[2]);
  emit_insn (pat);
  DONE;
})
2336
;; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; The subtraction is kept as UNSPEC_VSUB; the "type" attribute depends on
;; <Is_float_mode>.

(define_insn "neon_vsub<mode>_unspec"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
                       (match_operand:VCVTF 2 "s_register_operand" "w")]
                      UNSPEC_VSUB))]
  "TARGET_NEON"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_addsub_s<q>")
                      (const_string "neon_sub<q>")))]
)
2351
;; vsubl, expressed as an unspec drawn from the VSUBL iterator: two VDI
;; register inputs and a <V_widen> result.
(define_insn "neon_vsubl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
                           (match_operand:VDI 2 "s_register_operand" "w")]
                          VSUBL))]
  "TARGET_NEON"
  "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
  [(set_attr "type" "neon_sub_long")]
)
2361
;; vsubw, expressed as an unspec drawn from the VSUBW iterator.  Operand 1
;; already has the <V_widen> mode of the result; operand 2 uses the VDI
;; mode.
(define_insn "neon_vsubw<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:<V_widen> 1 "s_register_operand" "w")
           (match_operand:VDI 2 "s_register_operand" "w")]
          VSUBW))]
  "TARGET_NEON"
  "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
  [(set_attr "type" "neon_sub_widen")]
)
2371
;; vqsub, expressed as an unspec drawn from the VQSUB iterator, on the
;; VDQIX modes.
(define_insn "neon_vqsub<sup><mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
                       (match_operand:VDQIX 2 "s_register_operand" "w")]
                      VQSUB))]
  "TARGET_NEON"
  "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_qsub<q>")]
)
2381
;; vhsub, expressed as an unspec drawn from the VHSUB iterator, on the
;; VDQIW modes.
(define_insn "neon_vhsub<sup><mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
                       (match_operand:VDQIW 2 "s_register_operand" "w")]
                      VHSUB))]
  "TARGET_NEON"
  "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub_halve<q>")]
)
2391
;; vsubhn and vrsubhn: one pattern, iterated over VSUBHN, with two VN
;; register inputs and a <V_narrow> result.
(define_insn "neon_v<r>subhn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
                            (match_operand:VN 2 "s_register_operand" "w")]
                           VSUBHN))]
  "TARGET_NEON"
  "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
  [(set_attr "type" "neon_sub_halve_narrow_q")]
)
2401
;; Comparison expander for the VDQW modes.  For a floating-point mode
;; without unsafe math optimizations it expands to an UNSPEC pattern;
;; in every other case it expands to neon_vc<cmp_op><mode>_insn.
(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
                       (match_operand:VDQW 2 "reg_or_zero_operand")))]
  "TARGET_NEON"
{
  /* FP comparisons use UNSPECs unless -funsafe-math-optimizations is
     enabled.  */
  const bool use_unspec
    = (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
       && !flag_unsafe_math_optimizations);

  if (!use_unspec)
    {
      emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
                                                 operands[1],
                                                 operands[2]));
      DONE;
    }

  /* gen_neon_vc<cmp_op><mode>_insn_unspec cannot be called directly:
     the _insn_unspec generators exist only for the float modes, whereas
     this expander also iterates over the integer modes.  The integer
     modes never reach this point, so dispatch on the two float modes
     explicitly.  */
  switch (<MODE>mode)
    {
    case E_V2SFmode:
      emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
                                                      operands[1],
                                                      operands[2]));
      break;
    case E_V4SFmode:
      emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
                                                      operands[1],
                                                      operands[2]));
      break;
    default:
      gcc_unreachable ();
    }
  DONE;
}
)
2443
;; RTL-level vector comparison on the VDQW modes.  The insn is disabled
;; for floating-point modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 compares against a register, alternative 1 against zero
;; (constraint Dz), which is printed as "#0".
(define_insn "@neon_vc<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VDQW 1 "s_register_operand" "w,w")
            (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
                    && !flag_unsafe_math_optimizations)"
{
  /* Element-type letter: "f" for float vectors, <cmp_type> otherwise.  */
  const char *elem_kind
    = GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT ? "f" : "<cmp_type>";
  /* Second source: the register, or an immediate zero.  */
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char templ[100];

  sprintf (templ, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           elem_kind, rhs);
  output_asm_insn (templ, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))]
)
2468
;; UNSPEC form of the floating-point vector comparisons (iterated over
;; NEON_VCMP) on the VCVTF modes.  Alternative 0 compares against a
;; register, alternative 1 against zero, which is printed as "#0".
(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w,w")
           (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
          NEON_VCMP))]
  "TARGET_NEON"
{
  /* Second source: the register, or an immediate zero.  */
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char templ[100];

  sprintf (templ, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           rhs);
  output_asm_insn (templ, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2487
;; Comparison expander for the VH modes.  It picks between the RTL-level
;; _fp16insn pattern and the UNSPEC-based _fp16insn_unspec pattern.
(define_expand "@neon_vc<cmp_op><mode>"
  [(match_operand:<V_cmp_result> 0 "s_register_operand")
   (neg:<V_cmp_result>
     (COMPARISONS:VH
       (match_operand:VH 1 "s_register_operand")
       (match_operand:VH 2 "reg_or_zero_operand")))]
  "TARGET_NEON_FP16INST"
{
  rtx pat;

  /* FP comparisons use UNSPECs unless -funsafe-math-optimizations is
     enabled; the test below is the negation of that condition.  */
  if (flag_unsafe_math_optimizations
      || GET_MODE_CLASS (<MODE>mode) != MODE_VECTOR_FLOAT)
    pat = gen_neon_vc<cmp_op><mode>_fp16insn (operands[0], operands[1],
                                              operands[2]);
  else
    pat = gen_neon_vc<cmp_op><mode>_fp16insn_unspec (operands[0],
                                                     operands[1],
                                                     operands[2]);
  emit_insn (pat);
  DONE;
})
2509
;; RTL-level vector comparison on the VH modes.  The insn is disabled for
;; floating-point modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 compares against a register, alternative 1 against zero
;; (constraint Dz), which is printed as "#0".
(define_insn "neon_vc<cmp_op><mode>_fp16insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
        (neg:<V_cmp_result>
          (COMPARISONS:<V_cmp_result>
            (match_operand:VH 1 "s_register_operand" "w,w")
            (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
  "TARGET_NEON_FP16INST
   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
        && !flag_unsafe_math_optimizations)"
{
  /* Element-type letter: "f" for float vectors, <cmp_type> otherwise.  */
  const char *elem_kind
    = GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT ? "f" : "<cmp_type>";
  /* Second source: the register, or an immediate zero.  */
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char templ[100];

  sprintf (templ, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           elem_kind, rhs);
  output_asm_insn (templ, operands);
  return "";
}
  [(set (attr "type")
        (if_then_else (match_operand 2 "zero_operand")
                      (const_string "neon_compare_zero<q>")
                      (const_string "neon_compare<q>")))])
2534
;; UNSPEC form of the vector comparisons (iterated over NEON_VCMP) on the
;; VH modes.  Alternative 0 compares against a register, alternative 1
;; against zero, which is printed as "#0".
(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
  [(set
     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
     (unspec:<V_cmp_result>
       [(match_operand:VH 1 "s_register_operand" "w,w")
        (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
       NEON_VCMP))]
  "TARGET_NEON_FP16INST"
{
  /* Second source: the register, or an immediate zero.  */
  const char *rhs = which_alternative == 0 ? "%<V_reg>2" : "#0";
  char templ[100];

  sprintf (templ, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
                  " %%<V_reg>1, %s",
           rhs);
  output_asm_insn (templ, operands);
  return "";
}
  [(set_attr "type" "neon_fp_compare_s<q>")])
2553
;; Comparisons drawn from the GTUGEU iterator on the VDQIW modes; both
;; inputs are registers and the mnemonic always carries the ".u" type
;; prefix.
(define_insn "@neon_vc<code><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTUGEU:<V_cmp_result>
            (match_operand:VDQIW 1 "s_register_operand" "w")
            (match_operand:VDQIW 2 "s_register_operand" "w"))))]
  "TARGET_NEON"
  "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_compare<q>")]
)
2564
;; Absolute-value comparison expander for the VCVTF modes.  With
;; flag_unsafe_math_optimizations the RTL-level _insn pattern is used;
;; otherwise the UNSPEC-based _insn_unspec pattern.
(define_expand "neon_vca<cmp_op><mode>"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
  "TARGET_NEON"
{
  rtx pat;

  if (flag_unsafe_math_optimizations)
    pat = gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
                                           operands[2]);
  else
    pat = gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
                                                  operands[1],
                                                  operands[2]);
  emit_insn (pat);
  DONE;
}
)
2583
;; RTL-level absolute-value comparison (GTGE of the abs of both inputs) on
;; the VCVTF modes; only enabled with flag_unsafe_math_optimizations.
(define_insn "neon_vca<cmp_op><mode>_insn"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (neg:<V_cmp_result>
          (GTGE:<V_cmp_result>
            (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
            (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
  "TARGET_NEON && flag_unsafe_math_optimizations"
  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2594
;; UNSPEC form of the absolute-value comparisons (iterated over
;; NEON_VACMP) on the VCVTF modes.
(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
        (unspec:<V_cmp_result>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:VCVTF 2 "s_register_operand" "w")]
          NEON_VACMP))]
  "TARGET_NEON"
  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_compare_s<q>")]
)
2604
;; Absolute-value comparison expander for the VH modes (iterated over
;; GLTE).  With flag_unsafe_math_optimizations the RTL-level _fp16insn
;; pattern is used; otherwise the UNSPEC-based _fp16insn_unspec pattern.
(define_expand "neon_vca<cmp_op><mode>"
  [(set
     (match_operand:<V_cmp_result> 0 "s_register_operand")
     (neg:<V_cmp_result>
       (GLTE:<V_cmp_result>
         (abs:VH (match_operand:VH 1 "s_register_operand"))
         (abs:VH (match_operand:VH 2 "s_register_operand")))))]
  "TARGET_NEON_FP16INST"
{
  rtx pat;

  if (flag_unsafe_math_optimizations)
    pat = gen_neon_vca<cmp_op><mode>_fp16insn
            (operands[0], operands[1], operands[2]);
  else
    pat = gen_neon_vca<cmp_op><mode>_fp16insn_unspec
            (operands[0], operands[1], operands[2]);
  emit_insn (pat);
  DONE;
})
2622
;; vac<cmp_op> over the VH modes, described in plain RTL as the negated GLTE
;; comparison of the absolute values of operands 1 and 2.  Requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
2623 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2624 [(set
2625 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2626 (neg:<V_cmp_result>
2627 (GLTE:<V_cmp_result>
2628 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2629 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2630 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2631 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2632 [(set_attr "type" "neon_fp_compare_s<q>")]
2633 )
2634
;; UNSPEC form of vac<cmp_op_unsp> over the VH modes, iterated over
;; NEON_VAGLTE.  This is the variant the VH neon_vca<cmp_op><mode> expander
;; emits when flag_unsafe_math_optimizations is not set.
;; The insn condition is TARGET_NEON_FP16INST, matching both that expander and
;; the sibling neon_vca<cmp_op><mode>_fp16insn pattern, so that this VH
;; pattern is never recognized on a NEON target without the FP16 instructions.
2635 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2636 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2637 (unspec:<V_cmp_result>
2638 [(match_operand:VH 1 "s_register_operand" "w")
2639 (match_operand:VH 2 "s_register_operand" "w")]
2640 NEON_VAGLTE))]
2641 "TARGET_NEON_FP16INST"
2642 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2643 [(set_attr "type" "neon_fp_compare_s<q>")]
2644 )
2645
;; Compare-against-zero expander for the VH modes and the COMPARISONS codes.
;; It forwards to gen_neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as
;; the second comparison operand and finishes with DONE.
2646 (define_expand "neon_vc<cmp_op>z<mode>"
2647 [(set
2648 (match_operand:<V_cmp_result> 0 "s_register_operand")
2649 (COMPARISONS:<V_cmp_result>
2650 (match_operand:VH 1 "s_register_operand")
2651 (const_int 0)))]
2652 "TARGET_NEON_FP16INST"
2653 {
2654 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2655 CONST0_RTX (<MODE>mode)));
2656 DONE;
2657 })
2658
;; vtst over the VDQIW modes, represented as UNSPEC_VTST of operands 1 and 2.
2659 (define_insn "neon_vtst<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2662 (match_operand:VDQIW 2 "s_register_operand" "w")]
2663 UNSPEC_VTST))]
2664 "TARGET_NEON"
2665 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2666 [(set_attr "type" "neon_tst<q>")]
2667 )
2668
;; Integer vabd over the VDQIW modes; one pattern per member of the VABD
;; unspec iterator, with <sup> supplying the type letter in the mnemonic.
2669 (define_insn "neon_vabd<sup><mode>"
2670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w")]
2673 VABD))]
2674 "TARGET_NEON"
2675 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2676 [(set_attr "type" "neon_abd<q>")]
2677 )
2678
;; vabd over the VH modes, represented as UNSPEC_VABD_F and enabled under
;; TARGET_NEON_FP16INST.
;; NOTE(review): the "type" attribute here is "neon_abd<q>", whereas the VCVTF
;; pattern for the same UNSPEC_VABD_F (neon_vabdf<mode>) uses
;; "neon_fp_abd_s<q>" -- confirm whether the integer scheduling type is
;; intended.
2679 (define_insn "neon_vabd<mode>"
2680 [(set (match_operand:VH 0 "s_register_operand" "=w")
2681 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2682 (match_operand:VH 2 "s_register_operand" "w")]
2683 UNSPEC_VABD_F))]
2684 "TARGET_NEON_FP16INST"
2685 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686 [(set_attr "type" "neon_abd<q>")]
2687 )
2688
;; vabd over the VCVTF modes, represented as UNSPEC_VABD_F.
2689 (define_insn "neon_vabdf<mode>"
2690 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2691 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2692 (match_operand:VCVTF 2 "s_register_operand" "w")]
2693 UNSPEC_VABD_F))]
2694 "TARGET_NEON"
2695 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696 [(set_attr "type" "neon_fp_abd_s<q>")]
2697 )
2698
;; vabdl: takes two VW-mode sources and produces a <V_widen>-mode result.
;; The output template prints the destination as a Q register (%q0) and the
;; sources as D registers (%P1, %P2).
2699 (define_insn "neon_vabdl<sup><mode>"
2700 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2701 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2702 (match_operand:VW 2 "s_register_operand" "w")]
2703 VABDL))]
2704 "TARGET_NEON"
2705 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2706 [(set_attr "type" "neon_abd_long")]
2707 )
2708
;; vaba: the VABD unspec of operands 2 and 3 added to operand 1.  Operand 1
;; is tied to the destination by the "0" constraint, so only operands 0, 2
;; and 3 appear in the assembly template.
2709 (define_insn "neon_vaba<sup><mode>"
2710 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2711 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2712 (match_operand:VDQIW 3 "s_register_operand" "w")]
2713 VABD)
2714 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2715 "TARGET_NEON"
2716 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2717 [(set_attr "type" "neon_arith_acc<q>")]
2718 )
2719
;; vabal: the VABDL unspec of the VW-mode operands 2 and 3 added to the
;; <V_widen>-mode operand 1, which is tied to the destination ("0").
2720 (define_insn "neon_vabal<sup><mode>"
2721 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2722 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2723 (match_operand:VW 3 "s_register_operand" "w")]
2724 VABDL)
2725 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2726 "TARGET_NEON"
2727 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2728 [(set_attr "type" "neon_arith_acc<q>")]
2729 )
2730
;; Expander for <sup>sadv16qi: operands 1 and 2 are V16QI inputs, operand 3
;; is a V4SI accumulator and operand 0 the V4SI result.  The sequence emitted
;; is:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a fresh V8HI
;;      register (reduc);
;;   2. neon_vget_highv16qi to extract the high V8QI halves of both inputs;
;;   3. vabal of those high halves, accumulating into reduc;
;;   4. vpadal of reduc into operands[3] (operands[3] is updated in place);
;;   5. a move of operands[3] into operands[0].
2731 (define_expand "<sup>sadv16qi"
2732 [(use (match_operand:V4SI 0 "register_operand"))
2733 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2734 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2735 (use (match_operand:V4SI 3 "register_operand"))]
2736 "TARGET_NEON"
2737 {
2738 rtx reduc = gen_reg_rtx (V8HImode);
2739 rtx op1_highpart = gen_reg_rtx (V8QImode);
2740 rtx op2_highpart = gen_reg_rtx (V8QImode);
2741
2742 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2743 gen_lowpart (V8QImode, operands[1]),
2744 gen_lowpart (V8QImode, operands[2])));
2745
2746 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2747 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2748 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2749 op1_highpart, op2_highpart));
2750 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2751
2752 emit_move_insn (operands[0], operands[3]);
2753 DONE;
2754 }
2755 )
2756
;; Integer v<maxmin> over the VDQIW modes; one pattern per member of the
;; VMAXMIN unspec iterator.
2757 (define_insn "neon_v<maxmin><sup><mode>"
2758 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2759 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2760 (match_operand:VDQIW 2 "s_register_operand" "w")]
2761 VMAXMIN))]
2762 "TARGET_NEON"
2763 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2764 [(set_attr "type" "neon_minmax<q>")]
2765 )
2766
;; v<maxmin> over the VCVTF modes, iterated over the VMAXMINF unspecs.
2767 (define_insn "neon_v<maxmin>f<mode>"
2768 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2769 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2770 (match_operand:VCVTF 2 "s_register_operand" "w")]
2771 VMAXMINF))]
2772 "TARGET_NEON"
2773 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2774 [(set_attr "type" "neon_fp_minmax_s<q>")]
2775 )
2776
;; v<maxmin> over the VH modes, iterated over the VMAXMINF unspecs; enabled
;; under TARGET_NEON_FP16INST.
2777 (define_insn "neon_v<maxmin>f<mode>"
2778 [(set (match_operand:VH 0 "s_register_operand" "=w")
2779 (unspec:VH
2780 [(match_operand:VH 1 "s_register_operand" "w")
2781 (match_operand:VH 2 "s_register_operand" "w")]
2782 VMAXMINF))]
2783 "TARGET_NEON_FP16INST"
2784 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2785 [(set_attr "type" "neon_fp_minmax_s<q>")]
2786 )
2787
;; vp<maxmin>.f16 on V4HF operands (all printed as D registers), iterated
;; over the VPMAXMINF unspecs; enabled under TARGET_NEON_FP16INST.
2788 (define_insn "neon_vp<maxmin>fv4hf"
2789 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2790 (unspec:V4HF
2791 [(match_operand:V4HF 1 "s_register_operand" "w")
2792 (match_operand:V4HF 2 "s_register_operand" "w")]
2793 VPMAXMINF))]
2794 "TARGET_NEON_FP16INST"
2795 "vp<maxmin>.f16\t%P0, %P1, %P2"
2796 [(set_attr "type" "neon_reduc_minmax")]
2797 )
2798
;; <fmaxmin_op> over the VH modes, iterated over the VMAXMINFNM unspecs;
;; enabled under TARGET_NEON_FP16INST.
2799 (define_insn "neon_<fmaxmin_op><mode>"
2800 [(set
2801 (match_operand:VH 0 "s_register_operand" "=w")
2802 (unspec:VH
2803 [(match_operand:VH 1 "s_register_operand" "w")
2804 (match_operand:VH 2 "s_register_operand" "w")]
2805 VMAXMINFNM))]
2806 "TARGET_NEON_FP16INST"
2807 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2808 [(set_attr "type" "neon_fp_minmax_s<q>")]
2809 )
2810
2811 ;; v<maxmin>nm intrinsics.
;; VCVTF-mode counterpart of the VH pattern of the same name: same
;; VMAXMINFNM unspecs and output template, but enabled under
;; TARGET_NEON && TARGET_VFP5.
2812 (define_insn "neon_<fmaxmin_op><mode>"
2813 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2814 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2815 (match_operand:VCVTF 2 "s_register_operand" "w")]
2816 VMAXMINFNM))]
2817 "TARGET_NEON && TARGET_VFP5"
2818 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2819 [(set_attr "type" "neon_fp_minmax_s<q>")]
2820 )
2821
2822 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and output template as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, exposed under the <fmaxmin><mode>3 name.
2823 (define_insn "<fmaxmin><mode>3"
2824 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2825 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2826 (match_operand:VCVTF 2 "s_register_operand" "w")]
2827 VMAXMINFNM))]
2828 "TARGET_NEON && TARGET_VFP5"
2829 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2830 [(set_attr "type" "neon_fp_minmax_s<q>")]
2831 )
2832
;; Builtin-facing expander for vpadd over the VD modes: a thin wrapper that
;; forwards its three operands unchanged to neon_vpadd_internal<mode>.
2833 (define_expand "neon_vpadd<mode>"
2834 [(match_operand:VD 0 "s_register_operand")
2835 (match_operand:VD 1 "s_register_operand")
2836 (match_operand:VD 2 "s_register_operand")]
2837 "TARGET_NEON"
2838 {
2839 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2840 operands[2]));
2841 DONE;
2842 })
2843
;; vpaddl: single VDQIW-mode source, <V_double_width>-mode result; iterated
;; over the VPADDL unspecs.
2844 (define_insn "neon_vpaddl<sup><mode>"
2845 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2846 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2847 VPADDL))]
2848 "TARGET_NEON"
2849 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2850 [(set_attr "type" "neon_reduc_add_long")]
2851 )
2852
;; vpadal: operand 1 is the <V_double_width>-mode accumulator, tied to the
;; destination ("0"); operand 2 is the VDQIW-mode source.  Only operands 0
;; and 2 appear in the assembly template.
2853 (define_insn "neon_vpadal<sup><mode>"
2854 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2855 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2856 (match_operand:VDQIW 2 "s_register_operand" "w")]
2857 VPADAL))]
2858 "TARGET_NEON"
2859 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2860 [(set_attr "type" "neon_reduc_add_acc")]
2861 )
2862
;; Integer vp<maxmin> over the VDI modes, iterated over the VPMAXMIN unspecs.
2863 (define_insn "neon_vp<maxmin><sup><mode>"
2864 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2865 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2866 (match_operand:VDI 2 "s_register_operand" "w")]
2867 VPMAXMIN))]
2868 "TARGET_NEON"
2869 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2870 [(set_attr "type" "neon_reduc_minmax<q>")]
2871 )
2872
;; vp<maxmin> over the VCVTF modes, iterated over the VPMAXMINF unspecs.
2873 (define_insn "neon_vp<maxmin>f<mode>"
2874 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2875 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2876 (match_operand:VCVTF 2 "s_register_operand" "w")]
2877 VPMAXMINF))]
2878 "TARGET_NEON"
2879 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2880 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2881 )
2882
;; vrecps over the VCVTF modes, represented as UNSPEC_VRECPS.
2883 (define_insn "neon_vrecps<mode>"
2884 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2885 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2886 (match_operand:VCVTF 2 "s_register_operand" "w")]
2887 UNSPEC_VRECPS))]
2888 "TARGET_NEON"
2889 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2890 [(set_attr "type" "neon_fp_recps_s<q>")]
2891 )
2892
;; vrecps over the VH modes, represented as UNSPEC_VRECPS; enabled under
;; TARGET_NEON_FP16INST.
2893 (define_insn "neon_vrecps<mode>"
2894 [(set
2895 (match_operand:VH 0 "s_register_operand" "=w")
2896 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2897 (match_operand:VH 2 "s_register_operand" "w")]
2898 UNSPEC_VRECPS))]
2899 "TARGET_NEON_FP16INST"
2900 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2901 [(set_attr "type" "neon_fp_recps_s<q>")]
2902 )
2903
;; vrsqrts over the VCVTF modes, represented as UNSPEC_VRSQRTS.
2904 (define_insn "neon_vrsqrts<mode>"
2905 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2906 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2907 (match_operand:VCVTF 2 "s_register_operand" "w")]
2908 UNSPEC_VRSQRTS))]
2909 "TARGET_NEON"
2910 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2911 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2912 )
2913
;; vrsqrts over the VH modes, represented as UNSPEC_VRSQRTS; enabled under
;; TARGET_NEON_FP16INST.
2914 (define_insn "neon_vrsqrts<mode>"
2915 [(set
2916 (match_operand:VH 0 "s_register_operand" "=w")
2917 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2918 (match_operand:VH 2 "s_register_operand" "w")]
2919 UNSPEC_VRSQRTS))]
2920 "TARGET_NEON_FP16INST"
2921 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2922 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2923 )
2924
;; Builtin-facing expander for vabs over the VDQW modes: forwards to the
;; standard-named abs<mode>2 pattern.
2925 (define_expand "neon_vabs<mode>"
2926 [(match_operand:VDQW 0 "s_register_operand")
2927 (match_operand:VDQW 1 "s_register_operand")]
2928 "TARGET_NEON"
2929 {
2930 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2931 DONE;
2932 })
2933
;; vqabs over the VDQIW modes, represented as UNSPEC_VQABS.
2934 (define_insn "neon_vqabs<mode>"
2935 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2936 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2937 UNSPEC_VQABS))]
2938 "TARGET_NEON"
2939 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2940 [(set_attr "type" "neon_qabs<q>")]
2941 )
2942
;; Per-element bswap over the VDQHSD modes, emitted as vrev<V_sz_elem>.8
;; (i.e. an 8-bit reversal within each element-sized container).
2943 (define_insn "neon_bswap<mode>"
2944 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2945 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2946 "TARGET_NEON"
2947 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2948 [(set_attr "type" "neon_rev<q>")]
2949 )
2950
;; Builtin-facing expander for vneg over the VDQW modes: forwards to
;; neon_neg<mode>2.
2951 (define_expand "neon_vneg<mode>"
2952 [(match_operand:VDQW 0 "s_register_operand")
2953 (match_operand:VDQW 1 "s_register_operand")]
2954 "TARGET_NEON"
2955 {
2956 emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
2957 DONE;
2958 })
2959
2960
2961 ;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
2962 ;; fact that their usage needs to guarantee that the source vectors are
2963 ;; contiguous. It would be wrong to describe the operation without being able
2964 ;; to describe the permute that is also required, but even if that is done
2965 ;; the permute would have been created as a LOAD_LANES which means the values
2966 ;; in the registers are in the wrong order.
;; vcadd over the VF modes, iterated over the VCADD unspecs; <rot> supplies
;; the rotation immediate printed after '#'.  Enabled under TARGET_COMPLEX.
2967 (define_insn "neon_vcadd<rot><mode>"
2968 [(set (match_operand:VF 0 "register_operand" "=w")
2969 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2970 (match_operand:VF 2 "register_operand" "w")]
2971 VCADD))]
2972 "TARGET_COMPLEX"
2973 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2974 [(set_attr "type" "neon_fcadd")]
2975 )
2976
;; vcmla over the VF modes: operand 1 (the accumulator, tied to the
;; destination by "0") plus the VCMLA unspec of operands 2 and 3.  <rot>
;; supplies the rotation immediate.  Enabled under TARGET_COMPLEX.
2977 (define_insn "neon_vcmla<rot><mode>"
2978 [(set (match_operand:VF 0 "register_operand" "=w")
2979 (plus:VF (match_operand:VF 1 "register_operand" "0")
2980 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2981 (match_operand:VF 3 "register_operand" "w")]
2982 VCMLA)))]
2983 "TARGET_COMPLEX"
2984 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2985 [(set_attr "type" "neon_fcmla")]
2986 )
2987
;; Lane form of vcmla over the VF modes: operand 3 is a vector of the same
;; mode as the result and operand 4 the constant lane index.  Before
;; printing, the operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands (defined outside this file section;
;; presumably it rewrites operands 3 and 4 into the D-register number and
;; index consumed by the "d%c3[%c4]" template -- confirm against its
;; definition).
2988 (define_insn "neon_vcmla_lane<rot><mode>"
2989 [(set (match_operand:VF 0 "s_register_operand" "=w")
2990 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2991 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2992 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2993 (match_operand:SI 4 "const_int_operand" "n")]
2994 VCMLA)))]
2995 "TARGET_COMPLEX"
2996 {
2997 operands = neon_vcmla_lane_prepare_operands (operands);
2998 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2999 }
3000 [(set_attr "type" "neon_fcmla")]
3001 )
3002
;; "laneq" form of vcmla: result and operands 1-2 are in a VDF mode while the
;; lane source (operand 3) is in the corresponding <V_DOUBLE> mode.  Operand
;; handling and output template are the same as in neon_vcmla_lane<rot><mode>
;; (via neon_vcmla_lane_prepare_operands, defined elsewhere).
3003 (define_insn "neon_vcmla_laneq<rot><mode>"
3004 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3005 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3006 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3007 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3008 (match_operand:SI 4 "const_int_operand" "n")]
3009 VCMLA)))]
3010 "TARGET_COMPLEX"
3011 {
3012 operands = neon_vcmla_lane_prepare_operands (operands);
3013 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3014 }
3015 [(set_attr "type" "neon_fcmla")]
3016 )
3017
;; "q_lane" form of vcmla: result and operands 1-2 are in a VQ_HSF mode while
;; the lane source (operand 3) is in the corresponding <V_HALF> mode.
;; Operand handling and output template are the same as in
;; neon_vcmla_lane<rot><mode> (via neon_vcmla_lane_prepare_operands, defined
;; elsewhere).
3018 (define_insn "neon_vcmlaq_lane<rot><mode>"
3019 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3020 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3021 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3022 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3023 (match_operand:SI 4 "const_int_operand" "n")]
3024 VCMLA)))]
3025 "TARGET_COMPLEX"
3026 {
3027 operands = neon_vcmla_lane_prepare_operands (operands);
3028 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3029 }
3030 [(set_attr "type" "neon_fcmla")]
3031 )
3032
3033
3034 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI-mode accumulator, tied to the destination ("0");
;; operands 2 and 3 are the <VSI2QI>-mode sources of the DOTPROD unspec.
;; Enabled under TARGET_DOTPROD.
3035 (define_insn "neon_<sup>dot<vsi2qi>"
3036 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3037 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3038 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3039 "register_operand" "w")
3040 (match_operand:<VSI2QI> 3
3041 "register_operand" "w")]
3042 DOTPROD)))]
3043 "TARGET_DOTPROD"
3044 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3045 [(set_attr "type" "neon_dot<q>")]
3046 )
3047
3048 ;; These instructions map to the __builtins for the Dot Product operations.
;; vusdot.s8: the UNSPEC_DOT_US of the <VSI2QI>-mode operands 2 and 3 added
;; to the VCVTI-mode accumulator operand 1, which is tied to the destination
;; ("0").  Enabled under TARGET_I8MM.
3049 (define_insn "neon_usdot<vsi2qi>"
3050 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3051 (plus:VCVTI
3052 (unspec:VCVTI
3053 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3054 (match_operand:<VSI2QI> 3 "register_operand" "w")]
3055 UNSPEC_DOT_US)
3056 (match_operand:VCVTI 1 "register_operand" "0")))]
3057 "TARGET_I8MM"
3058 "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3059 [(set_attr "type" "neon_dot<q>")]
3060 )
3061
3062 ;; These instructions map to the __builtins for the Dot Product
3063 ;; indexed operations.
;; Operand 3 is always a V8QI register (constraint "t") and operand 4 the
;; immediate lane index.  The output code first remaps the lane index with
;; NEON_ENDIAN_LANE_N (V8QImode, ...) and then prints operand 3 as a D
;; register with that index.  Enabled under TARGET_DOTPROD.
3064 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3065 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3066 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3067 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3068 "register_operand" "w")
3069 (match_operand:V8QI 3 "register_operand" "t")
3070 (match_operand:SI 4 "immediate_operand" "i")]
3071 DOTPROD)))]
3072 "TARGET_DOTPROD"
3073 {
3074 operands[4]
3075 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3076 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3077 }
3078 [(set_attr "type" "neon_dot<q>")]
3079 )
3080
3081 ;; These instructions map to the __builtins for the Dot Product
3082 ;; indexed operations in the v8.6 I8MM extension.
;; Same operand layout as the DOTPROD lane pattern of the same name, but
;; iterated over DOTPROD_I8MM and enabled under TARGET_I8MM.
;; NOTE(review): the output code re-wraps operand 4 as
;; GEN_INT (INTVAL (operands[4])), which leaves the lane index unchanged;
;; unlike the DOTPROD lane pattern it does not apply NEON_ENDIAN_LANE_N.
;; Confirm whether the missing big-endian remap is intentional.
3083 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3084 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3085 (plus:VCVTI
3086 (unspec:VCVTI
3087 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3088 (match_operand:V8QI 3 "register_operand" "t")
3089 (match_operand:SI 4 "immediate_operand" "i")]
3090 DOTPROD_I8MM)
3091 (match_operand:VCVTI 1 "register_operand" "0")))]
3092 "TARGET_I8MM"
3093 {
3094 operands[4] = GEN_INT (INTVAL (operands[4]));
3095 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3096 }
3097 [(set_attr "type" "neon_dot<q>")]
3098 )
3099
3100 ;; These expands map to the Dot Product optab the vectorizer checks for.
3101 ;; The auto-vectorizer expects a dot product builtin that also does an
3102 ;; accumulation into the provided register.
3103 ;; Given the following pattern
3104 ;;
3105 ;; for (i=0; i<len; i++) {
3106 ;; c = a[i] * b[i];
3107 ;; r += c;
3108 ;; }
3109 ;; return r;
3110 ;;
3111 ;; This can be auto-vectorized to
3112 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3113 ;;
3114 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3115 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3116 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3117 ;; ...
3118 ;;
3119 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander for the <sup>dot_prod<vsi2qi> name.  Operands 1 and 2 are the
;; <VSI2QI>-mode inputs, operand 3 the VCVTI-mode accumulator and operand 0
;; the result.  It emits neon_<sup>dot<vsi2qi> with operands[3] as both
;; destination and accumulator (so operands[3] is updated in place), then
;; copies operands[3] into operands[0].
3120 (define_expand "<sup>dot_prod<vsi2qi>"
3121 [(set (match_operand:VCVTI 0 "register_operand")
3122 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3123 "register_operand")
3124 (match_operand:<VSI2QI> 2
3125 "register_operand")]
3126 DOTPROD)
3127 (match_operand:VCVTI 3 "register_operand")))]
3128 "TARGET_DOTPROD"
3129 {
3130 emit_insn (
3131 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3132 operands[2]));
3133 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3134 DONE;
3135 })
3136
;; copysign expander over the VCVTF modes.  The preparation code:
;;   1. loads a <V_cmp_result>-mode register with a vector whose every
;;      element is the SImode constant 0x80000000;
;;   2. copies operands[2] into operands[0];
;;   3. views the mask register in <MODE>mode through a subreg;
;;   4. emits neon_vbsl<mode> with (operands[0], mask, operands[0],
;;      operands[1]).
;; NOTE(review): presumably the vbsl keeps the masked bit (bit 31 of each
;; element) from operands[2] and all other bits from operands[1] -- confirm
;; against the neon_vbsl<mode> operand convention.
3137 (define_expand "neon_copysignf<mode>"
3138 [(match_operand:VCVTF 0 "register_operand")
3139 (match_operand:VCVTF 1 "register_operand")
3140 (match_operand:VCVTF 2 "register_operand")]
3141 "TARGET_NEON"
3142 "{
3143 rtx v_bitmask_cast;
3144 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3145 rtx c = gen_int_mode (0x80000000, SImode);
3146
3147 emit_move_insn (v_bitmask,
3148 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3149 emit_move_insn (operands[0], operands[2]);
3150 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3151 <VCVTF:V_cmp_result>mode, 0);
3152 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3153 operands[1]));
3154
3155 DONE;
3156 }"
3157 )
3158
;; vqneg over the VDQIW modes, represented as UNSPEC_VQNEG.
3159 (define_insn "neon_vqneg<mode>"
3160 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3161 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3162 UNSPEC_VQNEG))]
3163 "TARGET_NEON"
3164 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3165 [(set_attr "type" "neon_qneg<q>")]
3166 )
3167
;; vcls over the VDQIW modes, represented as UNSPEC_VCLS.
3168 (define_insn "neon_vcls<mode>"
3169 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3170 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3171 UNSPEC_VCLS))]
3172 "TARGET_NEON"
3173 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3174 [(set_attr "type" "neon_cls<q>")]
3175 )
3176
;; Standard-named clz<mode>2 pattern over the VDQIW modes, emitted as vclz.
3177 (define_insn "clz<mode>2"
3178 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3179 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3180 "TARGET_NEON"
3181 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3182 [(set_attr "type" "neon_cnt<q>")]
3183 )
3184
;; Builtin-facing expander for vclz: forwards to clz<mode>2.
3185 (define_expand "neon_vclz<mode>"
3186 [(match_operand:VDQIW 0 "s_register_operand")
3187 (match_operand:VDQIW 1 "s_register_operand")]
3188 "TARGET_NEON"
3189 {
3190 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3191 DONE;
3192 })
3193
;; Standard-named popcount<mode>2 pattern over the VE modes, emitted as vcnt.
3194 (define_insn "popcount<mode>2"
3195 [(set (match_operand:VE 0 "s_register_operand" "=w")
3196 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3197 "TARGET_NEON"
3198 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3199 [(set_attr "type" "neon_cnt<q>")]
3200 )
3201
;; Builtin-facing expander for vcnt: forwards to popcount<mode>2.
3202 (define_expand "neon_vcnt<mode>"
3203 [(match_operand:VE 0 "s_register_operand")
3204 (match_operand:VE 1 "s_register_operand")]
3205 "TARGET_NEON"
3206 {
3207 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3208 DONE;
3209 })
3210
;; vrecpe.f16 over the VH modes, represented as UNSPEC_VRECPE; enabled under
;; TARGET_NEON_FP16INST.
3211 (define_insn "neon_vrecpe<mode>"
3212 [(set (match_operand:VH 0 "s_register_operand" "=w")
3213 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3214 UNSPEC_VRECPE))]
3215 "TARGET_NEON_FP16INST"
3216 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3217 [(set_attr "type" "neon_fp_recpe_s<q>")]
3218 )
3219
;; vrecpe over the V32 modes, represented as UNSPEC_VRECPE; the element type
;; suffix comes from <V_u_elem>.
3220 (define_insn "neon_vrecpe<mode>"
3221 [(set (match_operand:V32 0 "s_register_operand" "=w")
3222 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3223 UNSPEC_VRECPE))]
3224 "TARGET_NEON"
3225 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3226 [(set_attr "type" "neon_fp_recpe_s<q>")]
3227 )
3228
;; vrsqrte over the V32 modes, represented as UNSPEC_VRSQRTE; the element
;; type suffix comes from <V_u_elem>.
3229 (define_insn "neon_vrsqrte<mode>"
3230 [(set (match_operand:V32 0 "s_register_operand" "=w")
3231 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3232 UNSPEC_VRSQRTE))]
3233 "TARGET_NEON"
3234 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3235 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3236 )
3237
;; Builtin-facing expander for vmvn over the VDQIW modes: forwards to
;; one_cmpl<mode>2_neon.
3238 (define_expand "neon_vmvn<mode>"
3239 [(match_operand:VDQIW 0 "s_register_operand")
3240 (match_operand:VDQIW 1 "s_register_operand")]
3241 "TARGET_NEON"
3242 {
3243 emit_insn (gen_one_cmpl<mode>2_neon (operands[0], operands[1]));
3244 DONE;
3245 })
3246
;; Sign-extending lane extract from a VD-mode register into a core SImode
;; register (vmov.s<size>).  On big-endian targets the lane number is
;; mirrored within the vector (nunits - 1 - lane) before printing.
3247 (define_insn "neon_vget_lane<mode>_sext_internal"
3248 [(set (match_operand:SI 0 "s_register_operand" "=r")
3249 (sign_extend:SI
3250 (vec_select:<V_elem>
3251 (match_operand:VD 1 "s_register_operand" "w")
3252 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3253 "TARGET_NEON"
3254 {
3255 if (BYTES_BIG_ENDIAN)
3256 {
3257 int elt = INTVAL (operands[2]);
3258 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3259 operands[2] = GEN_INT (elt);
3260 }
3261 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3262 }
3263 [(set_attr "type" "neon_to_gp")]
3264 )
3265
;; Zero-extending lane extract from a VD-mode register into a core SImode
;; register (vmov.u<size>).  On big-endian targets the lane number is
;; mirrored within the vector (nunits - 1 - lane) before printing.
3266 (define_insn "neon_vget_lane<mode>_zext_internal"
3267 [(set (match_operand:SI 0 "s_register_operand" "=r")
3268 (zero_extend:SI
3269 (vec_select:<V_elem>
3270 (match_operand:VD 1 "s_register_operand" "w")
3271 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3272 "TARGET_NEON"
3273 {
3274 if (BYTES_BIG_ENDIAN)
3275 {
3276 int elt = INTVAL (operands[2]);
3277 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3278 operands[2] = GEN_INT (elt);
3279 }
3280 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3281 }
3282 [(set_attr "type" "neon_to_gp")]
3283 )
3284
;; Sign-extending lane extract from a VQ2-mode register.  The output code
;; splits the lane number into a half selector (elt / halfelts) and a lane
;; within that half (elt % halfelts, mirrored on big-endian), builds a
;; <V_HALF>-mode register at regno + 2 * half, and prints the vmov.s itself
;; through output_asm_insn; the returned template is therefore empty.
3285 (define_insn "neon_vget_lane<mode>_sext_internal"
3286 [(set (match_operand:SI 0 "s_register_operand" "=r")
3287 (sign_extend:SI
3288 (vec_select:<V_elem>
3289 (match_operand:VQ2 1 "s_register_operand" "w")
3290 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3291 "TARGET_NEON"
3292 {
3293 rtx ops[3];
3294 int regno = REGNO (operands[1]);
3295 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3296 unsigned int elt = INTVAL (operands[2]);
3297 unsigned int elt_adj = elt % halfelts;
3298
3299 if (BYTES_BIG_ENDIAN)
3300 elt_adj = halfelts - 1 - elt_adj;
3301
3302 ops[0] = operands[0];
3303 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3304 ops[2] = GEN_INT (elt_adj);
3305 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3306
3307 return "";
3308 }
3309 [(set_attr "type" "neon_to_gp_q")]
3310 )
3311
;; Zero-extending lane extract from a VQ2-mode register.  The output code
;; splits the lane number into a half selector (elt / halfelts) and a lane
;; within that half (elt % halfelts, mirrored on big-endian), builds a
;; <V_HALF>-mode register at regno + 2 * half, and prints the vmov.u itself
;; through output_asm_insn; the returned template is therefore empty.
3312 (define_insn "neon_vget_lane<mode>_zext_internal"
3313 [(set (match_operand:SI 0 "s_register_operand" "=r")
3314 (zero_extend:SI
3315 (vec_select:<V_elem>
3316 (match_operand:VQ2 1 "s_register_operand" "w")
3317 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3318 "TARGET_NEON"
3319 {
3320 rtx ops[3];
3321 int regno = REGNO (operands[1]);
3322 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3323 unsigned int elt = INTVAL (operands[2]);
3324 unsigned int elt_adj = elt % halfelts;
3325
3326 if (BYTES_BIG_ENDIAN)
3327 elt_adj = halfelts - 1 - elt_adj;
3328
3329 ops[0] = operands[0];
3330 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3331 ops[2] = GEN_INT (elt_adj);
3332 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3333
3334 return "";
3335 }
3336 [(set_attr "type" "neon_to_gp_q")]
3337 )
3338
;; Builtin-facing expander for vget_lane over the VDQW modes.  On big-endian
;; targets the lane number is first XORed with (lanes per 64 bits - 1).  When
;; the element size is 32 bits it then emits vec_extract<mode><V_elem_l>;
;; for every other element size it emits the sign-extending
;; neon_vget_lane<mode>_sext_internal pattern.
3339 (define_expand "neon_vget_lane<mode>"
3340 [(match_operand:<V_ext> 0 "s_register_operand")
3341 (match_operand:VDQW 1 "s_register_operand")
3342 (match_operand:SI 2 "immediate_operand")]
3343 "TARGET_NEON"
3344 {
3345 if (BYTES_BIG_ENDIAN)
3346 {
3347 /* The intrinsics are defined in terms of a model where the
3348 element ordering in memory is vldm order, whereas the generic
3349 RTL is defined in terms of a model where the element ordering
3350 in memory is array order. Convert the lane number to conform
3351 to this model. */
3352 unsigned int elt = INTVAL (operands[2]);
3353 unsigned int reg_nelts
3354 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3355 elt ^= reg_nelts - 1;
3356 operands[2] = GEN_INT (elt);
3357 }
3358
3359 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3360 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3361 operands[2]));
3362 else
3363 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3364 operands[1],
3365 operands[2]));
3366 DONE;
3367 })
3368
;; Unsigned counterpart of neon_vget_lane<mode>, over the VDQIW modes.  The
;; big-endian lane remapping and the 32-bit vec_extract path are identical;
;; for other element sizes it emits the zero-extending
;; neon_vget_lane<mode>_zext_internal pattern.
3369 (define_expand "neon_vget_laneu<mode>"
3370 [(match_operand:<V_ext> 0 "s_register_operand")
3371 (match_operand:VDQIW 1 "s_register_operand")
3372 (match_operand:SI 2 "immediate_operand")]
3373 "TARGET_NEON"
3374 {
3375 if (BYTES_BIG_ENDIAN)
3376 {
3377 /* The intrinsics are defined in terms of a model where the
3378 element ordering in memory is vldm order, whereas the generic
3379 RTL is defined in terms of a model where the element ordering
3380 in memory is array order. Convert the lane number to conform
3381 to this model. */
3382 unsigned int elt = INTVAL (operands[2]);
3383 unsigned int reg_nelts
3384 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3385 elt ^= reg_nelts - 1;
3386 operands[2] = GEN_INT (elt);
3387 }
3388
3389 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3390 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3391 operands[2]));
3392 else
3393 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3394 operands[1],
3395 operands[2]));
3396 DONE;
3397 })
3398
;; vget_lane for a DImode "vector": the source (operand 1) is itself a single
;; DImode value, so the expansion is a plain move into operand 0 and the lane
;; index (operand 2) is not used.
3399 (define_expand "neon_vget_lanedi"
3400 [(match_operand:DI 0 "s_register_operand")
3401 (match_operand:DI 1 "s_register_operand")
3402 (match_operand:SI 2 "immediate_operand")]
3403 "TARGET_NEON"
3404 {
3405 emit_move_insn (operands[0], operands[1]);
3406 DONE;
3407 })
3408
;; vget_lane for V2DI.  On big-endian targets the lane number is flipped
;; (XOR with 1).  The lane is then asserted to be 0 or 1, and the result is a
;; move of the DImode low part (lane 0) or high part (lane 1) of operand 1.
3409 (define_expand "neon_vget_lanev2di"
3410 [(match_operand:DI 0 "s_register_operand")
3411 (match_operand:V2DI 1 "s_register_operand")
3412 (match_operand:SI 2 "immediate_operand")]
3413 "TARGET_NEON"
3414 {
3415 int lane;
3416
3417 if (BYTES_BIG_ENDIAN)
3418 {
3419 /* The intrinsics are defined in terms of a model where the
3420 element ordering in memory is vldm order, whereas the generic
3421 RTL is defined in terms of a model where the element ordering
3422 in memory is array order. Convert the lane number to conform
3423 to this model. */
3424 unsigned int elt = INTVAL (operands[2]);
3425 unsigned int reg_nelts = 2;
3426 elt ^= reg_nelts - 1;
3427 operands[2] = GEN_INT (elt);
3428 }
3429
3430 lane = INTVAL (operands[2]);
3431 gcc_assert ((lane ==0) || (lane == 1));
3432 emit_move_insn (operands[0], lane == 0
3433 ? gen_lowpart (DImode, operands[1])
3434 : gen_highpart (DImode, operands[1]));
3435 DONE;
3436 })
3437
;; Builtin-facing expander for vset_lane over the VDQ modes.  Operand 1 is
;; the new element, operand 2 the source vector and operand 3 the lane index.
;; On big-endian targets the lane index is XORed with
;; (lanes per 64 bits - 1).  The lane is handed to vec_set<mode>_internal as
;; a one-hot mask (1 << lane), not as an index.
3438 (define_expand "neon_vset_lane<mode>"
3439 [(match_operand:VDQ 0 "s_register_operand")
3440 (match_operand:<V_elem> 1 "s_register_operand")
3441 (match_operand:VDQ 2 "s_register_operand")
3442 (match_operand:SI 3 "immediate_operand")]
3443 "TARGET_NEON"
3444 {
3445 unsigned int elt = INTVAL (operands[3]);
3446
3447 if (BYTES_BIG_ENDIAN)
3448 {
3449 unsigned int reg_nelts
3450 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3451 elt ^= reg_nelts - 1;
3452 }
3453
3454 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3455 GEN_INT (1 << elt), operands[2]));
3456 DONE;
3457 })
3458
3459 ; vset_lane for a DImode "vector".  Operands 2 (the old vector) and 3 (the
; lane index) are ignored: the result is simply a move of the new element
; (operand 1) into operand 0, mirroring neon_vget_lanedi.
3460
3461 (define_expand "neon_vset_lanedi"
3462 [(match_operand:DI 0 "s_register_operand")
3463 (match_operand:DI 1 "s_register_operand")
3464 (match_operand:DI 2 "s_register_operand")
3465 (match_operand:SI 3 "immediate_operand")]
3466 "TARGET_NEON"
3467 {
3468 emit_move_insn (operands[0], operands[1]);
3469 DONE;
3470 })
3471
;; vcreate over the VD_RE modes: takes the <MODE>mode lowpart of the DImode
;; operand 1 (gen_lowpart) and moves it into operand 0.
3472 (define_expand "neon_vcreate<mode>"
3473 [(match_operand:VD_RE 0 "s_register_operand")
3474 (match_operand:DI 1 "general_operand")]
3475 "TARGET_NEON"
3476 {
3477 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3478 emit_move_insn (operands[0], src);
3479 DONE;
3480 })
3481
;; vdup from a core register ("r") into every lane of a VX-mode vector.
3482 (define_insn "neon_vdup_n<mode>"
3483 [(set (match_operand:VX 0 "s_register_operand" "=w")
3484 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3485 "TARGET_NEON"
3486 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3487 [(set_attr "type" "neon_from_gp<q>")]
3488 )
3489
;; vdup.16 of an HFmode value held in a core register into a V4HF vector
;; (destination printed as a D register).
3490 (define_insn "neon_vdup_nv4hf"
3491 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3492 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3493 "TARGET_NEON"
3494 "vdup.16\t%P0, %1"
3495 [(set_attr "type" "neon_from_gp")]
3496 )
3497
;; vdup.16 of an HFmode value held in a core register into a V8HF vector
;; (destination printed as a Q register).
3498 (define_insn "neon_vdup_nv8hf"
3499 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3500 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3501 "TARGET_NEON"
3502 "vdup.16\t%q0, %1"
3503 [(set_attr "type" "neon_from_gp_q")]
3504 )
3505
;; vdup.16 of a BFmode value held in a core register into a V4BF vector
;; (destination printed as a D register).
3506 (define_insn "neon_vdup_nv4bf"
3507 [(set (match_operand:V4BF 0 "s_register_operand" "=w")
3508 (vec_duplicate:V4BF (match_operand:BF 1 "s_register_operand" "r")))]
3509 "TARGET_NEON"
3510 "vdup.16\t%P0, %1"
3511 [(set_attr "type" "neon_from_gp")]
3512 )
3513
;; vdup.16 of a BFmode value held in a core register into a V8BF vector
;; (destination printed as a Q register).
3514 (define_insn "neon_vdup_nv8bf"
3515 [(set (match_operand:V8BF 0 "s_register_operand" "=w")
3516 (vec_duplicate:V8BF (match_operand:BF 1 "s_register_operand" "r")))]
3517 "TARGET_NEON"
3518 "vdup.16\t%q0, %1"
3519 [(set_attr "type" "neon_from_gp_q")]
3520 )
3521
;; vdup into a V32-mode vector with two alternatives: the scalar source in a
;; core register ("r", printed with %1) or in a register matching the "t"
;; constraint (printed with the %y1 modifier).  The "type" attribute differs
;; per alternative accordingly.
3522 (define_insn "neon_vdup_n<mode>"
3523 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3524 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3525 "TARGET_NEON"
3526 "@
3527 vdup.<V_sz_elem>\t%<V_reg>0, %1
3528 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3529 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3530 )
3531
;; vdup_n for a DImode "vector": both operands are DImode, so the expansion
;; is a plain move of operand 1 into operand 0.
3532 (define_expand "neon_vdup_ndi"
3533 [(match_operand:DI 0 "s_register_operand")
3534 (match_operand:DI 1 "s_register_operand")]
3535 "TARGET_NEON"
3536 {
3537 emit_move_insn (operands[0], operands[1]);
3538 DONE;
3539 }
3540 )
3541
;; vec_duplicate of a DImode value into V2DI.  Each alternative prints two vmov
;; instructions (hence length 8 and type "multiple"), one writing %e0 and one
;; writing %f0:
;;   alternative 0 (source "r") passes %Q1, %R1 to both vmovs;
;;   alternative 1 (source "w") passes %P1 to both vmovs.
3542 (define_insn "neon_vdup_nv2di"
3543 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3544 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3545 "TARGET_NEON"
3546 "@
3547 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3548 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3549 [(set_attr "length" "8")
3550 (set_attr "type" "multiple")]
3551 )
3552
;; Duplicate one element, selected by the immediate operand 2, of the
;; <V_double_vector_mode> operand 1 into every element of operand 0 (VDQW
;; iterator).
;; At output time, when BYTES_BIG_ENDIAN is set, the lane number is replaced by
;; (number of units in <V_double_vector_mode>) - 1 - lane before printing.
;; The destination is printed with %P when <Is_d_reg> is true and with %q
;; otherwise; the source is always printed as %P1[lane].
3553 (define_insn "neon_vdup_lane<mode>_internal"
3554 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3555 (vec_duplicate:VDQW
3556 (vec_select:<V_elem>
3557 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3558 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3559 "TARGET_NEON"
3560 {
3561 if (BYTES_BIG_ENDIAN)
3562 {
3563 int elt = INTVAL (operands[2]);
3564 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3565 operands[2] = GEN_INT (elt);
3566 }
3567 if (<Is_d_reg>)
3568 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3569 else
3570 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3571 }
3572 [(set_attr "type" "neon_dup<q>")]
3573 )
3574
;; Lane duplicate for the VHFBF iterator; same shape as the VDQW pattern of the
;; same name but additionally gated on (TARGET_FP16 || TARGET_BF16_SIMD).
;; At output time, when BYTES_BIG_ENDIAN is set, the lane number is replaced by
;; (number of units in <V_double_vector_mode>) - 1 - lane before printing.
;; The destination is printed with %P when <Is_d_reg> is true and with %q
;; otherwise; the source is always printed as %P1[lane].
3575 (define_insn "neon_vdup_lane<mode>_internal"
3576 [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
3577 (vec_duplicate:VHFBF
3578 (vec_select:<V_elem>
3579 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3580 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3581 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3582 {
3583 if (BYTES_BIG_ENDIAN)
3584 {
3585 int elt = INTVAL (operands[2]);
3586 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3587 operands[2] = GEN_INT (elt);
3588 }
3589 if (<Is_d_reg>)
3590 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3591 else
3592 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3593 }
3594 [(set_attr "type" "neon_dup<q>")]
3595 )
3596
;; Expander in front of neon_vdup_lane<mode>_internal for the VDQW iterator.
;; When BYTES_BIG_ENDIAN is set, the lane number is XORed with (reg_nelts - 1),
;; where reg_nelts is 64 divided by the unit bit size of
;; <V_double_vector_mode>; the resulting lane is then handed to the _internal
;; pattern.
;; NOTE(review): the _internal insn applies a further big-endian adjustment of
;; its own when it is printed; the two are presumably meant to compose --
;; confirm before changing either.
3597 (define_expand "neon_vdup_lane<mode>"
3598 [(match_operand:VDQW 0 "s_register_operand")
3599 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3600 (match_operand:SI 2 "immediate_operand")]
3601 "TARGET_NEON"
3602 {
3603 if (BYTES_BIG_ENDIAN)
3604 {
3605 unsigned int elt = INTVAL (operands[2]);
3606 unsigned int reg_nelts
3607 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3608 elt ^= reg_nelts - 1;
3609 operands[2] = GEN_INT (elt);
3610 }
3611 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3612 operands[2]));
3613 DONE;
3614 })
3615
;; Expander in front of neon_vdup_lane<mode>_internal for the VHFBF iterator,
;; gated on (TARGET_FP16 || TARGET_BF16_SIMD).
;; When BYTES_BIG_ENDIAN is set, the lane number is XORed with (reg_nelts - 1),
;; where reg_nelts is 64 divided by the unit bit size of
;; <V_double_vector_mode>; the resulting lane is then handed to the _internal
;; pattern.
;; NOTE(review): the _internal insn applies a further big-endian adjustment of
;; its own when it is printed; the two are presumably meant to compose --
;; confirm before changing either.
3616 (define_expand "neon_vdup_lane<mode>"
3617 [(match_operand:VHFBF 0 "s_register_operand")
3618 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3619 (match_operand:SI 2 "immediate_operand")]
3620 "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
3621 {
3622 if (BYTES_BIG_ENDIAN)
3623 {
3624 unsigned int elt = INTVAL (operands[2]);
3625 unsigned int reg_nelts
3626 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3627 elt ^= reg_nelts - 1;
3628 operands[2] = GEN_INT (elt);
3629 }
3630 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3631 operands[2]));
3632 DONE;
3633 })
3634
3635 ; Scalar index is ignored, since only zero is valid here.
;; DImode lane duplicate: operand 2 (the lane number) is accepted but never
;; read; the expander simply moves operand 1 into operand 0.
3636 (define_expand "neon_vdup_lanedi"
3637 [(match_operand:DI 0 "s_register_operand")
3638 (match_operand:DI 1 "s_register_operand")
3639 (match_operand:SI 2 "immediate_operand")]
3640 "TARGET_NEON"
3641 {
3642 emit_move_insn (operands[0], operands[1]);
3643 DONE;
3644 })
3645
3646 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI lane duplicate from a DImode source: operand 2 (the lane number) is
;; accepted but never read; the work is delegated to neon_vdup_nv2di with
;; operands 0 and 1.
3647 (define_expand "neon_vdup_lanev2di"
3648 [(match_operand:V2DI 0 "s_register_operand")
3649 (match_operand:DI 1 "s_register_operand")
3650 (match_operand:SI 2 "immediate_operand")]
3651 "TARGET_NEON"
3652 {
3653 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3654 DONE;
3655 })
3656
3657 ; Disabled before reload because we don't want combine doing something silly,
3658 ; but used by the post-reload expansion of neon_vcombine.
;; Register swap: two sets in one pattern, operand 0 <- operand 1 and
;; operand 1 <- operand 0, both operands using the "+w" constraint.  The insn
;; condition includes reload_completed, so it cannot match earlier.  Printed
;; as a single vswp.
3659 (define_insn "*neon_vswp<mode>"
3660 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3661 (match_operand:VDQX 1 "s_register_operand" "+w"))
3662 (set (match_dup 1) (match_dup 0))]
3663 "TARGET_NEON && reload_completed"
3664 "vswp\t%<V_reg>0, %<V_reg>1"
3665 [(set_attr "type" "neon_permute<q>")]
3666 )
3667
3668 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3669 ;; dest vector.
3670 ;; FIXME: A different implementation of this builtin could make it much
3671 ;; more likely that we wouldn't actually need to output anything (we could make
3672 ;; it so that the reg allocator puts things in the right places magically
3673 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3674
;; vec_concat of two VDX-mode registers (operand 1, operand 2) into one
;; <V_DOUBLE> register.  The output template is "#", i.e. the insn is never
;; printed directly: once reload_completed holds it is split by calling
;; neon_split_vcombine (operands), which emits the replacement sequence.
3675 (define_insn_and_split "neon_vcombine<mode>"
3676 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3677 (vec_concat:<V_DOUBLE>
3678 (match_operand:VDX 1 "s_register_operand" "w")
3679 (match_operand:VDX 2 "s_register_operand" "w")))]
3680 "TARGET_NEON"
3681 "#"
3682 "&& reload_completed"
3683 [(const_int 0)]
3684 {
3685 neon_split_vcombine (operands);
3686 DONE;
3687 }
3688 [(set_attr "type" "multiple")]
3689 )
3690
;; Extract one <V_HALF> half of the VQXBF-mode operand 1 into operand 0.  The
;; half is taken as a subreg at byte offset GET_MODE_SIZE (<V_HALF>mode) and
;; copied with an ordinary move.
3691 (define_expand "neon_vget_high<mode>"
3692 [(match_operand:<V_HALF> 0 "s_register_operand")
3693 (match_operand:VQXBF 1 "s_register_operand")]
3694 "TARGET_NEON"
3695 {
3696 emit_move_insn (operands[0],
3697 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3698 GET_MODE_SIZE (<V_HALF>mode)));
3699 DONE;
3700 })
3701
;; Extract one <V_HALF> half of the VQX-mode operand 1 into operand 0.  The
;; half is taken as a subreg at byte offset 0 and copied with an ordinary move.
;; NOTE(review): neon_vget_high iterates over VQXBF while this expander uses
;; VQX -- confirm against the iterator definitions that the difference is
;; intentional.
3702 (define_expand "neon_vget_low<mode>"
3703 [(match_operand:<V_HALF> 0 "s_register_operand")
3704 (match_operand:VQX 1 "s_register_operand")]
3705 "TARGET_NEON"
3706 {
3707 emit_move_insn (operands[0],
3708 simplify_gen_subreg (<V_HALF>mode, operands[1],
3709 <MODE>mode, 0));
3710 DONE;
3711 })
3712
;; Signed integer vector (VCVTI) to <V_CVTTO> conversion using the generic
;; `float' RTL code.  Only enabled when flag_rounding_math is off.  Printed as
;; vcvt.f32.s32.
3713 (define_insn "float<mode><V_cvtto>2"
3714 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3715 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3716 "TARGET_NEON && !flag_rounding_math"
3717 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3718 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3719 )
3720
;; Unsigned integer vector (VCVTI) to <V_CVTTO> conversion using the generic
;; `unsigned_float' RTL code.  Only enabled when flag_rounding_math is off.
;; Printed as vcvt.f32.u32.
3721 (define_insn "floatuns<mode><V_cvtto>2"
3722 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3723 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3724 "TARGET_NEON && !flag_rounding_math"
3725 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3726 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3727 )
3728
;; Floating-point vector (VCVTF) to <V_CVTTO> conversion using the generic
;; `fix' RTL code.  Printed as vcvt.s32.f32.
3729 (define_insn "fix_trunc<mode><V_cvtto>2"
3730 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3731 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3732 "TARGET_NEON"
3733 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3734 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3735 )
3736
;; Floating-point vector (VCVTF) to <V_CVTTO> conversion using the generic
;; `unsigned_fix' RTL code.  Printed as vcvt.u32.f32.
3737 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3738 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3739 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3740 "TARGET_NEON"
3741 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3742 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3743 )
3744
;; Unspec (VCVT_US iterator) conversion from a VCVTF-mode operand to
;; <V_CVTTO>.  Printed as vcvt.<sup>32.f32, where <sup> is supplied by the
;; iterator.
3745 (define_insn "neon_vcvt<sup><mode>"
3746 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3747 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3748 VCVT_US))]
3749 "TARGET_NEON"
3750 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3751 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3752 )
3753
;; Unspec (VCVT_US iterator) conversion from a VCVTI-mode operand to
;; <V_CVTTO>.  Printed as vcvt.f32.<sup>32, where <sup> is supplied by the
;; iterator.
3754 (define_insn "neon_vcvt<sup><mode>"
3755 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3756 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3757 VCVT_US))]
3758 "TARGET_NEON"
3759 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3760 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3761 )
3762
;; UNSPEC_VCVT conversion from V4HF (operand 1, printed with %P) to V4SF
;; (operand 0, printed with %q).  Requires TARGET_FP16 in addition to
;; TARGET_NEON.  Printed as vcvt.f32.f16.
3763 (define_insn "neon_vcvtv4sfv4hf"
3764 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3765 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3766 UNSPEC_VCVT))]
3767 "TARGET_NEON && TARGET_FP16"
3768 "vcvt.f32.f16\t%q0, %P1"
3769 [(set_attr "type" "neon_fp_cvt_widen_h")]
3770 )
3771
;; UNSPEC_VCVT conversion from V4SF (operand 1, printed with %q) to V4HF
;; (operand 0, printed with %P).  Requires TARGET_FP16 in addition to
;; TARGET_NEON.  Printed as vcvt.f16.f32.
3772 (define_insn "neon_vcvtv4hfv4sf"
3773 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3774 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3775 UNSPEC_VCVT))]
3776 "TARGET_NEON && TARGET_FP16"
3777 "vcvt.f16.f32\t%P0, %q1"
3778 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
3779 )
3780
;; Unspec (VCVT_US iterator) conversion from a VCVTHI-mode operand to
;; <VH_CVTTO>, enabled under TARGET_NEON_FP16INST.  Printed as
;; vcvt.f16.<sup>16.
3781 (define_insn "neon_vcvt<sup><mode>"
3782 [(set
3783 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3784 (unspec:<VH_CVTTO>
3785 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3786 VCVT_US))]
3787 "TARGET_NEON_FP16INST"
3788 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3789 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
3790 )
3791
;; Unspec (VCVT_US iterator) conversion from a VH-mode operand to <VH_CVTTO>,
;; enabled under TARGET_NEON_FP16INST.  Printed as vcvt.<sup>16.f16.
3792 (define_insn "neon_vcvt<sup><mode>"
3793 [(set
3794 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3795 (unspec:<VH_CVTTO>
3796 [(match_operand:VH 1 "s_register_operand" "w")]
3797 VCVT_US))]
3798 "TARGET_NEON_FP16INST"
3799 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3800 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
3801 )
3802
;; Unspec (VCVT_US_N iterator) conversion from VCVTF to <V_CVTTO> that takes an
;; extra immediate (operand 2), printed as the third assembler operand.
;; Before printing, arm_const_bounds is called on the immediate with the
;; bounds 1 and 33.
;; NOTE(review): presumably this rejects values outside [1, 33) -- confirm
;; against the definition of arm_const_bounds.
3803 (define_insn "neon_vcvt<sup>_n<mode>"
3804 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3805 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3806 (match_operand:SI 2 "immediate_operand" "i")]
3807 VCVT_US_N))]
3808 "TARGET_NEON"
3809 {
3810 arm_const_bounds (operands[2], 1, 33);
3811 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3812 }
3813 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3814 )
3815
;; Unspec (VCVT_US_N iterator) conversion from VH to <VH_CVTTO> that takes an
;; extra immediate (operand 2), printed as the third assembler operand.
;; Enabled under TARGET_NEON_FP16INST.  Before printing, arm_const_bounds is
;; called on the immediate with the bounds 0 and 17.
;; NOTE(review): the 32-bit variants of this pattern pass a lower bound of 1
;; (1, 33) whereas this one passes 0 -- confirm that 0 is an acceptable
;; immediate for the 16-bit form.
3816 (define_insn "neon_vcvt<sup>_n<mode>"
3817 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3818 (unspec:<VH_CVTTO>
3819 [(match_operand:VH 1 "s_register_operand" "w")
3820 (match_operand:SI 2 "immediate_operand" "i")]
3821 VCVT_US_N))]
3822 "TARGET_NEON_FP16INST"
3823 {
3824 arm_const_bounds (operands[2], 0, 17);
3825 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3826 }
3827 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
3828 )
3829
;; Unspec (VCVT_US_N iterator) conversion from VCVTI to <V_CVTTO> that takes an
;; extra immediate (operand 2), printed as the third assembler operand.
;; Before printing, arm_const_bounds is called on the immediate with the
;; bounds 1 and 33.
;; NOTE(review): presumably this rejects values outside [1, 33) -- confirm
;; against the definition of arm_const_bounds.
3830 (define_insn "neon_vcvt<sup>_n<mode>"
3831 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3832 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3833 (match_operand:SI 2 "immediate_operand" "i")]
3834 VCVT_US_N))]
3835 "TARGET_NEON"
3836 {
3837 arm_const_bounds (operands[2], 1, 33);
3838 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3839 }
3840 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3841 )
3842
;; Unspec (VCVT_US_N iterator) conversion from VCVTHI to <VH_CVTTO> that takes
;; an extra immediate (operand 2), printed as the third assembler operand.
;; Enabled under TARGET_NEON_FP16INST.  Before printing, arm_const_bounds is
;; called on the immediate with the bounds 0 and 17.
;; NOTE(review): the 32-bit variants of this pattern pass a lower bound of 1
;; (1, 33) whereas this one passes 0 -- confirm that 0 is an acceptable
;; immediate for the 16-bit form.
3843 (define_insn "neon_vcvt<sup>_n<mode>"
3844 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3845 (unspec:<VH_CVTTO>
3846 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3847 (match_operand:SI 2 "immediate_operand" "i")]
3848 VCVT_US_N))]
3849 "TARGET_NEON_FP16INST"
3850 {
3851 arm_const_bounds (operands[2], 0, 17);
3852 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3853 }
3854 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
3855 )
3856
;; Unspec (VCVT_HF_US iterator) conversion from VH to <VH_CVTTO>, enabled under
;; TARGET_NEON_FP16INST.  The mnemonic is "vcvt" followed by the <vcvth_op>
;; suffix supplied by the iterator, with type suffix .<sup>16.f16.
3857 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3858 [(set
3859 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3860 (unspec:<VH_CVTTO>
3861 [(match_operand:VH 1 "s_register_operand" "w")]
3862 VCVT_HF_US))]
3863 "TARGET_NEON_FP16INST"
3864 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3865 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
3866 )
3867
;; UNSPEC_VMOVN: produce a <V_narrow> result (printed with %P) from a VN-mode
;; source (printed with %q).  Printed as vmovn.<V_if_elem>.
3868 (define_insn "neon_vmovn<mode>"
3869 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3870 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3871 UNSPEC_VMOVN))]
3872 "TARGET_NEON"
3873 "vmovn.<V_if_elem>\t%P0, %q1"
3874 [(set_attr "type" "neon_shift_imm_narrow_q")]
3875 )
3876
;; VQMOVN unspec iterator: produce a <V_narrow> result (printed with %P) from a
;; VN-mode source (printed with %q).  Printed as vqmovn.<sup><V_sz_elem>.
3877 (define_insn "neon_vqmovn<sup><mode>"
3878 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3879 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3880 VQMOVN))]
3881 "TARGET_NEON"
3882 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3883 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3884 )
3885
;; UNSPEC_VQMOVUN: produce a <V_narrow> result (printed with %P) from a VN-mode
;; source (printed with %q).  Printed as vqmovun.<V_s_elem>.
3886 (define_insn "neon_vqmovun<mode>"
3887 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3888 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3889 UNSPEC_VQMOVUN))]
3890 "TARGET_NEON"
3891 "vqmovun.<V_s_elem>\t%P0, %q1"
3892 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3893 )
3894
;; VMOVL unspec iterator: produce a <V_widen> result (printed with %q) from a
;; VW-mode source (printed with %P).  Printed as vmovl.<sup><V_sz_elem>.
3895 (define_insn "neon_vmovl<sup><mode>"
3896 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3897 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3898 VMOVL))]
3899 "TARGET_NEON"
3900 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3901 [(set_attr "type" "neon_shift_imm_long")]
3902 )
3903
;; UNSPEC_VMUL_LANE for the VMD iterator: operand 1 is the vector, operand 2
;; holds the scalar (register class restricted by <scalar_mul_constraint>) and
;; operand 3 is the lane number.  All three registers are printed with %P.
;; The "type" attribute is chosen at attribute-evaluation time: the fp-multiply
;; scalar type when <Is_float_mode> is true, the integer one otherwise.
3904 (define_insn "neon_vmul_lane<mode>"
3905 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3906 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3907 (match_operand:VMD 2 "s_register_operand"
3908 "<scalar_mul_constraint>")
3909 (match_operand:SI 3 "immediate_operand" "i")]
3910 UNSPEC_VMUL_LANE))]
3911 "TARGET_NEON"
3912 {
3913 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3914 }
3915 [(set (attr "type")
3916 (if_then_else (match_test "<Is_float_mode>")
3917 (const_string "neon_fp_mul_s_scalar<q>")
3918 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3919 )
3920
;; UNSPEC_VMUL_LANE for the VMQ iterator: operand 1 is the vector, operand 2 is
;; a <V_HALF>-mode register holding the scalar (register class restricted by
;; <scalar_mul_constraint>) and operand 3 is the lane number.  Destination and
;; vector source are printed with %q, the scalar register with %P.
;; The "type" attribute is chosen at attribute-evaluation time: the fp-multiply
;; scalar type when <Is_float_mode> is true, the integer one otherwise.
3921 (define_insn "neon_vmul_lane<mode>"
3922 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3923 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3924 (match_operand:<V_HALF> 2 "s_register_operand"
3925 "<scalar_mul_constraint>")
3926 (match_operand:SI 3 "immediate_operand" "i")]
3927 UNSPEC_VMUL_LANE))]
3928 "TARGET_NEON"
3929 {
3930 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3931 }
3932 [(set (attr "type")
3933 (if_then_else (match_test "<Is_float_mode>")
3934 (const_string "neon_fp_mul_s_scalar<q>")
3935 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
3936 )
3937
;; UNSPEC_VMUL_LANE for the VH iterator, enabled under TARGET_NEON_FP16INST.
;; The scalar always comes from a V4HF register (operand 2, printed with %P),
;; with operand 3 as the lane number.  Printed as vmul.f16.
3938 (define_insn "neon_vmul_lane<mode>"
3939 [(set (match_operand:VH 0 "s_register_operand" "=w")
3940 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3941 (match_operand:V4HF 2 "s_register_operand"
3942 "<scalar_mul_constraint>")
3943 (match_operand:SI 3 "immediate_operand" "i")]
3944 UNSPEC_VMUL_LANE))]
3945 "TARGET_NEON_FP16INST"
3946 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3947 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
3948 )
3949
;; VMULL_LANE unspec iterator: <V_widen> result (printed with %q) from a
;; VMDI-mode vector (operand 1) and a scalar taken from lane operand 3 of the
;; VMDI-mode operand 2.  Printed as vmull.<sup><V_sz_elem>.
3950 (define_insn "neon_vmull<sup>_lane<mode>"
3951 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3952 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3953 (match_operand:VMDI 2 "s_register_operand"
3954 "<scalar_mul_constraint>")
3955 (match_operand:SI 3 "immediate_operand" "i")]
3956 VMULL_LANE))]
3957 "TARGET_NEON"
3958 {
3959 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3960 }
3961 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
3962 )
3963
;; UNSPEC_VQDMULL_LANE: <V_widen> result (printed with %q) from a VMDI-mode
;; vector (operand 1) and a scalar taken from lane operand 3 of the VMDI-mode
;; operand 2.  Printed as vqdmull.<V_s_elem>.
3964 (define_insn "neon_vqdmull_lane<mode>"
3965 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3966 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3967 (match_operand:VMDI 2 "s_register_operand"
3968 "<scalar_mul_constraint>")
3969 (match_operand:SI 3 "immediate_operand" "i")]
3970 UNSPEC_VQDMULL_LANE))]
3971 "TARGET_NEON"
3972 {
3973 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3974 }
3975 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
3976 )
3977
;; VQDMULH_LANE unspec iterator for the VMQI modes: operand 1 is the vector,
;; operand 2 is a <V_HALF>-mode register holding the scalar and operand 3 is
;; the lane number.  Destination and vector source are printed with %q, the
;; scalar register with %P.  The <r> attribute selects between the vqdmulh and
;; vqrdmulh spellings.
3978 (define_insn "neon_vq<r>dmulh_lane<mode>"
3979 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3980 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3981 (match_operand:<V_HALF> 2 "s_register_operand"
3982 "<scalar_mul_constraint>")
3983 (match_operand:SI 3 "immediate_operand" "i")]
3984 VQDMULH_LANE))]
3985 "TARGET_NEON"
3986 {
3987 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3988 }
3989 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3990 )
3991
;; VQDMULH_LANE unspec iterator for the VMDI modes: operand 1 is the vector,
;; operand 2 holds the scalar and operand 3 is the lane number.  All three
;; registers are printed with %P.  The <r> attribute selects between the
;; vqdmulh and vqrdmulh spellings.
;; The scheduling type is the non-"_q" scalar type: this is the form whose
;; operands are all printed with %P, so it must not share the "_scalar_q"
;; classification used by the VMQI pattern of the same name.
3992 (define_insn "neon_vq<r>dmulh_lane<mode>"
3993 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3994 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3995 (match_operand:VMDI 2 "s_register_operand"
3996 "<scalar_mul_constraint>")
3997 (match_operand:SI 3 "immediate_operand" "i")]
3998 VQDMULH_LANE))]
3999 "TARGET_NEON"
4000 {
4001 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4002 }
4003 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
4004 )
4005
4006 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VQRDMLH_AS unspec iterator for the VMQI modes, enabled under
;; TARGET_NEON_RDMA.  Operand 1 is tied to the destination (constraint "0"),
;; operand 2 is the vector, operand 3 is a <V_HALF>-mode register holding the
;; scalar and operand 4 is the lane number.  Operand 1 does not appear in the
;; template because it shares a register with operand 0.
4007 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4008 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4009 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4010 (match_operand:VMQI 2 "s_register_operand" "w")
4011 (match_operand:<V_HALF> 3 "s_register_operand"
4012 "<scalar_mul_constraint>")
4013 (match_operand:SI 4 "immediate_operand" "i")]
4014 VQRDMLH_AS))]
4015 "TARGET_NEON_RDMA"
4016 {
4017 return
4018 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4019 }
4020 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4021 )
4022
;; VQRDMLH_AS unspec iterator for the VMDI modes, enabled under
;; TARGET_NEON_RDMA.  Operand 1 is tied to the destination (constraint "0"),
;; operand 2 is the vector, operand 3 holds the scalar and operand 4 is the
;; lane number.  All printed registers use %P; operand 1 does not appear in the
;; template because it shares a register with operand 0.
4023 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4024 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4025 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4026 (match_operand:VMDI 2 "s_register_operand" "w")
4027 (match_operand:VMDI 3 "s_register_operand"
4028 "<scalar_mul_constraint>")
4029 (match_operand:SI 4 "immediate_operand" "i")]
4030 VQRDMLH_AS))]
4031 "TARGET_NEON_RDMA"
4032 {
4033 return
4034 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4035 }
4036 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4037 )
4038
;; UNSPEC_VMLA_LANE for the VMD iterator.  Operand 1 (the accumulator) is tied
;; to the destination via constraint "0"; operand 2 is the vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vmla.<V_if_elem> with all registers using %P.
;; The "type" attribute depends on <Is_float_mode>.
4039 (define_insn "neon_vmla_lane<mode>"
4040 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4041 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4042 (match_operand:VMD 2 "s_register_operand" "w")
4043 (match_operand:VMD 3 "s_register_operand"
4044 "<scalar_mul_constraint>")
4045 (match_operand:SI 4 "immediate_operand" "i")]
4046 UNSPEC_VMLA_LANE))]
4047 "TARGET_NEON"
4048 {
4049 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4050 }
4051 [(set (attr "type")
4052 (if_then_else (match_test "<Is_float_mode>")
4053 (const_string "neon_fp_mla_s_scalar<q>")
4054 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4055 )
4056
;; UNSPEC_VMLA_LANE for the VMQ iterator.  Operand 1 (the accumulator) is tied
;; to the destination via constraint "0"; operand 2 is the vector, operand 3 is
;; a <V_HALF>-mode register holding the scalar and operand 4 is the lane
;; number.  Printed as vmla.<V_if_elem> with %q for the vectors and %P for the
;; scalar register.
;; The "type" attribute depends on <Is_float_mode>.
4057 (define_insn "neon_vmla_lane<mode>"
4058 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4059 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4060 (match_operand:VMQ 2 "s_register_operand" "w")
4061 (match_operand:<V_HALF> 3 "s_register_operand"
4062 "<scalar_mul_constraint>")
4063 (match_operand:SI 4 "immediate_operand" "i")]
4064 UNSPEC_VMLA_LANE))]
4065 "TARGET_NEON"
4066 {
4067 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4068 }
4069 [(set (attr "type")
4070 (if_then_else (match_test "<Is_float_mode>")
4071 (const_string "neon_fp_mla_s_scalar<q>")
4072 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4073 )
4074
;; VMLAL_LANE unspec iterator.  Operand 1 (a <V_widen> accumulator) is tied to
;; the destination via constraint "0"; operand 2 is the VMDI vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vmlal.<sup><V_sz_elem> with a %q destination and %P sources.
4075 (define_insn "neon_vmlal<sup>_lane<mode>"
4076 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4077 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4078 (match_operand:VMDI 2 "s_register_operand" "w")
4079 (match_operand:VMDI 3 "s_register_operand"
4080 "<scalar_mul_constraint>")
4081 (match_operand:SI 4 "immediate_operand" "i")]
4082 VMLAL_LANE))]
4083 "TARGET_NEON"
4084 {
4085 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4086 }
4087 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4088 )
4089
;; UNSPEC_VQDMLAL_LANE.  Operand 1 (a <V_widen> accumulator) is tied to the
;; destination via constraint "0"; operand 2 is the VMDI vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vqdmlal.<V_s_elem> with a %q destination and %P sources.
4090 (define_insn "neon_vqdmlal_lane<mode>"
4091 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4092 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4093 (match_operand:VMDI 2 "s_register_operand" "w")
4094 (match_operand:VMDI 3 "s_register_operand"
4095 "<scalar_mul_constraint>")
4096 (match_operand:SI 4 "immediate_operand" "i")]
4097 UNSPEC_VQDMLAL_LANE))]
4098 "TARGET_NEON"
4099 {
4100 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4101 }
4102 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4103 )
4104
;; UNSPEC_VMLS_LANE for the VMD iterator.  Operand 1 (the accumulator) is tied
;; to the destination via constraint "0"; operand 2 is the vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vmls.<V_if_elem> with all registers using %P.
;; The "type" attribute depends on <Is_float_mode>.
4105 (define_insn "neon_vmls_lane<mode>"
4106 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4107 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4108 (match_operand:VMD 2 "s_register_operand" "w")
4109 (match_operand:VMD 3 "s_register_operand"
4110 "<scalar_mul_constraint>")
4111 (match_operand:SI 4 "immediate_operand" "i")]
4112 UNSPEC_VMLS_LANE))]
4113 "TARGET_NEON"
4114 {
4115 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4116 }
4117 [(set (attr "type")
4118 (if_then_else (match_test "<Is_float_mode>")
4119 (const_string "neon_fp_mla_s_scalar<q>")
4120 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4121 )
4122
;; UNSPEC_VMLS_LANE for the VMQ iterator.  Operand 1 (the accumulator) is tied
;; to the destination via constraint "0"; operand 2 is the vector, operand 3 is
;; a <V_HALF>-mode register holding the scalar and operand 4 is the lane
;; number.  Printed as vmls.<V_if_elem> with %q for the vectors and %P for the
;; scalar register.
;; The "type" attribute depends on <Is_float_mode>.
4123 (define_insn "neon_vmls_lane<mode>"
4124 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4125 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4126 (match_operand:VMQ 2 "s_register_operand" "w")
4127 (match_operand:<V_HALF> 3 "s_register_operand"
4128 "<scalar_mul_constraint>")
4129 (match_operand:SI 4 "immediate_operand" "i")]
4130 UNSPEC_VMLS_LANE))]
4131 "TARGET_NEON"
4132 {
4133 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4134 }
4135 [(set (attr "type")
4136 (if_then_else (match_test "<Is_float_mode>")
4137 (const_string "neon_fp_mla_s_scalar<q>")
4138 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4139 )
4140
;; VMLSL_LANE unspec iterator.  Operand 1 (a <V_widen> accumulator) is tied to
;; the destination via constraint "0"; operand 2 is the VMDI vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vmlsl.<sup><V_sz_elem> with a %q destination and %P sources.
4141 (define_insn "neon_vmlsl<sup>_lane<mode>"
4142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4143 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4144 (match_operand:VMDI 2 "s_register_operand" "w")
4145 (match_operand:VMDI 3 "s_register_operand"
4146 "<scalar_mul_constraint>")
4147 (match_operand:SI 4 "immediate_operand" "i")]
4148 VMLSL_LANE))]
4149 "TARGET_NEON"
4150 {
4151 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4152 }
4153 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4154 )
4155
;; UNSPEC_VQDMLSL_LANE.  Operand 1 (a <V_widen> accumulator) is tied to the
;; destination via constraint "0"; operand 2 is the VMDI vector, operand 3
;; holds the scalar and operand 4 is the lane number.  Printed as
;; vqdmlsl.<V_s_elem> with a %q destination and %P sources.
4156 (define_insn "neon_vqdmlsl_lane<mode>"
4157 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4158 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4159 (match_operand:VMDI 2 "s_register_operand" "w")
4160 (match_operand:VMDI 3 "s_register_operand"
4161 "<scalar_mul_constraint>")
4162 (match_operand:SI 4 "immediate_operand" "i")]
4163 UNSPEC_VQDMLSL_LANE))]
4164 "TARGET_NEON"
4165 {
4166 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4167 }
4168 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4169 )
4170
4171 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4172 ; core register into a temp register, then use a scalar taken from that. This
4173 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4174 ; or extracted from another vector. In the latter case it's currently better to
4175 ; use the "_lane" variant, and the former case can probably be implemented
4176 ; using vld1_lane, but that hasn't been done yet.
4177
;; Multiply the VMD vector operand 1 by the scalar operand 2.
;; A fresh <MODE> pseudo is allocated, the scalar is written into its lane 0
;; with neon_vset_lane<mode> (the pseudo is passed as both destination and
;; input vector, so only lane 0 is given a value here), and the multiply is
;; then emitted through neon_vmul_lane<mode> with lane number 0.
4178 (define_expand "neon_vmul_n<mode>"
4179 [(match_operand:VMD 0 "s_register_operand")
4180 (match_operand:VMD 1 "s_register_operand")
4181 (match_operand:<V_elem> 2 "s_register_operand")]
4182 "TARGET_NEON"
4183 {
4184 rtx tmp = gen_reg_rtx (<MODE>mode);
4185 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4186 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4187 const0_rtx));
4188 DONE;
4189 })
4190
;; Multiply the VMQ vector operand 1 by the scalar operand 2.
;; A fresh <V_HALF> pseudo is allocated, the scalar is written into its lane 0
;; with neon_vset_lane<V_half> (the pseudo is passed as both destination and
;; input vector, so only lane 0 is given a value here), and the multiply is
;; then emitted through neon_vmul_lane<mode> with lane number 0.
4191 (define_expand "neon_vmul_n<mode>"
4192 [(match_operand:VMQ 0 "s_register_operand")
4193 (match_operand:VMQ 1 "s_register_operand")
4194 (match_operand:<V_elem> 2 "s_register_operand")]
4195 "TARGET_NEON"
4196 {
4197 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4198 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4199 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4200 const0_rtx));
4201 DONE;
4202 })
4203
;; Multiply the VH vector operand 1 by the scalar operand 2, enabled under
;; TARGET_NEON_FP16INST.
;; A fresh V4HF pseudo is allocated regardless of <MODE>, the scalar is written
;; into its lane 0 with neon_vset_lanev4hf (the pseudo is passed as both
;; destination and input vector, so only lane 0 is given a value here), and the
;; multiply is then emitted through neon_vmul_lane<mode> with lane number 0.
4204 (define_expand "neon_vmul_n<mode>"
4205 [(match_operand:VH 0 "s_register_operand")
4206 (match_operand:VH 1 "s_register_operand")
4207 (match_operand:<V_elem> 2 "s_register_operand")]
4208 "TARGET_NEON_FP16INST"
4209 {
4210 rtx tmp = gen_reg_rtx (V4HFmode);
4211 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4212 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4213 const0_rtx));
4214 DONE;
4215 })
4216
;; Widening multiply of the VMDI vector operand 1 by the scalar operand 2,
;; producing a <V_widen> result.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmulls_lane<mode> with lane number 0.
4217 (define_expand "neon_vmulls_n<mode>"
4218 [(match_operand:<V_widen> 0 "s_register_operand")
4219 (match_operand:VMDI 1 "s_register_operand")
4220 (match_operand:<V_elem> 2 "s_register_operand")]
4221 "TARGET_NEON"
4222 {
4223 rtx tmp = gen_reg_rtx (<MODE>mode);
4224 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4225 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4226 const0_rtx));
4227 DONE;
4228 })
4229
;; Widening multiply of the VMDI vector operand 1 by the scalar operand 2,
;; producing a <V_widen> result.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmullu_lane<mode> with lane number 0.
4230 (define_expand "neon_vmullu_n<mode>"
4231 [(match_operand:<V_widen> 0 "s_register_operand")
4232 (match_operand:VMDI 1 "s_register_operand")
4233 (match_operand:<V_elem> 2 "s_register_operand")]
4234 "TARGET_NEON"
4235 {
4236 rtx tmp = gen_reg_rtx (<MODE>mode);
4237 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4238 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4239 const0_rtx));
4240 DONE;
4241 })
4242
;; vqdmull of the VMDI vector operand 1 by the scalar operand 2, producing a
;; <V_widen> result.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqdmull_lane<mode> with lane number 0.
4243 (define_expand "neon_vqdmull_n<mode>"
4244 [(match_operand:<V_widen> 0 "s_register_operand")
4245 (match_operand:VMDI 1 "s_register_operand")
4246 (match_operand:<V_elem> 2 "s_register_operand")]
4247 "TARGET_NEON"
4248 {
4249 rtx tmp = gen_reg_rtx (<MODE>mode);
4250 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4251 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4252 const0_rtx));
4253 DONE;
4254 })
4255
;; vqdmulh of the VMDI vector operand 1 by the scalar operand 2.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqdmulh_lane<mode> with lane number 0.
4256 (define_expand "neon_vqdmulh_n<mode>"
4257 [(match_operand:VMDI 0 "s_register_operand")
4258 (match_operand:VMDI 1 "s_register_operand")
4259 (match_operand:<V_elem> 2 "s_register_operand")]
4260 "TARGET_NEON"
4261 {
4262 rtx tmp = gen_reg_rtx (<MODE>mode);
4263 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4264 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4265 const0_rtx));
4266 DONE;
4267 })
4268
;; vqrdmulh of the VMDI vector operand 1 by the scalar operand 2.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqrdmulh_lane<mode> with lane number 0.
4269 (define_expand "neon_vqrdmulh_n<mode>"
4270 [(match_operand:VMDI 0 "s_register_operand")
4271 (match_operand:VMDI 1 "s_register_operand")
4272 (match_operand:<V_elem> 2 "s_register_operand")]
4273 "TARGET_NEON"
4274 {
4275 rtx tmp = gen_reg_rtx (<MODE>mode);
4276 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4277 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4278 const0_rtx));
4279 DONE;
4280 })
4281
;; vqdmulh of the VMQI vector operand 1 by the scalar operand 2.
;; The scalar is written into lane 0 of a fresh <V_HALF> pseudo with
;; neon_vset_lane<V_half> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqdmulh_lane<mode> with lane number 0.
4282 (define_expand "neon_vqdmulh_n<mode>"
4283 [(match_operand:VMQI 0 "s_register_operand")
4284 (match_operand:VMQI 1 "s_register_operand")
4285 (match_operand:<V_elem> 2 "s_register_operand")]
4286 "TARGET_NEON"
4287 {
4288 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4289 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4290 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4291 const0_rtx));
4292 DONE;
4293 })
4294
;; vqrdmulh of the VMQI vector operand 1 by the scalar operand 2.
;; The scalar is written into lane 0 of a fresh <V_HALF> pseudo with
;; neon_vset_lane<V_half> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqrdmulh_lane<mode> with lane number 0.
4295 (define_expand "neon_vqrdmulh_n<mode>"
4296 [(match_operand:VMQI 0 "s_register_operand")
4297 (match_operand:VMQI 1 "s_register_operand")
4298 (match_operand:<V_elem> 2 "s_register_operand")]
4299 "TARGET_NEON"
4300 {
4301 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4302 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4303 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4304 const0_rtx));
4305 DONE;
4306 })
4307
;; Multiply-accumulate by scalar for the VMD iterator: operand 1 is passed as
;; the accumulator, operand 2 as the vector and operand 3 is the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmla_lane<mode> with lane number 0.
4308 (define_expand "neon_vmla_n<mode>"
4309 [(match_operand:VMD 0 "s_register_operand")
4310 (match_operand:VMD 1 "s_register_operand")
4311 (match_operand:VMD 2 "s_register_operand")
4312 (match_operand:<V_elem> 3 "s_register_operand")]
4313 "TARGET_NEON"
4314 {
4315 rtx tmp = gen_reg_rtx (<MODE>mode);
4316 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4317 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4318 tmp, const0_rtx));
4319 DONE;
4320 })
4321
;; Multiply-accumulate by scalar for the VMQ iterator: operand 1 is passed as
;; the accumulator, operand 2 as the vector and operand 3 is the scalar.
;; The scalar is written into lane 0 of a fresh <V_HALF> pseudo with
;; neon_vset_lane<V_half> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmla_lane<mode> with lane number 0.
4322 (define_expand "neon_vmla_n<mode>"
4323 [(match_operand:VMQ 0 "s_register_operand")
4324 (match_operand:VMQ 1 "s_register_operand")
4325 (match_operand:VMQ 2 "s_register_operand")
4326 (match_operand:<V_elem> 3 "s_register_operand")]
4327 "TARGET_NEON"
4328 {
4329 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4330 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4331 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4332 tmp, const0_rtx));
4333 DONE;
4334 })
4335
;; Widening multiply-accumulate by scalar: operand 1 is the <V_widen>
;; accumulator, operand 2 the VMDI vector and operand 3 the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmlals_lane<mode> with lane number 0.
4336 (define_expand "neon_vmlals_n<mode>"
4337 [(match_operand:<V_widen> 0 "s_register_operand")
4338 (match_operand:<V_widen> 1 "s_register_operand")
4339 (match_operand:VMDI 2 "s_register_operand")
4340 (match_operand:<V_elem> 3 "s_register_operand")]
4341 "TARGET_NEON"
4342 {
4343 rtx tmp = gen_reg_rtx (<MODE>mode);
4344 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4345 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4346 tmp, const0_rtx));
4347 DONE;
4348 })
4349
;; Widening multiply-accumulate by scalar: operand 1 is the <V_widen>
;; accumulator, operand 2 the VMDI vector and operand 3 the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmlalu_lane<mode> with lane number 0.
4350 (define_expand "neon_vmlalu_n<mode>"
4351 [(match_operand:<V_widen> 0 "s_register_operand")
4352 (match_operand:<V_widen> 1 "s_register_operand")
4353 (match_operand:VMDI 2 "s_register_operand")
4354 (match_operand:<V_elem> 3 "s_register_operand")]
4355 "TARGET_NEON"
4356 {
4357 rtx tmp = gen_reg_rtx (<MODE>mode);
4358 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4359 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4360 tmp, const0_rtx));
4361 DONE;
4362 })
4363
;; vqdmlal by scalar: operand 1 is the <V_widen> accumulator, operand 2 the
;; VMDI vector and operand 3 the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vqdmlal_lane<mode> with lane number 0.
4364 (define_expand "neon_vqdmlal_n<mode>"
4365 [(match_operand:<V_widen> 0 "s_register_operand")
4366 (match_operand:<V_widen> 1 "s_register_operand")
4367 (match_operand:VMDI 2 "s_register_operand")
4368 (match_operand:<V_elem> 3 "s_register_operand")]
4369 "TARGET_NEON"
4370 {
4371 rtx tmp = gen_reg_rtx (<MODE>mode);
4372 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4373 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4374 tmp, const0_rtx));
4375 DONE;
4376 })
4377
;; Multiply-subtract by scalar for the VMD iterator: operand 1 is passed as
;; the accumulator, operand 2 as the vector and operand 3 is the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmls_lane<mode> with lane number 0.
4378 (define_expand "neon_vmls_n<mode>"
4379 [(match_operand:VMD 0 "s_register_operand")
4380 (match_operand:VMD 1 "s_register_operand")
4381 (match_operand:VMD 2 "s_register_operand")
4382 (match_operand:<V_elem> 3 "s_register_operand")]
4383 "TARGET_NEON"
4384 {
4385 rtx tmp = gen_reg_rtx (<MODE>mode);
4386 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4387 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4388 tmp, const0_rtx));
4389 DONE;
4390 })
4391
;; Multiply-subtract by scalar for the VMQ iterator: operand 1 is passed as
;; the accumulator, operand 2 as the vector and operand 3 is the scalar.
;; The scalar is written into lane 0 of a fresh <V_HALF> pseudo with
;; neon_vset_lane<V_half> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmls_lane<mode> with lane number 0.
4392 (define_expand "neon_vmls_n<mode>"
4393 [(match_operand:VMQ 0 "s_register_operand")
4394 (match_operand:VMQ 1 "s_register_operand")
4395 (match_operand:VMQ 2 "s_register_operand")
4396 (match_operand:<V_elem> 3 "s_register_operand")]
4397 "TARGET_NEON"
4398 {
4399 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4400 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4401 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4402 tmp, const0_rtx));
4403 DONE;
4404 })
4405
;; Widening multiply-subtract by scalar: operand 1 is the <V_widen>
;; accumulator, operand 2 the VMDI vector and operand 3 the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmlsls_lane<mode> with lane number 0.
4406 (define_expand "neon_vmlsls_n<mode>"
4407 [(match_operand:<V_widen> 0 "s_register_operand")
4408 (match_operand:<V_widen> 1 "s_register_operand")
4409 (match_operand:VMDI 2 "s_register_operand")
4410 (match_operand:<V_elem> 3 "s_register_operand")]
4411 "TARGET_NEON"
4412 {
4413 rtx tmp = gen_reg_rtx (<MODE>mode);
4414 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4415 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4416 tmp, const0_rtx));
4417 DONE;
4418 })
4419
;; Widening multiply-subtract by scalar: operand 1 is the <V_widen>
;; accumulator, operand 2 the VMDI vector and operand 3 the scalar.
;; The scalar is written into lane 0 of a fresh <MODE> pseudo with
;; neon_vset_lane<mode> (only lane 0 is given a value here) and the operation
;; is emitted through neon_vmlslu_lane<mode> with lane number 0.
4420 (define_expand "neon_vmlslu_n<mode>"
4421 [(match_operand:<V_widen> 0 "s_register_operand")
4422 (match_operand:<V_widen> 1 "s_register_operand")
4423 (match_operand:VMDI 2 "s_register_operand")
4424 (match_operand:<V_elem> 3 "s_register_operand")]
4425 "TARGET_NEON"
4426 {
4427 rtx tmp = gen_reg_rtx (<MODE>mode);
4428 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4429 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4430 tmp, const0_rtx));
4431 DONE;
4432 })
4433
;; Expander for neon_vqdmlsl_n over the VMDI modes.  Scalar operand 3 goes
;; through gen_neon_vset_lane<mode> with const0_rtx into a fresh <MODE>
;; register, which is then passed with const0_rtx to neon_vqdmlsl_lane<mode>
;; together with operands 0-2.
4434 (define_expand "neon_vqdmlsl_n<mode>"
4435 [(match_operand:<V_widen> 0 "s_register_operand")
4436 (match_operand:<V_widen> 1 "s_register_operand")
4437 (match_operand:VMDI 2 "s_register_operand")
4438 (match_operand:<V_elem> 3 "s_register_operand")]
4439 "TARGET_NEON"
4440 {
4441 rtx tmp = gen_reg_rtx (<MODE>mode);
4442 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4443 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4444 tmp, const0_rtx));
4445 DONE;
4446 })
4447
;; Insn for UNSPEC_VEXT over the VDQX modes: two register inputs plus immediate
;; operand 3.  The immediate is passed to arm_const_bounds with limits 0 and
;; GET_MODE_NUNITS (<MODE>mode) before the vext template is returned.
4448 (define_insn "@neon_vext<mode>"
4449 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4450 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4451 (match_operand:VDQX 2 "s_register_operand" "w")
4452 (match_operand:SI 3 "immediate_operand" "i")]
4453 UNSPEC_VEXT))]
4454 "TARGET_NEON"
4455 {
4456 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4457 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4458 }
4459 [(set_attr "type" "neon_ext<q>")]
4460 )
4461
;; Insn for UNSPEC_VREV64 over the VDQ modes: one register input, emitted as
;; vrev64.<V_sz_elem>.
4462 (define_insn "@neon_vrev64<mode>"
4463 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4464 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4465 UNSPEC_VREV64))]
4466 "TARGET_NEON"
4467 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4468 [(set_attr "type" "neon_rev<q>")]
4469 )
4470
;; Insn for UNSPEC_VREV32 over the VX modes: one register input, emitted as
;; vrev32.<V_sz_elem>.
4471 (define_insn "@neon_vrev32<mode>"
4472 [(set (match_operand:VX 0 "s_register_operand" "=w")
4473 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4474 UNSPEC_VREV32))]
4475 "TARGET_NEON"
4476 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4477 [(set_attr "type" "neon_rev<q>")]
4478 )
4479
;; Insn for UNSPEC_VREV16 over the VE modes: one register input, emitted as
;; vrev16.<V_sz_elem>.
4480 (define_insn "@neon_vrev16<mode>"
4481 [(set (match_operand:VE 0 "s_register_operand" "=w")
4482 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4483 UNSPEC_VREV16))]
4484 "TARGET_NEON"
4485 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4486 [(set_attr "type" "neon_rev<q>")]
4487 )
4488
4489 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4490 ; allocation. For an intrinsic of form:
4491 ; rD = vbsl_* (rS, rN, rM)
4492 ; We can use any of:
4493 ; vbsl rS, rN, rM (if D = S)
4494 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4495 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4496
;; Three constraint alternatives, one per input tied to the destination:
;; operand 1 tied ("0") selects the vbsl template, operand 3 tied selects vbit,
;; and operand 2 tied selects vbif.  All operands share the same VDQX mode.
4497 (define_insn "neon_vbsl<mode>_internal"
4498 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4499 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4500 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4501 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4502 UNSPEC_VBSL))]
4503 "TARGET_NEON"
4504 "@
4505 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4506 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4507 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4508 [(set_attr "type" "neon_bsl<q>")]
4509 )
4510
;; Expander for UNSPEC_VBSL whose operand 1 arrives in <V_cmp_result> mode.
;; The preparation code replaces operand 1 with its gen_lowpart in <MODE>mode,
;; so the emitted unspec has all three inputs in the same mode.
4511 (define_expand "@neon_vbsl<mode>"
4512 [(set (match_operand:VDQX 0 "s_register_operand")
4513 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4514 (match_operand:VDQX 2 "s_register_operand")
4515 (match_operand:VDQX 3 "s_register_operand")]
4516 UNSPEC_VBSL))]
4517 "TARGET_NEON"
4518 {
4519 /* We can't alias operands together if they have different modes. */
4520 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4521 })
4522
4523 ;; vshl, vrshl
;; Insn over the VSHL unspec iterator for the VDQIX modes.  Both inputs are
;; vector registers; <shift_op> and <sup> are substituted into the mnemonic and
;; its data-type suffix.
4524 (define_insn "neon_v<shift_op><sup><mode>"
4525 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4526 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4527 (match_operand:VDQIX 2 "s_register_operand" "w")]
4528 VSHL))]
4529 "TARGET_NEON"
4530 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4531 [(set_attr "type" "neon_shift_imm<q>")]
4532 )
4533
4534 ;; vqshl, vqrshl
;; Insn over the VQSHL unspec iterator for the VDQIX modes.  Same operand
;; layout and template as the VSHL pattern; only the iterator and the "type"
;; attribute (neon_sat_shift_imm<q>) differ.
4535 (define_insn "neon_v<shift_op><sup><mode>"
4536 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4537 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4538 (match_operand:VDQIX 2 "s_register_operand" "w")]
4539 VQSHL))]
4540 "TARGET_NEON"
4541 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4542 [(set_attr "type" "neon_sat_shift_imm<q>")]
4543 )
4544
4545 ;; vshr_n, vrshr_n
;; Insn over the VSHR_N unspec iterator for the VDQIX modes.  Operand 2 is an
;; immediate that is passed to arm_const_bounds with limits 1 and
;; neon_element_bits (<MODE>mode) + 1 before the template is returned.
4546 (define_insn "neon_v<shift_op><sup>_n<mode>"
4547 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4548 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4549 (match_operand:SI 2 "immediate_operand" "i")]
4550 VSHR_N))]
4551 "TARGET_NEON"
4552 {
4553 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4554 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4555 }
4556 [(set_attr "type" "neon_shift_imm<q>")]
4557 )
4558
4559 ;; vshrn_n, vrshrn_n
;; Insn over the VSHRN_N unspec iterator.  The input is a VN-mode register
;; printed with %q and the result is in <V_narrow> mode printed with %P.  The
;; immediate is passed to arm_const_bounds with limits 1 and
;; neon_element_bits (<MODE>mode) / 2 + 1.
4560 (define_insn "neon_v<shift_op>_n<mode>"
4561 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4562 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4563 (match_operand:SI 2 "immediate_operand" "i")]
4564 VSHRN_N))]
4565 "TARGET_NEON"
4566 {
4567 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4568 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4569 }
4570 [(set_attr "type" "neon_shift_imm_narrow_q")]
4571 )
4572
4573 ;; vqshrn_n, vqrshrn_n
;; Insn over the VQSHRN_N unspec iterator.  Same operand layout and immediate
;; limits as the VSHRN_N pattern; the data-type suffix is built from <sup> and
;; <V_sz_elem>.
4574 (define_insn "neon_v<shift_op><sup>_n<mode>"
4575 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4576 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4577 (match_operand:SI 2 "immediate_operand" "i")]
4578 VQSHRN_N))]
4579 "TARGET_NEON"
4580 {
4581 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4582 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4583 }
4584 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4585 )
4586
4587 ;; vqshrun_n, vqrshrun_n
;; Insn over the VQSHRUN_N unspec iterator.  Same operand layout and immediate
;; limits as the VSHRN_N pattern; the data-type suffix is <V_s_elem>.
4588 (define_insn "neon_v<shift_op>_n<mode>"
4589 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4590 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4591 (match_operand:SI 2 "immediate_operand" "i")]
4592 VQSHRUN_N))]
4593 "TARGET_NEON"
4594 {
4595 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4596 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4597 }
4598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4599 )
4600
;; Insn for UNSPEC_VSHL_N over the VDQIX modes.  Immediate operand 2 is passed
;; to arm_const_bounds with limits 0 and neon_element_bits (<MODE>mode); the
;; template is vshl.<V_if_elem>.
4601 (define_insn "neon_vshl_n<mode>"
4602 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4603 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4604 (match_operand:SI 2 "immediate_operand" "i")]
4605 UNSPEC_VSHL_N))]
4606 "TARGET_NEON"
4607 {
4608 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4609 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4610 }
4611 [(set_attr "type" "neon_shift_imm<q>")]
4612 )
4613
;; Insn over the VQSHL_N unspec iterator for the VDQIX modes.  Immediate
;; operand 2 is passed to arm_const_bounds with limits 0 and
;; neon_element_bits (<MODE>mode); the template is vqshl with a <sup>-prefixed
;; element size.
4614 (define_insn "neon_vqshl_<sup>_n<mode>"
4615 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4617 (match_operand:SI 2 "immediate_operand" "i")]
4618 VQSHL_N))]
4619 "TARGET_NEON"
4620 {
4621 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4622 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4623 }
4624 [(set_attr "type" "neon_sat_shift_imm<q>")]
4625 )
4626
;; Insn for UNSPEC_VQSHLU_N over the VDQIX modes.  Immediate operand 2 is
;; passed to arm_const_bounds with limits 0 and neon_element_bits (<MODE>mode);
;; the template is vqshlu.<V_s_elem>.
4627 (define_insn "neon_vqshlu_n<mode>"
4628 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4629 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4630 (match_operand:SI 2 "immediate_operand" "i")]
4631 UNSPEC_VQSHLU_N))]
4632 "TARGET_NEON"
4633 {
4634 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4635 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4636 }
4637 [(set_attr "type" "neon_sat_shift_imm<q>")]
4638 )
4639
;; Insn over the VSHLL_N unspec iterator: VW-mode input printed with %P,
;; <V_widen> result printed with %q.  Immediate operand 2 is passed to
;; arm_const_bounds with limits 0 and neon_element_bits (<MODE>mode) + 1.
;; NOTE(review): the in-body comment says "0 < imm" while the lower limit
;; passed is 0; confirm against arm_const_bounds which of the two is intended.
4640 (define_insn "neon_vshll<sup>_n<mode>"
4641 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4642 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4643 (match_operand:SI 2 "immediate_operand" "i")]
4644 VSHLL_N))]
4645 "TARGET_NEON"
4646 {
4647 /* The boundaries are: 0 < imm <= size. */
4648 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4649 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4650 }
4651 [(set_attr "type" "neon_shift_imm_long")]
4652 )
4653
4654 ;; vsra_n, vrsra_n
;; Insn over the VSRA_N unspec iterator for the VDQIX modes.  Operand 1 is tied
;; to the destination ("0"), so the template prints only operand 0, operand 2
;; and immediate operand 3.  The immediate is passed to arm_const_bounds with
;; limits 1 and neon_element_bits (<MODE>mode) + 1.
4655 (define_insn "neon_v<shift_op><sup>_n<mode>"
4656 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4657 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4658 (match_operand:VDQIX 2 "s_register_operand" "w")
4659 (match_operand:SI 3 "immediate_operand" "i")]
4660 VSRA_N))]
4661 "TARGET_NEON"
4662 {
4663 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4664 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4665 }
4666 [(set_attr "type" "neon_shift_acc<q>")]
4667 )
4668
;; Insn for UNSPEC_VSRI over the VDQIX modes.  Operand 1 is tied to the
;; destination; immediate operand 3 is passed to arm_const_bounds with limits 1
;; and neon_element_bits (<MODE>mode) + 1 before vsri is printed.
4669 (define_insn "neon_vsri_n<mode>"
4670 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4671 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4672 (match_operand:VDQIX 2 "s_register_operand" "w")
4673 (match_operand:SI 3 "immediate_operand" "i")]
4674 UNSPEC_VSRI))]
4675 "TARGET_NEON"
4676 {
4677 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4678 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4679 }
4680 [(set_attr "type" "neon_shift_reg<q>")]
4681 )
4682
;; Insn for UNSPEC_VSLI over the VDQIX modes.  Operand 1 is tied to the
;; destination; immediate operand 3 is passed to arm_const_bounds with limits 0
;; and neon_element_bits (<MODE>mode) before vsli is printed.
4683 (define_insn "neon_vsli_n<mode>"
4684 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4685 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4686 (match_operand:VDQIX 2 "s_register_operand" "w")
4687 (match_operand:SI 3 "immediate_operand" "i")]
4688 UNSPEC_VSLI))]
4689 "TARGET_NEON"
4690 {
4691 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4692 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4693 }
4694 [(set_attr "type" "neon_shift_reg<q>")]
4695 )
4696
;; UNSPEC_VTBL with a single V8QI table register (operand 1) and a V8QI
;; operand 2; printed as vtbl.8 with operand 1 inside the register list.
4697 (define_insn "neon_vtbl1v8qi"
4698 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4699 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4700 (match_operand:V8QI 2 "s_register_operand" "w")]
4701 UNSPEC_VTBL))]
4702 "TARGET_NEON"
4703 "vtbl.8\t%P0, {%P1}, %P2"
4704 [(set_attr "type" "neon_tbl1")]
4705 )
4706
;; UNSPEC_VTBL with a TImode table (operand 1).  The output code builds two
;; V8QI register rtxes at REGNO (operands[1]) and REGNO + 2, prints them as the
;; vtbl.8 register list via output_asm_insn, and returns an empty template.
4707 (define_insn "neon_vtbl2v8qi"
4708 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4709 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4710 (match_operand:V8QI 2 "s_register_operand" "w")]
4711 UNSPEC_VTBL))]
4712 "TARGET_NEON"
4713 {
4714 rtx ops[4];
4715 int tabbase = REGNO (operands[1]);
4716
4717 ops[0] = operands[0];
4718 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4719 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4720 ops[3] = operands[2];
4721 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4722
4723 return "";
4724 }
4725 [(set_attr "type" "neon_tbl2")]
4726 )
4727
;; UNSPEC_VTBL with an EImode table (operand 1).  The output code builds three
;; V8QI register rtxes at REGNO (operands[1]) + 0, 2 and 4 and prints them as
;; the vtbl.8 register list; the returned template is empty.
4728 (define_insn "neon_vtbl3v8qi"
4729 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4730 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4731 (match_operand:V8QI 2 "s_register_operand" "w")]
4732 UNSPEC_VTBL))]
4733 "TARGET_NEON"
4734 {
4735 rtx ops[5];
4736 int tabbase = REGNO (operands[1]);
4737
4738 ops[0] = operands[0];
4739 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4740 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4741 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4742 ops[4] = operands[2];
4743 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4744
4745 return "";
4746 }
4747 [(set_attr "type" "neon_tbl3")]
4748 )
4749
;; UNSPEC_VTBL with an OImode table (operand 1).  The output code builds four
;; V8QI register rtxes at REGNO (operands[1]) + 0, 2, 4 and 6 and prints them
;; as the vtbl.8 register list; the returned template is empty.
4750 (define_insn "neon_vtbl4v8qi"
4751 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4752 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4753 (match_operand:V8QI 2 "s_register_operand" "w")]
4754 UNSPEC_VTBL))]
4755 "TARGET_NEON"
4756 {
4757 rtx ops[6];
4758 int tabbase = REGNO (operands[1]);
4759
4760 ops[0] = operands[0];
4761 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4762 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4763 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4764 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4765 ops[5] = operands[2];
4766 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4767
4768 return "";
4769 }
4770 [(set_attr "type" "neon_tbl4")]
4771 )
4772
4773 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI table (operand 1).  The insn has no
;; direct template ("#") and is split once reload_completed holds: operand 1 is
;; viewed as TImode via gen_lowpart, and two neon_vtbl2v8qi insns are emitted,
;; one for the low and one for the high V8QI part of operands 0 and 2.  The
;; destination is earlyclobber ("=&w").
4774 (define_insn_and_split "neon_vtbl1v16qi"
4775 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4776 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4777 (match_operand:V16QI 2 "s_register_operand" "w")]
4778 UNSPEC_VTBL))]
4779 "TARGET_NEON"
4780 "#"
4781 "&& reload_completed"
4782 [(const_int 0)]
4783 {
4784 rtx op0, op1, op2, part0, part2;
4785 unsigned ofs;
4786
4787 op0 = operands[0];
4788 op1 = gen_lowpart (TImode, operands[1]);
4789 op2 = operands[2];
4790
4791 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4792 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4793 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4794 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4795
4796 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4797 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4798 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4799 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4800 DONE;
4801 }
4802 [(set_attr "type" "multiple")]
4803 )
4804
;; V16QI table lookup with an OImode table (operand 1).  The insn has no direct
;; template ("#") and is split once reload_completed holds into two V8QI
;; lookups, one for the low and one for the high V8QI part of operands 0 and 2.
;; The table is OImode, so the split uses neon_vtbl4v8qi, the V8QI lookup
;; pattern whose table operand is declared OImode.  neon_vtbl2v8qi declares a
;; TImode table; both generators presumably build the same
;; (set ... (unspec:V8QI [table index] UNSPEC_VTBL)) RTL, so this names the
;; pattern that actually matches instead of relying on re-recognition.
;; The destination is earlyclobber ("=&w").
4805 (define_insn_and_split "neon_vtbl2v16qi"
4806 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4807 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4808 (match_operand:V16QI 2 "s_register_operand" "w")]
4809 UNSPEC_VTBL))]
4810 "TARGET_NEON"
4811 "#"
4812 "&& reload_completed"
4813 [(const_int 0)]
4814 {
4815 rtx op0, op1, op2, part0, part2;
4816 unsigned ofs;
4817
4818 op0 = operands[0];
4819 op1 = operands[1];
4820 op2 = operands[2];
4821
4822 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4823 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4824 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4825 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4826
4827 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4828 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4829 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4830 emit_insn (gen_neon_vtbl4v8qi (part0, op1, part2));
4831 DONE;
4832 }
4833 [(set_attr "type" "multiple")]
4834 )
4835
4836 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4837 ;; handle quad-word input modes, producing octa-word output modes. But
4838 ;; that requires us to add support for octa-word vector modes in moves.
4839 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers into one OImode register (UNSPEC_VCONCAT).
;; There is no direct template ("#"); once reload_completed holds the insn is
;; split by calling neon_split_vcombine (operands).
4840 (define_insn_and_split "neon_vcombinev16qi"
4841 [(set (match_operand:OI 0 "s_register_operand" "=w")
4842 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4843 (match_operand:V16QI 2 "s_register_operand" "w")]
4844 UNSPEC_VCONCAT))]
4845 "TARGET_NEON"
4846 "#"
4847 "&& reload_completed"
4848 [(const_int 0)]
4849 {
4850 neon_split_vcombine (operands);
4851 DONE;
4852 }
4853 [(set_attr "type" "multiple")]
4854 )
4855
;; UNSPEC_VTBX with a single V8QI table register.  Operand 1 is tied to the
;; destination ("0"); the template prints table operand 2 and operand 3.
4856 (define_insn "neon_vtbx1v8qi"
4857 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4858 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4859 (match_operand:V8QI 2 "s_register_operand" "w")
4860 (match_operand:V8QI 3 "s_register_operand" "w")]
4861 UNSPEC_VTBX))]
4862 "TARGET_NEON"
4863 "vtbx.8\t%P0, {%P2}, %P3"
4864 [(set_attr "type" "neon_tbl1")]
4865 )
4866
;; UNSPEC_VTBX with a TImode table (operand 2) and operand 1 tied to the
;; destination.  The output code builds two V8QI register rtxes at
;; REGNO (operands[2]) + 0 and 2 and prints them as the vtbx.8 register list.
4867 (define_insn "neon_vtbx2v8qi"
4868 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4869 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4870 (match_operand:TI 2 "s_register_operand" "w")
4871 (match_operand:V8QI 3 "s_register_operand" "w")]
4872 UNSPEC_VTBX))]
4873 "TARGET_NEON"
4874 {
4875 rtx ops[4];
4876 int tabbase = REGNO (operands[2]);
4877
4878 ops[0] = operands[0];
4879 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4880 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4881 ops[3] = operands[3];
4882 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4883
4884 return "";
4885 }
4886 [(set_attr "type" "neon_tbl2")]
4887 )
4888
;; UNSPEC_VTBX with an EImode table (operand 2) and operand 1 tied to the
;; destination.  The output code builds three V8QI register rtxes at
;; REGNO (operands[2]) + 0, 2 and 4 and prints them as the vtbx.8 register list.
4889 (define_insn "neon_vtbx3v8qi"
4890 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4891 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4892 (match_operand:EI 2 "s_register_operand" "w")
4893 (match_operand:V8QI 3 "s_register_operand" "w")]
4894 UNSPEC_VTBX))]
4895 "TARGET_NEON"
4896 {
4897 rtx ops[5];
4898 int tabbase = REGNO (operands[2]);
4899
4900 ops[0] = operands[0];
4901 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4902 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4903 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4904 ops[4] = operands[3];
4905 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4906
4907 return "";
4908 }
4909 [(set_attr "type" "neon_tbl3")]
4910 )
4911
;; UNSPEC_VTBX with an OImode table (operand 2) and operand 1 tied to the
;; destination.  The output code builds four V8QI register rtxes at
;; REGNO (operands[2]) + 0, 2, 4 and 6 and prints them as the vtbx.8 register
;; list.
4912 (define_insn "neon_vtbx4v8qi"
4913 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4914 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4915 (match_operand:OI 2 "s_register_operand" "w")
4916 (match_operand:V8QI 3 "s_register_operand" "w")]
4917 UNSPEC_VTBX))]
4918 "TARGET_NEON"
4919 {
4920 rtx ops[6];
4921 int tabbase = REGNO (operands[2]);
4922
4923 ops[0] = operands[0];
4924 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4925 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4926 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4927 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4928 ops[5] = operands[3];
4929 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4930
4931 return "";
4932 }
4933 [(set_attr "type" "neon_tbl4")]
4934 )
4935
;; Expander producing a parallel of two sets over the VDQWH modes: operand 0
;; receives UNSPEC_VTRN1 and operand 3 receives UNSPEC_VTRN2 of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
4936 (define_expand "@neon_vtrn<mode>_internal"
4937 [(parallel
4938 [(set (match_operand:VDQWH 0 "s_register_operand")
4939 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4940 (match_operand:VDQWH 2 "s_register_operand")]
4941 UNSPEC_VTRN1))
4942 (set (match_operand:VDQWH 3 "s_register_operand")
4943 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4944 "TARGET_NEON"
4945 ""
4946 )
4947
4948 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vtrn parallel.  Here the outputs are operands 0 and 2
;; (both earlyclobber) and the inputs are operands 1 and 3, tied to outputs 0
;; and 2 respectively; the template prints only operands 0 and 2.
4949 (define_insn "*neon_vtrn<mode>_insn"
4950 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4951 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4952 (match_operand:VDQWH 3 "s_register_operand" "2")]
4953 UNSPEC_VTRN1))
4954 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4955 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4956 UNSPEC_VTRN2))]
4957 "TARGET_NEON"
4958 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4959 [(set_attr "type" "neon_permute<q>")]
4960 )
4961
;; Expander producing a parallel of two sets over the VDQWH modes: operand 0
;; receives UNSPEC_VZIP1 and operand 3 receives UNSPEC_VZIP2 of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
4962 (define_expand "@neon_vzip<mode>_internal"
4963 [(parallel
4964 [(set (match_operand:VDQWH 0 "s_register_operand")
4965 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4966 (match_operand:VDQWH 2 "s_register_operand")]
4967 UNSPEC_VZIP1))
4968 (set (match_operand:VDQWH 3 "s_register_operand")
4969 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4970 "TARGET_NEON"
4971 ""
4972 )
4973
4974 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vzip parallel.  Outputs are operands 0 and 2 (both
;; earlyclobber); inputs are operands 1 and 3, tied to outputs 0 and 2
;; respectively; the template prints only operands 0 and 2.
4975 (define_insn "*neon_vzip<mode>_insn"
4976 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4977 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4978 (match_operand:VDQWH 3 "s_register_operand" "2")]
4979 UNSPEC_VZIP1))
4980 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4981 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4982 UNSPEC_VZIP2))]
4983 "TARGET_NEON"
4984 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4985 [(set_attr "type" "neon_zip<q>")]
4986 )
4987
;; Expander producing a parallel of two sets over the VDQWH modes: operand 0
;; receives UNSPEC_VUZP1 and operand 3 receives UNSPEC_VUZP2 of the same two
;; inputs (operands 1 and 2).  There is no preparation code.
4988 (define_expand "@neon_vuzp<mode>_internal"
4989 [(parallel
4990 [(set (match_operand:VDQWH 0 "s_register_operand")
4991 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4992 (match_operand:VDQWH 2 "s_register_operand")]
4993 UNSPEC_VUZP1))
4994 (set (match_operand:VDQWH 3 "s_register_operand")
4995 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4996 "TARGET_NEON"
4997 ""
4998 )
4999
5000 ;; Note: Different operand numbering to handle tied registers correctly.
;; Matching insn for the vuzp parallel.  Outputs are operands 0 and 2 (both
;; earlyclobber); inputs are operands 1 and 3, tied to outputs 0 and 2
;; respectively; the template prints only operands 0 and 2.
5001 (define_insn "*neon_vuzp<mode>_insn"
5002 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5003 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5004 (match_operand:VDQWH 3 "s_register_operand" "2")]
5005 UNSPEC_VUZP1))
5006 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5007 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5008 UNSPEC_VUZP2))]
5009 "TARGET_NEON"
5010 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5011 [(set_attr "type" "neon_zip<q>")]
5012 )
5013
;; Expander for the vec_load_lanes<mode><mode> name over the VDQX modes.  It
;; has no preparation code and simply produces the UNSPEC_VLD1 set of a
;; register from a neon_struct_operand memory.
5014 (define_expand "vec_load_lanes<mode><mode>"
5015 [(set (match_operand:VDQX 0 "s_register_operand")
5016 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5017 UNSPEC_VLD1))]
5018 "TARGET_NEON")
5019
;; Insn for UNSPEC_VLD1 over the VDQX modes: loads operand 0 from the "Um"
;; memory operand 1, printed as vld1.<V_sz_elem> with the %h and %A modifiers.
5020 (define_insn "neon_vld1<mode>"
5021 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5022 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5023 UNSPEC_VLD1))]
5024 "TARGET_NEON"
5025 "vld1.<V_sz_elem>\t%h0, %A1"
5026 [(set_attr "type" "neon_load1_1reg<q>")]
5027 )
5028
5029 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5030 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5031 ;; lane order here.
;; VDX variant of UNSPEC_VLD1_LANE.  Operand 2 (the previous vector value) is
;; tied to the destination.  The lane in operand 3 is remapped with
;; NEON_ENDIAN_LANE_N and written back to operands[3]; modes with a single unit
;; (GET_MODE_NUNITS == 1) use the vld1 form without a lane index.
5032 (define_insn "neon_vld1_lane<mode>"
5033 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5034 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5035 (match_operand:VDX 2 "s_register_operand" "0")
5036 (match_operand:SI 3 "immediate_operand" "i")]
5037 UNSPEC_VLD1_LANE))]
5038 "TARGET_NEON"
5039 {
5040 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5041 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5042 operands[3] = GEN_INT (lane);
5043 if (max == 1)
5044 return "vld1.<V_sz_elem>\t%P0, %A1";
5045 else
5046 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5047 }
5048 [(set_attr "type" "neon_load1_one_lane<q>")]
5049 )
5050
5051 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5052 ;; here on big endian targets.
;; VQX variant of UNSPEC_VLD1_LANE.  After NEON_ENDIAN_LANE_N remapping, a lane
;; of at least max / 2 is rebased by max / 2 and the register number is bumped
;; by 2.  Operand 0 is then replaced with a <V_HALF>mode register at that
;; number, and modes with two units use the vld1 form without a lane index.
5053 (define_insn "neon_vld1_lane<mode>"
5054 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5055 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5056 (match_operand:VQX 2 "s_register_operand" "0")
5057 (match_operand:SI 3 "immediate_operand" "i")]
5058 UNSPEC_VLD1_LANE))]
5059 "TARGET_NEON"
5060 {
5061 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5062 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5063 operands[3] = GEN_INT (lane);
5064 int regno = REGNO (operands[0]);
5065 if (lane >= max / 2)
5066 {
5067 lane -= max / 2;
5068 regno += 2;
5069 operands[3] = GEN_INT (lane);
5070 }
5071 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5072 if (max == 2)
5073 return "vld1.<V_sz_elem>\t%P0, %A1";
5074 else
5075 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5076 }
5077 [(set_attr "type" "neon_load1_one_lane<q>")]
5078 )
5079
;; vec_duplicate of a <V_elem> memory operand into a VD_LANE-mode register,
;; printed as vld1 with the all-lanes form {%P0[]}.
5080 (define_insn "neon_vld1_dup<mode>"
5081 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5082 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5083 "TARGET_NEON"
5084 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5085 [(set_attr "type" "neon_load1_all_lanes<q>")]
5086 )
5087
5088 ;; Special case for DImode. Treat it exactly like a simple load.
;; Expands to a plain UNSPEC_VLD1 set of the DImode operand 0 from memory
;; operand 1; the preparation code is empty.
5089 (define_expand "neon_vld1_dupdi"
5090 [(set (match_operand:DI 0 "s_register_operand")
5091 (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
5092 UNSPEC_VLD1))]
5093 "TARGET_NEON"
5094 ""
5095 )
5096
;; VQ2 variant of the vec_duplicate load: the template names both %e0 and %f0
;; of the destination, each with the all-lanes [] suffix.
5097 (define_insn "neon_vld1_dup<mode>"
5098 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5099 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5100 "TARGET_NEON"
5101 {
5102 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5103 }
5104 [(set_attr "type" "neon_load1_all_lanes<q>")]
5105 )
5106
;; vec_duplicate of a DImode memory operand into a V2DI register.  There is no
;; direct template ("#"); once reload_completed holds, the split loads into the
;; low DImode part of operand 0 through neon_vld1_dupdi and then copies that
;; part into the high DImode part with emit_move_insn.
5107 (define_insn_and_split "neon_vld1_dupv2di"
5108 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5109 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5110 "TARGET_NEON"
5111 "#"
5112 "&& reload_completed"
5113 [(const_int 0)]
5114 {
5115 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5116 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5117 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5118 DONE;
5119 }
5120 [(set_attr "length" "8")
5121 (set_attr "type" "neon_load1_all_lanes_q")]
5122 )
5123
;; Expander for the vec_store_lanes<mode><mode> name over the VDQX modes.  It
;; has no preparation code and simply produces the UNSPEC_VST1 set of a
;; neon_struct_operand memory from a register.
5124 (define_expand "vec_store_lanes<mode><mode>"
5125 [(set (match_operand:VDQX 0 "neon_struct_operand")
5126 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5127 UNSPEC_VST1))]
5128 "TARGET_NEON")
5129
;; Insn for UNSPEC_VST1 over the VDQX modes: stores register operand 1 to the
;; "Um" memory operand 0, printed as vst1.<V_sz_elem> with %h and %A.
5130 (define_insn "neon_vst1<mode>"
5131 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5132 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5133 UNSPEC_VST1))]
5134 "TARGET_NEON"
5135 "vst1.<V_sz_elem>\t%h1, %A0"
5136 [(set_attr "type" "neon_store1_1reg<q>")])
5137
5138 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5139 ;; here on big endian targets.
;; VDX variant of UNSPEC_VST1_LANE.  The lane in operand 2 is remapped with
;; NEON_ENDIAN_LANE_N and written back to operands[2]; modes with a single unit
;; use the vst1 form without a lane index.
5140 (define_insn "neon_vst1_lane<mode>"
5141 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5142 (unspec:<V_elem>
5143 [(match_operand:VDX 1 "s_register_operand" "w")
5144 (match_operand:SI 2 "immediate_operand" "i")]
5145 UNSPEC_VST1_LANE))]
5146 "TARGET_NEON"
5147 {
5148 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5149 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5150 operands[2] = GEN_INT (lane);
5151 if (max == 1)
5152 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5153 else
5154 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5155 }
5156 [(set_attr "type" "neon_store1_one_lane<q>")]
5157 )
5158
5159 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5160 ;; here on big endian targets.
;; VQX variant of UNSPEC_VST1_LANE.  After NEON_ENDIAN_LANE_N remapping, a lane
;; of at least max / 2 is rebased by max / 2 and the register number is bumped
;; by 2.  Operand 1 is then replaced with a <V_HALF>mode register at that
;; number, and modes with two units use the vst1 form without a lane index.
5161 (define_insn "neon_vst1_lane<mode>"
5162 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5163 (unspec:<V_elem>
5164 [(match_operand:VQX 1 "s_register_operand" "w")
5165 (match_operand:SI 2 "immediate_operand" "i")]
5166 UNSPEC_VST1_LANE))]
5167 "TARGET_NEON"
5168 {
5169 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5170 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5171 int regno = REGNO (operands[1]);
5172 if (lane >= max / 2)
5173 {
5174 lane -= max / 2;
5175 regno += 2;
5176 }
5177 operands[2] = GEN_INT (lane);
5178 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5179 if (max == 2)
5180 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5181 else
5182 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5183 }
5184 [(set_attr "type" "neon_store1_one_lane<q>")]
5185 )
5186
;; Expander for the vec_load_lanesti<mode> name over the VDX modes.  Produces
;; an UNSPEC_VLD2 set of a TImode register; the vector mode appears only in the
;; dummy UNSPEC_VSTRUCTDUMMY operand.  There is no preparation code.
5187 (define_expand "vec_load_lanesti<mode>"
5188 [(set (match_operand:TI 0 "s_register_operand")
5189 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5190 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5191 UNSPEC_VLD2))]
5192 "TARGET_NEON")
5193
;; UNSPEC_VLD2 into a TImode register, iterated over the VDXBF modes through
;; the dummy unspec.  When <V_sz_elem> is 64 the output is vld1.64 instead of
;; vld2, and the "type" attribute switches between neon_load1_2reg<q> and
;; neon_load2_2reg<q> on the same condition.
5194 (define_insn "neon_vld2<mode>"
5195 [(set (match_operand:TI 0 "s_register_operand" "=w")
5196 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5197 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198 UNSPEC_VLD2))]
5199 "TARGET_NEON"
5200 {
5201 if (<V_sz_elem> == 64)
5202 return "vld1.64\t%h0, %A1";
5203 else
5204 return "vld2.<V_sz_elem>\t%h0, %A1";
5205 }
5206 [(set (attr "type")
5207 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5208 (const_string "neon_load1_2reg<q>")
5209 (const_string "neon_load2_2reg<q>")))]
5210 )
5211
;; Expander for the vec_load_lanesoi<mode> name over the VQ2 modes.  Produces
;; an UNSPEC_VLD2 set of an OImode register; the vector mode appears only in
;; the dummy UNSPEC_VSTRUCTDUMMY operand.  There is no preparation code.
5212 (define_expand "vec_load_lanesoi<mode>"
5213 [(set (match_operand:OI 0 "s_register_operand")
5214 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5215 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5216 UNSPEC_VLD2))]
5217 "TARGET_NEON")
5218
;; UNSPEC_VLD2 into an OImode register, iterated over the VQ2BF modes through
;; the dummy unspec; printed as vld2.<V_sz_elem> with %h0 and %A1.
5219 (define_insn "neon_vld2<mode>"
5220 [(set (match_operand:OI 0 "s_register_operand" "=w")
5221 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5222 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223 UNSPEC_VLD2))]
5224 "TARGET_NEON"
5225 "vld2.<V_sz_elem>\t%h0, %A1"
5226 [(set_attr "type" "neon_load2_2reg_q")])
5227
5228 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5229 ;; here on big endian targets.
;; TImode variant of UNSPEC_VLD2_LANE over the VD_LANE modes; operand 2 is tied
;; to the destination.  The output code remaps the lane with
;; NEON_ENDIAN_LANE_N and prints two DImode registers, at REGNO (operands[0])
;; and REGNO + 2, each indexed by that lane.
5230 (define_insn "neon_vld2_lane<mode>"
5231 [(set (match_operand:TI 0 "s_register_operand" "=w")
5232 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5233 (match_operand:TI 2 "s_register_operand" "0")
5234 (match_operand:SI 3 "immediate_operand" "i")
5235 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5236 UNSPEC_VLD2_LANE))]
5237 "TARGET_NEON"
5238 {
5239 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5240 int regno = REGNO (operands[0]);
5241 rtx ops[4];
5242 ops[0] = gen_rtx_REG (DImode, regno);
5243 ops[1] = gen_rtx_REG (DImode, regno + 2);
5244 ops[2] = operands[1];
5245 ops[3] = GEN_INT (lane);
5246 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5247 return "";
5248 }
5249 [(set_attr "type" "neon_load2_one_lane<q>")]
5250 )
5251
5252 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5253 ;; here on big endian targets.
;; OImode variant of UNSPEC_VLD2_LANE over the VQ_HS modes; operand 2 is tied
;; to the destination.  After NEON_ENDIAN_LANE_N remapping, a lane of at least
;; max / 2 is rebased by max / 2 and the base register number is bumped by 2.
;; The two DImode registers printed are at that base and at base + 4.
5254 (define_insn "neon_vld2_lane<mode>"
5255 [(set (match_operand:OI 0 "s_register_operand" "=w")
5256 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5257 (match_operand:OI 2 "s_register_operand" "0")
5258 (match_operand:SI 3 "immediate_operand" "i")
5259 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260 UNSPEC_VLD2_LANE))]
5261 "TARGET_NEON"
5262 {
5263 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5264 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5265 int regno = REGNO (operands[0]);
5266 rtx ops[4];
5267 if (lane >= max / 2)
5268 {
5269 lane -= max / 2;
5270 regno += 2;
5271 }
5272 ops[0] = gen_rtx_REG (DImode, regno);
5273 ops[1] = gen_rtx_REG (DImode, regno + 4);
5274 ops[2] = operands[1];
5275 ops[3] = GEN_INT (lane);
5276 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5277 return "";
5278 }
5279 [(set_attr "type" "neon_load2_one_lane<q>")]
5280 )
5281
;; UNSPEC_VLD2_DUP into a TImode register, iterated over the VDXBF modes.
;; Modes with more than one unit print vld2 with the all-lanes forms %e0[] and
;; %f0[]; single-unit modes print vld1 with %h0 instead.  The "type" attribute
;; switches on <V_mode_nunits> in the same way.
5282 (define_insn "neon_vld2_dup<mode>"
5283 [(set (match_operand:TI 0 "s_register_operand" "=w")
5284 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5285 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5286 UNSPEC_VLD2_DUP))]
5287 "TARGET_NEON"
5288 {
5289 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5290 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5291 else
5292 return "vld1.<V_sz_elem>\t%h0, %A1";
5293 }
5294 [(set (attr "type")
5295 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5296 (const_string "neon_load2_all_lanes<q>")
5297 (const_string "neon_load1_1reg<q>")))]
5298 )
5299
;; V8BF variant of UNSPEC_VLD2_DUP, enabled by TARGET_BF16_SIMD, with an OImode
;; destination and a V2BF memory operand.  The output code builds four V4BF
;; register rtxes at REGNO (operands[0]) + 0, 2, 4 and 6 and prints all four in
;; a vld2.16 register list.
;; NOTE(review): unlike neon_vld2_dup<mode>, the template here carries no []
;; all-lanes suffix on the registers; confirm this is the intended encoding.
5300 (define_insn "neon_vld2_dupv8bf"
5301 [(set (match_operand:OI 0 "s_register_operand" "=w")
5302 (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5303 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5304 UNSPEC_VLD2_DUP))]
5305 "TARGET_BF16_SIMD"
5306 {
5307 rtx ops[5];
5308 int tabbase = REGNO (operands[0]);
5309
5310 ops[4] = operands[1];
5311 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5312 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5313 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5314 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5315 output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
5316 return "";
5317 }
5318 [(set_attr "type" "neon_load2_all_lanes_q")]
5319 )
5320
;; Expander for the vec_store_lanesti<mode> name over the VDX modes.  Produces
;; an UNSPEC_VST2 set of a TImode memory from a TImode register; the vector
;; mode appears only in the dummy UNSPEC_VSTRUCTDUMMY operand.
5321 (define_expand "vec_store_lanesti<mode>"
5322 [(set (match_operand:TI 0 "neon_struct_operand")
5323 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5324 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5325 UNSPEC_VST2))]
5326 "TARGET_NEON")
5327
;; UNSPEC_VST2 from a TImode register, iterated over the VDXBF modes through
;; the dummy unspec.  When <V_sz_elem> is 64 the output is vst1.64 instead of
;; vst2.
;; NOTE(review): for the non-64-bit case the "type" attribute is
;; neon_store2_one_lane<q>, whereas the matching load pattern uses
;; neon_load2_2reg<q>; confirm whether a whole-register store type was meant.
5328 (define_insn "neon_vst2<mode>"
5329 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5330 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5331 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5332 UNSPEC_VST2))]
5333 "TARGET_NEON"
5334 {
5335 if (<V_sz_elem> == 64)
5336 return "vst1.64\t%h1, %A0";
5337 else
5338 return "vst2.<V_sz_elem>\t%h1, %A0";
5339 }
5340 [(set (attr "type")
5341 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5342 (const_string "neon_store1_2reg<q>")
5343 (const_string "neon_store2_one_lane<q>")))]
5344 )
5345
;; Expander for storing an OImode register group through a structure memory
;; operand, for the VQ2 modes.  It has no preparation code; the generated
;; RTL is an UNSPEC_VST2 set, the same shape as the OImode neon_vst2<mode>
;; insn.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
5352
;; VST2 of an OImode register group (VQ2BF modes): a single vst2 of the
;; %h1 register list to the structure address.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
5362
;; Store one lane of a TImode register pair as a 2-element structure.
;; The lane index is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[4];

  /* Memory first, then the two registers, then the lane index.  */
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 2);
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5386
;; Store one lane of an OImode register group as a 2-element structure
;; (VQ_HS modes).  The lane index is passed through NEON_ENDIAN_LANE_N; see
;; the comment on neon_vld1_lane for why lane numbers are reversed on
;; big-endian targets.  Lanes in the upper half of the vector are rebased to
;; the lower half and the starting register number is bumped by 2.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[4];

  if (idx >= half)
    {
      /* Upper-half lane: rebase the index and start two REGNOs later.  */
      idx -= half;
      base += 2;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = GEN_INT (idx);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
5416
;; Expander for loading an EImode register group from a structure memory
;; operand, for the VDX modes.  It has no preparation code; the generated
;; RTL is an UNSPEC_VLD3 set, the same shape as the EImode neon_vld3<mode>
;; insn.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")
5423
;; Load an EImode register group as 3-element structures (VLD3).  For
;; 64-bit elements a VLD1.64 of the register list is emitted instead.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  /* 64-bit elements use the VLD1 form.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
5441
;; Expander for loading a CImode register group (VQ2 modes).  All the work
;; is delegated to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src));
  DONE;
})
5451
;; VLD3 into a CImode register group (VQ2BF modes), split into two insns:
;; neon_vld3qa reads the first EImode-sized chunk of the memory operand and
;; neon_vld3qb reads the following EImode-sized chunk.  The second insn takes
;; the destination as an extra input operand as well.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_chunk = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], first_chunk));

  rtx second_chunk
    = adjust_address (first_chunk, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], second_chunk, operands[0]));
  DONE;
})
5466
;; First half of the CImode VLD3: loads from an EImode memory operand into
;; the registers at REGNO offsets 0, 4 and 8 from operand 0.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5485
;; Second half of the CImode VLD3: loads from an EImode memory operand into
;; the registers at REGNO offsets 2, 6 and 10 from operand 0.  Operand 2 is
;; tied to the destination (constraint "0").
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[4];

  xops[0] = gen_rtx_REG (DImode, base + 2);
  xops[1] = gen_rtx_REG (DImode, base + 6);
  xops[2] = gen_rtx_REG (DImode, base + 10);
  xops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", xops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
5505
;; Load one 3-element structure into a single lane of an EImode register
;; group; operand 2 is tied to the destination.  The lane index is passed
;; through NEON_ENDIAN_LANE_N; see the comment on neon_vld1_lane for why lane
;; numbers are reversed on big-endian targets.
;; NOTE(review): the address is printed with plain %3 rather than the %A
;; modifier used by the vld2/vld4 lane patterns -- presumably deliberate;
;; confirm before changing.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[5];

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5531
;; Load one 3-element structure into a single lane of a CImode register
;; group (VQ_HS modes); operand 2 is tied to the destination.  The lane
;; index is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; Lanes in the upper half of the vector are rebased to the lower half and
;; the starting register number is bumped by 2.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[5];

  if (idx >= half)
    {
      /* Upper-half lane: rebase the index and start two REGNOs later.  */
      idx -= half;
      base += 2;
    }

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = operands[1];
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
5563
;; VLD3 "dup" load into an EImode register group.  When the vector mode has
;; more than one element, the all-lanes form is emitted on the registers at
;; REGNO offsets 0, 2 and 4; otherwise a VLD1 of %h0 is emitted.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      const int base = REGNO (operands[0]);
      rtx xops[4];

      xops[0] = gen_rtx_REG (DImode, base);
      xops[1] = gen_rtx_REG (DImode, base + 2);
      xops[2] = gen_rtx_REG (DImode, base + 4);
      xops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", xops);
      return "";
    }

  /* Single-element vector mode.  */
  return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
5589
;; V8BF variant of the vld3 "dup" pattern (CImode destination).  Emits a
;; VLD3.16 all-lanes load on the V4BF registers at REGNO offsets 0, 2 and 4
;; from operand 0.
;; The unspec is UNSPEC_VLD3_DUP, matching neon_vld3_dup<mode>; the pattern
;; previously carried UNSPEC_VLD2_DUP, which mislabelled the operation.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[4];
  int tabbase = REGNO (operands[0]);

  /* Slot 3 carries the memory operand; slots 0-2 the destination regs.  */
  ops[3] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_all_lanes_q")]
)
5609
;; Expander for storing an EImode register group through a structure memory
;; operand, for the VDX modes.  It has no preparation code; the generated
;; RTL is an UNSPEC_VST3 set, the same shape as the EImode neon_vst3<mode>
;; insn.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
5616
;; Store an EImode register group as 3-element structures (VST3).  For
;; 64-bit elements a VST1.64 of the register list is emitted instead.
;; The scheduling type of the VST3 form is the three-register structure
;; store (neon_store3_3reg), matching neon_vst3qa/neon_vst3qb and the
;; neon_load3_3reg type used by neon_vld3<mode>; it was previously tagged as
;; a single-lane store.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_3reg<q>")))])
5633
;; Expander for storing a CImode register group (VQ2 modes).  All the work
;; is delegated to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst3<mode> (dest, src));
  DONE;
})
5643
;; VST3 of a CImode register group (VQ2BF modes), split into two insns:
;; neon_vst3qa writes the first EImode-sized chunk of the memory operand and
;; neon_vst3qb writes the following EImode-sized chunk.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_chunk = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (first_chunk, operands[1]));

  rtx second_chunk
    = adjust_address (first_chunk, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (second_chunk, operands[1]));
  DONE;
})
5658
;; First half of the CImode VST3: stores the registers at REGNO offsets 0,
;; 4 and 8 from operand 1 to an EImode memory operand.
(define_insn "neon_vst3qa<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 8);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5677
;; Second half of the CImode VST3: stores the registers at REGNO offsets 2,
;; 6 and 10 from operand 1 to an EImode memory operand.
(define_insn "neon_vst3qb<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[4];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 6);
  xops[3] = gen_rtx_REG (DImode, base + 10);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store3_3reg<q>")]
)
5696
;; Store one lane of an EImode register group as a 3-element structure.
;; The lane index is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; NOTE(review): the address is printed with plain %0 rather than the %A
;; modifier used by the vst2/vst4 lane patterns -- presumably deliberate;
;; confirm before changing.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[5];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 2);
  xops[3] = gen_rtx_REG (DImode, base + 4);
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5722
;; Store one lane of a CImode register group as a 3-element structure
;; (VQ_HS modes).  The lane index is passed through NEON_ENDIAN_LANE_N; see
;; the comment on neon_vld1_lane for why lane numbers are reversed on
;; big-endian targets.  Lanes in the upper half of the vector are rebased to
;; the lower half and the starting register number is bumped by 2.
(define_insn "neon_vst3_lane<mode>"
  [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_three_elem>
          [(match_operand:CI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[1]);
  rtx xops[5];

  if (idx >= half)
    {
      /* Upper-half lane: rebase the index and start two REGNOs later.  */
      idx -= half;
      base += 2;
    }

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 8);
  xops[4] = GEN_INT (idx);
  output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)
5754
;; Expander for loading an OImode register group from a structure memory
;; operand, for the VDX modes.  It has no preparation code; the generated
;; RTL is an UNSPEC_VLD4 set, the same shape as the OImode neon_vld4<mode>
;; insn.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON")
5761
;; Load an OImode register group as 4-element structures (VLD4).  For
;; 64-bit elements a VLD1.64 of the register list is emitted instead.
(define_insn "neon_vld4<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4))]
  "TARGET_NEON"
{
  /* 64-bit elements use the VLD1 form.  */
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld4.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_4reg<q>")
                    (const_string "neon_load4_4reg<q>")))]
)
5779
;; Expander for loading an XImode register group (VQ2 modes).  All the work
;; is delegated to the neon_vld4<mode> expander.
(define_expand "vec_load_lanesxi<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vld4<mode> (dest, src));
  DONE;
})
5789
;; VLD4 into an XImode register group (VQ2BF modes), split into two insns:
;; neon_vld4qa reads the first OImode-sized chunk of the memory operand and
;; neon_vld4qb reads the following OImode-sized chunk.  The second insn takes
;; the destination as an extra input operand as well.
(define_expand "neon_vld4<mode>"
  [(match_operand:XI 0 "s_register_operand")
   (match_operand:XI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_chunk = adjust_address (operands[1], OImode, 0);
  emit_insn (gen_neon_vld4qa<mode> (operands[0], first_chunk));

  rtx second_chunk
    = adjust_address (first_chunk, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vld4qb<mode> (operands[0], second_chunk, operands[0]));
  DONE;
})
5804
;; First half of the XImode VLD4: loads from an OImode memory operand into
;; the registers at REGNO offsets 0, 4, 8 and 12 from operand 0.
(define_insn "neon_vld4qa<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = gen_rtx_REG (DImode, base + 12);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5824
;; Second half of the XImode VLD4: loads from an OImode memory operand into
;; the registers at REGNO offsets 2, 6, 10 and 14 from operand 0.  Operand 2
;; is tied to the destination (constraint "0").
(define_insn "neon_vld4qb<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  rtx xops[5];

  xops[0] = gen_rtx_REG (DImode, base + 2);
  xops[1] = gen_rtx_REG (DImode, base + 6);
  xops[2] = gen_rtx_REG (DImode, base + 10);
  xops[3] = gen_rtx_REG (DImode, base + 14);
  xops[4] = operands[1];
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", xops);
  return "";
}
  [(set_attr "type" "neon_load4_4reg<q>")]
)
5845
;; Load one 4-element structure into a single lane of an OImode register
;; group; operand 2 is tied to the destination.  The lane index is passed
;; through NEON_ENDIAN_LANE_N; see the comment on neon_vld1_lane for why lane
;; numbers are reversed on big-endian targets.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[0]);
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  rtx xops[6];

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 6);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5872
;; Load one 4-element structure into a single lane of an XImode register
;; group (VQ_HS modes); operand 2 is tied to the destination.  The lane
;; index is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
;; Lanes in the upper half of the vector are rebased to the lower half and
;; the starting register number is bumped by 2.
(define_insn "neon_vld4_lane<mode>"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:XI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  int base = REGNO (operands[0]);
  rtx xops[6];

  if (idx >= half)
    {
      /* Upper-half lane: rebase the index and start two REGNOs later.  */
      idx -= half;
      base += 2;
    }

  xops[0] = gen_rtx_REG (DImode, base);
  xops[1] = gen_rtx_REG (DImode, base + 4);
  xops[2] = gen_rtx_REG (DImode, base + 8);
  xops[3] = gen_rtx_REG (DImode, base + 12);
  xops[4] = operands[1];
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
                   xops);
  return "";
}
  [(set_attr "type" "neon_load4_one_lane<q>")]
)
5905
;; VLD4 "dup" load into an OImode register group.  When the vector mode has
;; more than one element, the all-lanes form is emitted on the registers at
;; REGNO offsets 0, 2, 4 and 6; otherwise a VLD1 of %h0 is emitted.
(define_insn "neon_vld4_dup<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      const int base = REGNO (operands[0]);
      rtx xops[5];

      xops[0] = gen_rtx_REG (DImode, base);
      xops[1] = gen_rtx_REG (DImode, base + 2);
      xops[2] = gen_rtx_REG (DImode, base + 4);
      xops[3] = gen_rtx_REG (DImode, base + 6);
      xops[4] = operands[1];
      output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
                       xops);
      return "";
    }

  /* Single-element vector mode.  */
  return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load4_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
5934
;; V8BF variant of the vld4 "dup" pattern (XImode destination).  Emits a
;; VLD4.16 all-lanes load on the V4BF registers at REGNO offsets 0, 2, 4
;; and 6 from operand 0.
;; The unspec is UNSPEC_VLD4_DUP, matching neon_vld4_dup<mode>; the pattern
;; previously carried UNSPEC_VLD2_DUP, which mislabelled the operation.
(define_insn "neon_vld4_dupv8bf"
  [(set (match_operand:XI 0 "s_register_operand" "=w")
        (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD4_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[5];
  int tabbase = REGNO (operands[0]);

  /* Slot 4 carries the memory operand; slots 0-3 the destination regs.  */
  ops[4] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
  output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load4_all_lanes_q")]
)
5955
;; Expander for storing an OImode register group through a structure memory
;; operand, for the VDX modes.  It has no preparation code; the generated
;; RTL is an UNSPEC_VST4 set, the same shape as the OImode neon_vst4<mode>
;; insn.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON")
5962
;; Store an OImode register group as 4-element structures (VST4).  For
;; 64-bit elements a VST1.64 of the register list is emitted instead.
(define_insn "neon_vst4<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4))]
  "TARGET_NEON"
{
  /* 64-bit elements use the VST1 form.  */
  return (<V_sz_elem> == 64
          ? "vst1.64\t%h1, %A0"
          : "vst4.<V_sz_elem>\t%h1, %A0");
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_4reg<q>")
                    (const_string "neon_store4_4reg<q>")))]
)
5980
;; Expander for storing an XImode register group (VQ2 modes).  All the work
;; is delegated to the neon_vst4<mode> expander.
(define_expand "vec_store_lanesxi<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vst4<mode> (dest, src));
  DONE;
})
5990
;; VST4 of an XImode register group (VQ2BF modes), split into two insns:
;; neon_vst4qa writes the first OImode-sized chunk of the memory operand and
;; neon_vst4qb writes the following OImode-sized chunk.
(define_expand "neon_vst4<mode>"
  [(match_operand:XI 0 "neon_struct_operand")
   (match_operand:XI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx first_chunk = adjust_address (operands[0], OImode, 0);
  emit_insn (gen_neon_vst4qa<mode> (first_chunk, operands[1]));

  rtx second_chunk
    = adjust_address (first_chunk, OImode, GET_MODE_SIZE (OImode));
  emit_insn (gen_neon_vst4qb<mode> (second_chunk, operands[1]));
  DONE;
})
6005
;; First half of the XImode VST4: stores the registers at REGNO offsets 0,
;; 4, 8 and 12 from operand 1 to an OImode memory operand.
(define_insn "neon_vst4qa<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4A))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 4);
  xops[3] = gen_rtx_REG (DImode, base + 8);
  xops[4] = gen_rtx_REG (DImode, base + 12);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6025
;; Second half of the XImode VST4: stores the registers at REGNO offsets 2,
;; 6, 10 and 14 from operand 1 to an OImode memory operand.
(define_insn "neon_vst4qb<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST4B))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  rtx xops[5];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base + 2);
  xops[2] = gen_rtx_REG (DImode, base + 6);
  xops[3] = gen_rtx_REG (DImode, base + 10);
  xops[4] = gen_rtx_REG (DImode, base + 14);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", xops);
  return "";
}
  [(set_attr "type" "neon_store4_4reg<q>")]
)
6045
;; Store one lane of an OImode register group as a 4-element structure.
;; The lane index is passed through NEON_ENDIAN_LANE_N; see the comment on
;; neon_vld1_lane for why lane numbers are reversed on big-endian targets.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  const int base = REGNO (operands[1]);
  HOST_WIDE_INT idx
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  rtx xops[6];

  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (DImode, base);
  xops[2] = gen_rtx_REG (DImode, base + 2);
  xops[3] = gen_rtx_REG (DImode, base + 4);
  xops[4] = gen_rtx_REG (DImode, base + 6);
  xops[5] = GEN_INT (idx);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   xops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6072
;; Store one lane of an XImode register group as a 4-element structure
;; (VQ_HS modes).  The lane index is passed through NEON_ENDIAN_LANE_N; see
;; the comment on neon_vld1_lane for why lane numbers are reversed on
;; big-endian targets.  Lanes in the upper half of the vector are rebased to
;; the lower half and the starting register number is bumped by 2.
;; The scheduling type is the single-lane store (neon_store4_one_lane), the
;; same as the OImode neon_vst4_lane<mode> pattern; it was previously tagged
;; as a four-register structure store.
(define_insn "neon_vst4_lane<mode>"
  [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_four_elem>
          [(match_operand:XI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST4_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[6];
  if (lane >= max / 2)
    {
      /* Upper-half lane: rebase the index and start two REGNOs later.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = gen_rtx_REG (DImode, regno + 8);
  ops[4] = gen_rtx_REG (DImode, regno + 12);
  ops[5] = GEN_INT (lane);
  output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
                   ops);
  return "";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)
6105
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by operand 2, which must satisfy
;; vect_par_constant_low.  Emitted as VMOVL on %e1.  Little-endian only.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6115
;; Widen (sign- or zero-extend, per the SE iterator) the elements of
;; operand 1 selected by operand 2, which must satisfy
;; vect_par_constant_high.  Emitted as VMOVL on %f1.  Little-endian only.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "type" "neon_shift_imm_long")]
)
6125
;; Expander for unpacking the high half of a VU-mode vector.  Builds a
;; PARALLEL holding the indices nunits/2 .. nunits-1 and hands it to
;; neon_vec_unpack<US>_hi_<mode> as the selector.  Little-endian only.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int n = 0; n < half; n++)
    RTVEC_ELT (sel, n) = GEN_INT (half + n);

  rtx high_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
                                                 operands[1],
                                                 high_par));
  DONE;
}
)
6144
;; Expander for unpacking the low half of a VU-mode vector.  Builds a
;; PARALLEL holding the indices 0 .. nunits/2-1 and hands it to
;; neon_vec_unpack<US>_lo_<mode> as the selector.  Little-endian only.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int n = 0; n < half; n++)
    RTVEC_ELT (sel, n) = GEN_INT (n);

  rtx low_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
                                                 operands[1],
                                                 low_par));
  DONE;
}
)
6162
;; Widening multiply of the elements of operands 1 and 3 selected by
;; operand 2 (vect_par_constant_low; the same selector is applied to both
;; inputs via match_dup).  Emitted as VMULL on %e1 and %e3.  Little-endian
;; only.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6175
;; Expander for the widening multiply of the low halves of two VU-mode
;; vectors.  Builds a PARALLEL holding the indices 0 .. nunits/2-1 and hands
;; it to neon_vec_<US>mult_lo_<mode> as the selector.  Little-endian only.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int n = 0; n < half; n++)
    RTVEC_ELT (sel, n) = GEN_INT (n);

  rtx low_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
                                               operands[1],
                                               low_par,
                                               operands[2]));
  DONE;
}
)
6196
;; Widening multiply of the elements of operands 1 and 3 selected by
;; operand 2 (vect_par_constant_high; the same selector is applied to both
;; inputs via match_dup).  Emitted as VMULL on %f1 and %f3.  Little-endian
;; only.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
)
6209
;; Expander for the widening multiply of the high halves of two VU-mode
;; vectors.  Builds a PARALLEL holding the indices nunits/2 .. nunits-1 and
;; hands it to neon_vec_<US>mult_hi_<mode> as the selector.  Little-endian
;; only.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  const int half = <V_mode_nunits> / 2;
  rtvec sel = rtvec_alloc (half);

  for (int n = 0; n < half; n++)
    RTVEC_ELT (sel, n) = GEN_INT (half + n);

  rtx high_par = gen_rtx_PARALLEL (<MODE>mode, sel);
  emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
                                               operands[1],
                                               high_par,
                                               operands[2]));
  DONE;
}
)
6231
;; Widening left shift: shift the VW-mode operand 1 left by the constant
;; operand 2, then sign- or zero-extend (per the SE iterator) to the widened
;; mode.  Emitted as VSHLL.
(define_insn "neon_vec_<US>shiftl_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen>
          (ashift:VW
            (match_operand:VW 1 "register_operand" "w")
            (match_operand:<V_innermode> 2
              "const_neon_scalar_shift_amount_operand" ""))))]
  "TARGET_NEON"
  "vshll.<US><V_sz_elem> %q0, %P1, %2"
  [(set_attr "type" "neon_shift_imm_long")]
)
6242
;; Expander for the widening left shift of the low half of a VU-mode
;; vector.  Takes the half-width subreg of operand 1 at byte offset 0 and
;; passes it to neon_vec_<US>shiftl_<V_half>.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx low_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], low_half,
                                                operands[2]));
  DONE;
}
)
6255
;; Expander for the widening left shift of the high half of a VU-mode
;; vector.  Takes the half-width subreg of operand 1 at byte offset
;; GET_MODE_SIZE of the half mode and passes it to
;; neon_vec_<US>shiftl_<V_half>.  Little-endian only.
(define_expand "vec_widen_<US>shiftl_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx high_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0], high_half,
                                                operands[2]));
  DONE;
}
)
6269
6270 ;; Vectorize for non-neon-quad case
;; Extends (SE) operand 1 (mode iterator VDI) to <V_widen> with one VMOVL.
;; Operand 0 is printed with the %q modifier and operand 1 with %P.
6271 (define_insn "neon_unpack<US>_<mode>"
6272 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6273 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6274 "TARGET_NEON"
6275 "vmovl.<US><V_sz_elem> %q0, %P1"
6276 [(set_attr "type" "neon_move")]
6277 )
6278
;; Expander producing the low part of the extension of operand 1 (iterator VDI).
;; The whole input is first extended into a fresh <V_widen> pseudo with
;; gen_neon_unpack<US>_<mode>; gen_neon_vget_low<V_widen_l> then copies the
;; low part of that pseudo into operand 0.
6279 (define_expand "vec_unpack<US>_lo_<mode>"
6280 [(match_operand:<V_double_width> 0 "register_operand")
6281 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6282 "TARGET_NEON"
6283 {
6284 rtx widened = gen_reg_rtx (<V_widen>mode);
6285 emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
6286 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
6287
6288 DONE;
6289 }
6290 )
6291
;; Expander producing the high part of the extension of operand 1 (iterator VDI).
;; The whole input is first extended into a fresh <V_widen> pseudo with
;; gen_neon_unpack<US>_<mode>; gen_neon_vget_high<V_widen_l> then copies the
;; high part of that pseudo into operand 0.
6292 (define_expand "vec_unpack<US>_hi_<mode>"
6293 [(match_operand:<V_double_width> 0 "register_operand")
6294 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6295 "TARGET_NEON"
6296 {
6297 rtx widened = gen_reg_rtx (<V_widen>mode);
6298 emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
6299 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
6300
6301 DONE;
6302 }
6303 )
6304
;; Widening multiply.
;; Matches mult:<V_widen> of the SE-extended operands 1 and 2 (mode iterator
;; VDI) and emits a single VMULL with element suffix <US><V_sz_elem>.
6305 (define_insn "neon_vec_<US>mult_<mode>"
6306 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6307 (mult:<V_widen> (SE:<V_widen>
6308 (match_operand:VDI 1 "register_operand" "w"))
6309 (SE:<V_widen>
6310 (match_operand:VDI 2 "register_operand" "w"))))]
6311 "TARGET_NEON"
6312 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6313 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6314 )
6315
;; Expander for the high part of the widening product of operands 1 and 2
;; (mode iterator VDI).
;; The full product is computed into a fresh <V_widen> pseudo with
;; gen_neon_vec_<US>mult_<mode>; gen_neon_vget_high<V_widen_l> then copies
;; the high part into operand 0.
6316 (define_expand "vec_widen_<US>mult_hi_<mode>"
6317 [(match_operand:<V_double_width> 0 "register_operand")
6318 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6319 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6320 "TARGET_NEON"
6321 {
6322 rtx product = gen_reg_rtx (<V_widen>mode);
6323 emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
6324 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
6325
6326 DONE;
6327
6328 }
6329 )
6330
;; Expander for the low part of the widening product of operands 1 and 2
;; (mode iterator VDI).
;; The full product is computed into a fresh <V_widen> pseudo with
;; gen_neon_vec_<US>mult_<mode>; gen_neon_vget_low<V_widen_l> then copies
;; the low part into operand 0.
6331 (define_expand "vec_widen_<US>mult_lo_<mode>"
6332 [(match_operand:<V_double_width> 0 "register_operand")
6333 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6334 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6335 "TARGET_NEON"
6336 {
6337 rtx product = gen_reg_rtx (<V_widen>mode);
6338 emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1], operands[2]));
6339 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
6340
6341 DONE;
6342
6343 }
6344 )
6345
;; Expander for the high part of the widening shift-left of operand 1
;; (mode iterator VDI) by the immediate operand 2.
;; The full shifted vector is computed into a fresh <V_widen> pseudo with
;; gen_neon_vec_<US>shiftl_<mode>; gen_neon_vget_high<V_widen_l> then copies
;; the high part into operand 0.
6346 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6347 [(match_operand:<V_double_width> 0 "register_operand")
6348 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6349 (match_operand:SI 2 "immediate_operand")]
6350 "TARGET_NEON"
6351 {
6352 rtx shifted = gen_reg_rtx (<V_widen>mode);
6353 emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
6354 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], shifted));
6355
6356 DONE;
6357 }
6358 )
6359
;; Expander for the low part of the widening shift-left of operand 1
;; (mode iterator VDI) by the immediate operand 2.
;; The full shifted vector is computed into a fresh <V_widen> pseudo with
;; gen_neon_vec_<US>shiftl_<mode>; gen_neon_vget_low<V_widen_l> then copies
;; the low part into operand 0.
6360 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6361 [(match_operand:<V_double_width> 0 "register_operand")
6362 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6363 (match_operand:SI 2 "immediate_operand")]
6364 "TARGET_NEON"
6365 {
6366 rtx shifted = gen_reg_rtx (<V_widen>mode);
6367 emit_insn (gen_neon_vec_<US>shiftl_<mode> (shifted, operands[1], operands[2]));
6368 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], shifted));
6369
6370 DONE;
6371 }
6372 )
6373
6374 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6375 ; because the ordering of vector elements in Q registers is different from what
6376 ; the semantics of the instructions require.
6377
;; Narrowing pack of two vectors.
;; Matches the vec_concat of operands 1 and 2 (mode iterator VN), each
;; truncated to <V_narrow>, and emits two VMOVN instructions: the first
;; writes %e0 from %q1, the second writes %f0 from %q2 (length 8).
;; Operand 0 is earlyclobber (=&w): part of it is written by the first
;; instruction before operand 2 is read by the second.
;; Only enabled for little-endian NEON.
6378 (define_insn "vec_pack_trunc_<mode>"
6379 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6380 (vec_concat:<V_narrow_pack>
6381 (truncate:<V_narrow>
6382 (match_operand:VN 1 "register_operand" "w"))
6383 (truncate:<V_narrow>
6384 (match_operand:VN 2 "register_operand" "w"))))]
6385 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6386 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6387 [(set_attr "type" "multiple")
6388 (set_attr "length" "8")]
6389 )
6390
6391 ;; For the non-quad case.
;; Truncates operand 1 (mode iterator VN) to <V_narrow> with a single VMOVN.
;; Only enabled for little-endian NEON.
6392 (define_insn "neon_vec_pack_trunc_<mode>"
6393 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6394 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6395 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6396 "vmovn.i<V_sz_elem>\t%P0, %q1"
6397 [(set_attr "type" "neon_move_narrow_q")]
6398 )
6399
;; Narrowing pack of two operands of mode iterator VSHFT.
;; A fresh <V_DOUBLE> pseudo is filled from operand 1 via
;; gen_move_lo_quad_<V_double> and from operand 2 via
;; gen_move_hi_quad_<V_double>; the combined vector is then narrowed into
;; operand 0 with gen_neon_vec_pack_trunc_<V_double>.
;; Only enabled for little-endian NEON.
6400 (define_expand "vec_pack_trunc_<mode>"
6401 [(match_operand:<V_narrow_pack> 0 "register_operand")
6402 (match_operand:VSHFT 1 "register_operand")
6403 (match_operand:VSHFT 2 "register_operand")]
6404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6405 {
6406 rtx combined = gen_reg_rtx (<V_DOUBLE>mode);
6407
6408 emit_insn (gen_move_lo_quad_<V_double> (combined, operands[1]));
6409 emit_insn (gen_move_hi_quad_<V_double> (combined, operands[2]));
6410 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], combined));
6411 DONE;
6412 })
6413
;; Absolute difference (mode iterator VF).
;; Matches abs (minus operand 1, operand 2) and emits a single VABD with
;; element suffix <V_s_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.
6414 (define_insn "neon_vabd<mode>_2"
6415 [(set (match_operand:VF 0 "s_register_operand" "=w")
6416 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6417 (match_operand:VF 2 "s_register_operand" "w"))))]
6418 "ARM_HAVE_NEON_<MODE>_ARITH"
6419 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6420 [(set_attr "type" "neon_fp_abd_s<q>")]
6421 )
6422
;; Absolute difference (mode iterator VF), unspec form.
;; Matches abs of an UNSPEC_VSUB of operands 1 and 2 and emits a single VABD.
;; The element suffix used here is <V_if_elem>, whereas neon_vabd<mode>_2
;; uses <V_s_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.
6423 (define_insn "neon_vabd<mode>_3"
6424 [(set (match_operand:VF 0 "s_register_operand" "=w")
6425 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6426 (match_operand:VF 2 "s_register_operand" "w")]
6427 UNSPEC_VSUB)))]
6428 "ARM_HAVE_NEON_<MODE>_ARITH"
6429 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6430 [(set_attr "type" "neon_fp_abd_s<q>")]
6431 )
6432
;; Matrix multiply-accumulate on V16QI inputs with a V4SI accumulator.
;; Operand 1 is the accumulator and is tied to the output (constraint 0);
;; it is added to the MATMUL unspec of operands 2 and 3.
;; Emits v<sup>mmla.<mmla_sfx>.  Requires TARGET_I8MM.
6433 (define_insn "neon_<sup>mmlav16qi"
6434 [(set (match_operand:V4SI 0 "register_operand" "=w")
6435 (plus:V4SI
6436 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6437 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6438 (match_operand:V4SI 1 "register_operand" "0")))]
6439 "TARGET_I8MM"
6440 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6441 [(set_attr "type" "neon_mla_s_q")]
6442 )
6443
;; BF16 dot product with accumulate (mode iterator VCVTF).
;; Operand 1 is the accumulator and is tied to the output (constraint 0);
;; it is added to the UNSPEC_DOT_S of the <VSF2BF> operands 2 and 3.
;; Emits vdot.bf16.  Requires TARGET_BF16_SIMD.
6444 (define_insn "neon_vbfdot<VCVTF:mode>"
6445 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6446 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6447 (unspec:VCVTF [
6448 (match_operand:<VSF2BF> 2 "register_operand" "w")
6449 (match_operand:<VSF2BF> 3 "register_operand" "w")]
6450 UNSPEC_DOT_S)))]
6451 "TARGET_BF16_SIMD"
6452 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6453 [(set_attr "type" "neon_dot<q>")]
6454 )
6455
;; BF16 dot product with accumulate, by-lane form with a V4BF lane source.
;; Operand 1 is the accumulator and is tied to the output (constraint 0).
;; Operand 3 is the V4BF vector holding the lane (constraint x) and operand 4
;; is the immediate lane index, printed as %P3[%c4].
;; Requires TARGET_BF16_SIMD.
6456 (define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6457 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6458 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6459 (unspec:VCVTF [
6460 (match_operand:<VSF2BF> 2 "register_operand" "w")
6461 (match_operand:V4BF 3 "register_operand" "x")
6462 (match_operand:SI 4 "immediate_operand" "i")]
6463 UNSPEC_DOT_S)))]
6464 "TARGET_BF16_SIMD"
6465 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6466 [(set_attr "type" "neon_dot<q>")]
6467 )
6468
;; BF16 dot product with accumulate, by-lane form with a V8BF lane source.
;; Operand 1 is the accumulator and is tied to the output (constraint 0).
;; The output code splits the lane index at a quarter of the unit count of
;; operand 3's mode: indices below that bound are printed against %e3
;; unchanged, the others are rebased by the bound and printed against %f3.
;; Requires TARGET_BF16_SIMD.
6469 (define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6470 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6471 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6472 (unspec:VCVTF [
6473 (match_operand:<VSF2BF> 2 "register_operand" "w")
6474 (match_operand:V8BF 3 "register_operand" "x")
6475 (match_operand:SI 4 "immediate_operand" "i")]
6476 UNSPEC_DOT_S)))]
6477 "TARGET_BF16_SIMD"
6478 {
6479 int lane_idx = INTVAL (operands[4]);
6480 int lane_bound = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6481 if (lane_idx >= lane_bound)
6482 {
6483 operands[4] = GEN_INT (lane_idx - lane_bound);
6484 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6485 }
6486 else
6487 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6488 }
6489 [(set_attr "type" "neon_dot<q>")]
6490 )
6491
;; Converts the V4SF operand 1 to a BF16 vector (mode iterator VBFCVT),
;; modelled as UNSPEC_BFCVT and emitted as vcvt.bf16.f32.
;; The destination is printed with the <V_bf_low> modifier.
;; Requires TARGET_BF16_SIMD.
6492 (define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6493 [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6494 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6495 UNSPEC_BFCVT))]
6496 "TARGET_BF16_SIMD"
6497 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6498 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6499 )
6500
;; Converts the V4SF operand 2 to BF16 and writes it to %f0 of the V8BF
;; destination, modelled as UNSPEC_BFCVT_HIGH.
;; Operand 1 is the incoming V8BF value and is tied to the output
;; (constraint 0).  Requires TARGET_BF16_SIMD.
6501 (define_insn "neon_vbfcvtv4sf_highv8bf"
6502 [(set (match_operand:V8BF 0 "register_operand" "=w")
6503 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6504 (match_operand:V4SF 2 "register_operand" "w")]
6505 UNSPEC_BFCVT_HIGH))]
6506 "TARGET_BF16_SIMD"
6507 "vcvt.bf16.f32\\t%f0, %q2"
6508 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6509 )
6510
;; Scalar conversion of the SF operand 1 to BF, modelled as UNSPEC_BFCVT and
;; emitted as vcvtb.bf16.f32.  Both operands use the t constraint.
;; Requires TARGET_BF16_FP.
6511 (define_insn "neon_vbfcvtsf"
6512 [(set (match_operand:BF 0 "register_operand" "=t")
6513 (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6514 UNSPEC_BFCVT))]
6515 "TARGET_BF16_FP"
6516 "vcvtb.bf16.f32\\t%0, %1"
6517 [(set_attr "type" "f_cvt")]
6518 )
6519
;; Converts a BF16 vector (mode iterator VBFCVT) to V4SF, modelled as
;; UNSPEC_BFCVT and emitted as vshll.u32 with a shift of #16.
;; The source is printed with the <V_bf_low> modifier.
;; Requires TARGET_BF16_SIMD.
6520 (define_insn "neon_vbfcvt<VBFCVT:mode>"
6521 [(set (match_operand:V4SF 0 "register_operand" "=w")
6522 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6523 UNSPEC_BFCVT))]
6524 "TARGET_BF16_SIMD"
6525 "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6526 [(set_attr "type" "neon_shift_imm_q")]
6527 )
6528
;; Converts part of the V8BF operand 1 to V4SF, modelled as
;; UNSPEC_BFCVT_HIGH and emitted as vshll.u32 with a shift of #16.
;; The source is printed as %f1.  Requires TARGET_BF16_SIMD.
6529 (define_insn "neon_vbfcvt_highv8bf"
6530 [(set (match_operand:V4SF 0 "register_operand" "=w")
6531 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6532 UNSPEC_BFCVT_HIGH))]
6533 "TARGET_BF16_SIMD"
6534 "vshll.u32\\t%q0, %f1, #16"
6535 [(set_attr "type" "neon_shift_imm_q")]
6536 )
6537
6538 ;; Convert a BF scalar operand to SF via VSHL.
6539 ;; VSHL doesn't accept the 32-bit registers in which the BF and SF scalar
6540 ;; operands would be allocated, so the operands must be converted to
6541 ;; intermediate vectors (i.e. V2SI) in order to use 64-bit registers.
;; Expander for the scalar BF to SF conversion.  Requires TARGET_BF16_FP.
;; Three insns are emitted, using two fresh V2SI pseudos:
;;   1. gen_neon_vbfcvtbf_cvtmodev2si moves the BF operand 1 into a V2SI pseudo;
;;   2. gen_neon_vshl_nv2si shifts that pseudo by 16 into the second pseudo;
;;   3. gen_neon_vbfcvtbf_cvtmodesf moves the shifted value into the SF
;;      operand 0.
6542 (define_expand "neon_vbfcvtbf"
6543 [(match_operand:SF 0 "register_operand")
6544 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6545 "TARGET_BF16_FP"
6546 {
6547 rtx shifted_v2si = gen_reg_rtx (V2SImode);
6548 rtx bf_v2si = gen_reg_rtx (V2SImode);
6549 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (bf_v2si, operands[1]));
6550 emit_insn (gen_neon_vshl_nv2si (shifted_v2si, bf_v2si, gen_int_mode (16, SImode)));
6551 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], shifted_v2si));
6552 DONE;
6553 })
6554
6555 ;; Convert BF mode to V2SI and V2SI to SF.
6556 ;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6557 ;; register indexed by a 32-bit sub-register number.
6558 ;; This will generate reloads, but the compiler can optimize out the moves.
6559 ;; Use the 'x' constraint to guarantee that the 32-bit sub-registers are in
6560 ;; an indexable range, so as to avoid extra moves.
;; Mode-changing pseudo-move used by neon_vbfcvtbf (mode iterator VBFCVTM).
;; The output template is empty, so no assembly is emitted; operand 1 is
;; tied to operand 0 (constraint 0) and the output uses the x constraint.
;; Requires TARGET_BF16_FP.  No type attribute is set on this pattern.
6561 (define_insn "neon_vbfcvtbf_cvtmode<mode>"
6562 [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6563 (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
6564 UNSPEC_BFCVT))]
6565 "TARGET_BF16_FP"
6566 ""
6567 )
6568
;; BF16 matrix multiply-accumulate into a V4SF accumulator.
;; Operand 1 is the accumulator and is tied to the output (constraint 0);
;; it is added to the UNSPEC_BFMMLA of the V8BF operands 2 and 3.
;; Emits vmmla.bf16.  Requires TARGET_BF16_SIMD.
6569 (define_insn "neon_vmmlav8bf"
6570 [(set (match_operand:V4SF 0 "register_operand" "=w")
6571 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6572 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6573 (match_operand:V8BF 3 "register_operand" "w")]
6574 UNSPEC_BFMMLA)))]
6575 "TARGET_BF16_SIMD"
6576 "vmmla.bf16\\t%q0, %q2, %q3"
6577 [(set_attr "type" "neon_fp_mla_s_q")]
6578 )
6579
;; BF16 fused multiply-accumulate into a V4SF accumulator; the <bt> part of
;; the name and mnemonic comes from the BF_MA unspec iterator.
;; Operand 1 is the accumulator and is tied to the output (constraint 0);
;; it is added to the BF_MA unspec of the V8BF operands 2 and 3.
;; Requires TARGET_BF16_SIMD.
6580 (define_insn "neon_vfma<bt>v8bf"
6581 [(set (match_operand:V4SF 0 "register_operand" "=w")
6582 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6583 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6584 (match_operand:V8BF 3 "register_operand" "w")]
6585 BF_MA)))]
6586 "TARGET_BF16_SIMD"
6587 "vfma<bt>.bf16\\t%q0, %q2, %q3"
6588 [(set_attr "type" "neon_fp_mla_s_q")]
6589 )
6590
;; By-lane form of the BF16 fused multiply-accumulate.
;; Operand 1 is the accumulator and is tied to the output (constraint 0).
;; Operand 3 is a V4BF vector (constraint x) and operand 4 is the constant
;; lane index; they are printed together as %P3[%c4].
;; Requires TARGET_BF16_SIMD.
6591 (define_insn "neon_vfma<bt>_lanev8bf"
6592 [(set (match_operand:V4SF 0 "register_operand" "=w")
6593 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6594 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6595 (match_operand:V4BF 3 "register_operand" "x")
6596 (match_operand:SI 4 "const_int_operand" "n")]
6597 BF_MA)))]
6598 "TARGET_BF16_SIMD"
6599 "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6600 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6601 )
6602
;; Expander for the by-lane BF16 fused multiply-accumulate where the lane
;; source (operand 3) is a full V8BF vector and the lane index (operand 4)
;; is asserted to be in the range 0..7.  Requires TARGET_BF16_SIMD.
;; Both branches end in gen_neon_vfma<bt>_lanev8bf:
;;   lane < 4:  operands are passed through unchanged;
;;   lane >= 4: the high part of operand 3 is extracted into a fresh V4BF
;;              pseudo with gen_neon_vget_highv8bf and the index is rebased
;;              by 4.
;; NOTE(review): in the lane < 4 branch operands[3] keeps the V8BF mode
;; declared here, while operand 3 of neon_vfma<bt>_lanev8bf is declared
;; V4BF -- confirm the emitted insn is recognized as intended.
;; NOTE(review): this define_expand carries operand constraints and a
;; trailing set_attr vector -- confirm both are intentional for an expander.
6603 (define_expand "neon_vfma<bt>_laneqv8bf"
6604 [(set (match_operand:V4SF 0 "register_operand" "=w")
6605 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6606 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6607 (match_operand:V8BF 3 "register_operand" "x")
6608 (match_operand:SI 4 "const_int_operand" "n")]
6609 BF_MA)))]
6610 "TARGET_BF16_SIMD"
6611 {
6612 int lane = INTVAL (operands[4]);
6613 gcc_assert (IN_RANGE(lane, 0, 7));
6614 if (lane < 4)
6615 {
6616 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6617 }
6618 else
6619 {
/* Index 4..7: take the high part of operand 3 and rebase the index.  */
6620 rtx op_highpart = gen_reg_rtx (V4BFmode);
6621 emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6622 operands[4] = GEN_INT (lane - 4);
6623 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6624 }
6625 DONE;
6626 }
6627 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6628 )