]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/arm/neon.md
[ARM][GCC][1/x]: MVE ACLE intrinsics framework patch.
[thirdparty/gcc.git] / gcc / config / arm / neon.md
CommitLineData
;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006-2020 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
88f77cba 20
88f77cba 21
;; Attribute used to permit string comparisons against <VQH_mnem> in
;; type attribute definitions.  The allowed values are vadd, vmin and
;; vmax; the default is vadd.
(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Store a V8QI register operand to memory, wrapped in
;; UNSPEC_UNALIGNED_STORE.  The assembly text is produced by
;; output_move_neon.
(define_insn "unaligned_storev8qi"
  [(set (match_operand:V8QI 0 "memory_operand" "=Un")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
                     UNSPEC_UNALIGNED_STORE))]
  "TARGET_NEON"
  "*
  return output_move_neon (operands);
  "
  [(set_attr "type" "neon_store1_1reg")])
35
;; Move pattern for the VDXMOV modes.  At least one operand must be a
;; register.  There are ten alternatives:
;;   0      register-to-register vmov
;;   1, 4   handled by output_move_neon
;;   2, 3   immediates, emitted as vmov.f32 or vmov.i<width>
;;   5, 6   vmov between a %Q/%R register pair and a %P register
;;   7, 8   handled by output_move_double
;;   9      emitted as "#"
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
        (match_operand:VDXMOV 1 "general_operand"
          " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      static char templ[40];

      /* The call rewrites operands[1] and reports the element width;
         a width of zero selects the vmov.f32 form.  */
      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                                &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%P0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%P0, %P1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Alternatives 2 and 3 were handled above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
    case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
    case 9: return "#";
    default: return output_move_double (operands, true, NULL);
    }
}
  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
                     neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
                     neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
                     multiple")
   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
   (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
88f77cba
JB
82
;; Move pattern for the VQXMOV modes.  At least one operand must be a
;; register.  There are ten alternatives:
;;   0        register-to-register vmov on %q registers
;;   1, 4     handled by output_move_neon
;;   2, 3     immediates, emitted as vmov.f32 or vmov.i<width>
;;   5, 6     two vmov instructions, one per half (%e and %f)
;;   7, 8, 9  handled by output_move_quad
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
          "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
        (match_operand:VQXMOV 1 "general_operand"
          " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  if (which_alternative == 2 || which_alternative == 3)
    {
      int width, is_valid;
      static char templ[40];

      /* The call rewrites operands[1] and reports the element width;
         a width of zero selects the vmov.f32 form.  */
      is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
                                                &operands[1], &width);

      gcc_assert (is_valid != 0);

      if (width == 0)
        return "vmov.f32\t%q0, %1 @ <mode>";
      else
        sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);

      return templ;
    }

  switch (which_alternative)
    {
    case 0: return "vmov\t%q0, %q1 @ <mode>";
    case 1: case 4: return output_move_neon (operands);
    /* Alternatives 2 and 3 were handled above.  */
    case 2: case 3: gcc_unreachable ();
    case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
    case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
    default: return output_move_quad (operands);
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
                     neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
                     neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
   (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
   (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
   (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
   (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
88f77cba 127
/* We define these mov expanders to match the standard mov$a optab to prevent
   the mid-end from trying to do a subreg for these modes, which is the most
   inefficient way to expand the move.  Also, big-endian subregs aren't
   allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
   Without these RTL generation patterns the mid-end would attempt to take a
   subreg and may ICE if it can't.  */

;; TImode move expander.  Both operands are checked (in checking builds)
;; against aligned_operand.  While pseudos can still be created, the
;; source is forced into a register whenever the destination is not one.
(define_expand "movti"
  [(set (match_operand:TI 0 "nonimmediate_operand")
        (match_operand:TI 1 "general_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], TImode));
  gcc_checking_assert (aligned_operand (operands[1], TImode));
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (TImode, operands[1]);
    }
})
148
;; Move expander for the VSTRUCT modes.  Both operands are checked (in
;; checking builds) against aligned_operand.  While pseudos can still
;; be created, the source is forced into a register whenever the
;; destination is not one.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
        (match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
  if (can_create_pseudo_p ())
    {
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
162
;; The mov<mode> patterns for v8hf, v4hf, v4bf and v8bf are split into
;; two groups.  The pattern movv8hf is common to MVE and NEON, so it has
;; been moved into the vec-common.md file.  The remaining mov expand
;; patterns for half-float and bfloat modes are implemented below.
(define_expand "mov<mode>"
  [(set (match_operand:VHFBF_split 0 "s_register_operand")
        (match_operand:VHFBF_split 1 "s_register_operand"))]
  "TARGET_NEON"
{
  gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
  gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
  if (can_create_pseudo_p ())
    {
      /* Force the source into a register when the destination is not
         a register.  */
      if (!REG_P (operands[0]))
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
180
;; Move pattern for the VSTRUCT modes.  At least one operand must be a
;; register.  The register-to-register alternative is emitted as "#";
;; the store and load alternatives are handled by output_move_neon.
;; The insn length is computed by arm_attr_length_move_neon.
(define_insn "*neon_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
        (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
  "TARGET_NEON
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "#";
    case 1: case 2: return output_move_neon (operands);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
   (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
88f77cba
JB
197
;; After reload, split an EImode register-to-register copy into two
;; moves: a TImode piece at the base register and a DImode piece at
;; base + 4.  neon_disambiguate_copy fills in the operands of the
;; replacement sets from the DEST and SRC arrays.
(define_split
  [(set (match_operand:EI 0 "s_register_operand" "")
        (match_operand:EI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (DImode, rdest + 4);
  src[1] = gen_rtx_REG (DImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})
216
;; After reload, split an OImode register-to-register copy into two
;; TImode moves, at the base register and at base + 4.
;; neon_disambiguate_copy fills in the operands of the replacement
;; sets from the DEST and SRC arrays.
(define_split
  [(set (match_operand:OI 0 "s_register_operand" "")
        (match_operand:OI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[2], src[2];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);

  neon_disambiguate_copy (operands, dest, src, 2);
})
235
;; After reload, split a CImode register-to-register copy into three
;; TImode moves, at the base register, base + 4 and base + 8.
;; neon_disambiguate_copy fills in the operands of the replacement
;; sets from the DEST and SRC arrays.
(define_split
  [(set (match_operand:CI 0 "s_register_operand" "")
        (match_operand:CI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[3], src[3];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);

  neon_disambiguate_copy (operands, dest, src, 3);
})
257
;; After reload, split an XImode register-to-register copy into four
;; TImode moves, at the base register, base + 4, base + 8 and base + 12.
;; neon_disambiguate_copy fills in the operands of the replacement
;; sets from the DEST and SRC arrays.
(define_split
  [(set (match_operand:XI 0 "s_register_operand" "")
        (match_operand:XI 1 "s_register_operand" ""))]
  "TARGET_NEON && reload_completed"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
{
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);
  rtx dest[4], src[4];

  dest[0] = gen_rtx_REG (TImode, rdest);
  src[0] = gen_rtx_REG (TImode, rsrc);
  dest[1] = gen_rtx_REG (TImode, rdest + 4);
  src[1] = gen_rtx_REG (TImode, rsrc + 4);
  dest[2] = gen_rtx_REG (TImode, rdest + 8);
  src[2] = gen_rtx_REG (TImode, rsrc + 8);
  dest[3] = gen_rtx_REG (TImode, rdest + 12);
  src[3] = gen_rtx_REG (TImode, rsrc + 12);

  neon_disambiguate_copy (operands, dest, src, 4);
})
282
;; Misaligned move expander for the VDQX modes, only enabled for
;; little-endian targets with unaligned_access.  The move is wrapped in
;; UNSPEC_MISALIGNED_ACCESS.  The address of the memory operand is
;; forced into a register when neon_vector_mem_operand rejects it.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
                     UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
{
  rtx adjust_mem;
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!s_register_operand (operands[0], <MODE>mode)
      && !s_register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* Whichever operand is not the register destination is the one whose
     address gets checked below.  */
  if (s_register_operand (operands[0], <MODE>mode))
    adjust_mem = operands[1];
  else
    adjust_mem = operands[0];

  /* Legitimize address.  */
  if (!neon_vector_mem_operand (adjust_mem, 2, true))
    XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));

})
307
;; Misaligned store for the VDX modes: emits vst1 of a single %P
;; register to the %A address.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%P1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
c452684d
JB
315
;; Misaligned load for the VDX modes: emits vld1 into a single %P
;; register from the %A address.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
                                          " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%P0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
c452684d
JB
324
;; Misaligned store for the VQX modes: emits vst1 of a %q register to
;; the %A address.
(define_insn "*movmisalign<mode>_neon_store"
  [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
        (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vst1.<V_sz_elem>\t{%q1}, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
c452684d
JB
332
;; Misaligned load for the VQX modes: emits vld1 into a %q register
;; from the %A address.
(define_insn "*movmisalign<mode>_neon_load"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
                                          " Um")]
                    UNSPEC_MISALIGNED_ACCESS))]
  "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
  "vld1.<V_sz_elem>\t{%q0}, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")])
c452684d 341
;; Insert scalar operand 1 into one lane of a VD_LANE vector (operand 3,
;; tied to the destination).  Operand 2 is the vec_merge mask; the lane
;; index is recovered from it as the position of its lowest set bit.
;; On big-endian targets the lane index is mirrored.  The first
;; alternative loads the lane with vld1, the second uses vmov.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
        (vec_merge:VD_LANE
          (vec_duplicate:VD_LANE
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VD_LANE 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  int elt = ffs ((int) INTVAL (operands[2])) - 1;
  if (BYTES_BIG_ENDIAN)
    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
88f77cba 362
;; Insert scalar operand 1 into one lane of a VQ2 vector (operand 3,
;; tied to the destination).  Operand 2 is the vec_merge mask; the lane
;; index is recovered from it as the position of its lowest set bit.
;; The lane is then addressed within one <V_HALF>mode half of the
;; destination: operand 0 is rewritten to the half-width register that
;; holds the lane and operand 2 to the index inside that half.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
        (vec_merge:VQ2
          (vec_duplicate:VQ2
            (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
          (match_operand:VQ2 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = elem % half_elts;
  /* Register-number offset of the half that contains the lane.  */
  int hi = (elem / half_elts) * 2;
  int regno = REGNO (operands[0]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
  else
    return "vmov.<V_sz_elem>\t%P0[%c2], %1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
)
88f77cba 391
;; Insert DImode operand 1 into one element of a V2DI_ONLY vector
;; (operand 3, tied to the destination).  Operand 2 is the vec_merge
;; mask; the element index is recovered from it as the position of its
;; lowest set bit.  Operand 0 is rewritten to the DImode register
;; holding that element (base register + 2 * index), which is then
;; loaded with vld1.64 or written with vmov from a %Q/%R pair.
(define_insn "@vec_set<mode>_internal"
  [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
        (vec_merge:V2DI_ONLY
          (vec_duplicate:V2DI_ONLY
            (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
          (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
  int regno = REGNO (operands[0]) + 2 * elem;

  operands[0] = gen_rtx_REG (DImode, regno);

  /* The register list is written in braces, matching the vst1.64
     template of vec_extractv2didi and the other vld1/vst1 templates.  */
  if (which_alternative == 0)
    return "vld1.64\t{%P0}, %A1";
  else
    return "vmov\t%P0, %Q1, %R1";
}
  [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
)
88f77cba 413
;; vec_set expander for the VDQ modes.  Operand 2 is the lane index; it
;; is converted into the one-bit mask (1 << index) that the
;; vec_set<mode>_internal patterns take as their vec_merge selector.
;; Operand 0 is passed both as destination and as the vector being
;; updated.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         GEN_INT (elem), operands[0]));
  DONE;
})
425
;; Extract lane operand 2 of a VD_LANE vector into operand 0.  On
;; big-endian targets the lane index is mirrored.  The first alternative
;; stores the lane with vst1, the second uses vmov.<V_uf_sclr>.
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VD_LANE 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      int elt = INTVAL (operands[2]);
      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
      operands[2] = GEN_INT (elt);
    }

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
88f77cba 447
;; Extract lane operand 2 of a VQ2 vector into operand 0.  The lane is
;; addressed within one <V_HALF>mode half of the source: operand 1 is
;; rewritten to the half-width register that holds the lane and
;; operand 2 to the index inside that half (mirrored on big-endian).
(define_insn "vec_extract<mode><V_elem_l>"
  [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
        (vec_select:<V_elem>
          (match_operand:VQ2 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
  int elt = INTVAL (operands[2]) % half_elts;
  /* Register-number offset of the half that contains the lane.  */
  int hi = (INTVAL (operands[2]) / half_elts) * 2;
  int regno = REGNO (operands[1]);

  if (BYTES_BIG_ENDIAN)
    elt = half_elts - 1 - elt;

  operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
  operands[2] = GEN_INT (elt);

  if (which_alternative == 0)
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
  else
    return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
)
88f77cba 473
;; Extract DImode element operand 2 of a V2DI vector into operand 0.
;; Operand 1 is rewritten to the DImode register holding the element
;; (base register + 2 * index), which is then stored with vst1.64 or
;; moved to a %Q/%R pair with vmov.
(define_insn "vec_extractv2didi"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
        (vec_select:DI
          (match_operand:V2DI 1 "s_register_operand" "w,w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);

  operands[1] = gen_rtx_REG (DImode, regno);

  if (which_alternative == 0)
    return "vst1.64\t{%P1}, %A0 @ v2di";
  else
    return "vmov\t%Q0, %R0, %P1 @ v2di";
}
  [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
)
88f77cba 492
;; Vector initialisation expander for the VDQ modes, enabled for NEON
;; or MVE.  All of the work is delegated to neon_expand_vector_init.
(define_expand "vec_init<mode><V_elem_l>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand 1 "" "")]
  "TARGET_NEON || TARGET_HAVE_MVE"
{
  neon_expand_vector_init (operands[0], operands[1]);
  DONE;
})
501
;; Doubleword and quadword arithmetic.

;; NOTE: some other instructions also support 64-bit integer
;; element size, which we could potentially use for "long long" operations.
88f77cba
JB
506
;; Vector addition for the VDQ modes, emitted as vadd.  Floating-point
;; modes are only matched under flag_unsafe_math_optimizations.  The
;; type attribute is the FP add/sub class for float modes and
;; neon_add otherwise.
(define_insn "*add<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                  (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_add<q>")))]
)
88f77cba 518
;; As with SFmode, full support for HFmode vector arithmetic is only available
;; when flag-unsafe-math-optimizations is enabled.

;; Vector addition for the VH modes, emitted as vadd.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
(define_insn "add<mode>3"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
    (if_then_else (match_test "<Is_float_mode>")
     (const_string "neon_fp_addsub_s<q>")
     (const_string "neon_add<q>")))]
)
535
;; Vector addition for the VH modes, emitted as vadd.  Same RTL as
;; add<mode>3 for VH, but conditional on TARGET_NEON_FP16INST alone,
;; without the flag_unsafe_math_optimizations requirement.
(define_insn "add<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (plus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
    (if_then_else (match_test "<Is_float_mode>")
     (const_string "neon_fp_addsub_s<q>")
     (const_string "neon_add<q>")))]
)
549
;; Vector subtraction for the VDQ modes, emitted as vsub.
;; Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.  The type attribute is the FP
;; add/sub class for float modes and neon_sub otherwise.
(define_insn "*sub<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                   (match_operand:VDQ 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_addsub_s<q>")
                    (const_string "neon_sub<q>")))]
)
88f77cba 561
;; Vector subtraction for the VH modes, emitted as vsub.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.  The type
;; is the floating-point add/sub class, matching what *sub<mode>3_neon
;; uses for float modes and what the VH add patterns use.
(define_insn "sub<mode>3"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (minus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_addsub_s<q>")]
)
572
;; Vector subtraction for the VH modes, emitted as vsub.  Same RTL as
;; sub<mode>3 for VH, but conditional on TARGET_NEON_FP16INST alone.
;; The type is the floating-point add/sub class, matching what
;; *sub<mode>3_neon uses for float modes and what the VH add patterns
;; use.
(define_insn "sub<mode>3_fp16"
  [(set
    (match_operand:VH 0 "s_register_operand" "=w")
    (minus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
  "TARGET_NEON_FP16INST"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_addsub_s<q>")]
)
583
;; Vector multiplication for the VDQW modes, emitted as vmul.
;; Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.
(define_insn "*mul<mode>3_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                   (match_operand:VDQW 2 "s_register_operand" "w")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mul_s<q>")
                    (const_string "neon_mul_<V_elem_ch><q>")))]
)
88f77cba 595
/* Perform division using multiply-by-reciprocal.
   Reciprocal is calculated using Newton-Raphson method.
   Enabled with -funsafe-math-optimizations -freciprocal-math
   and disabled for -Os since it increases code size.  */

;; Division expander for the VCVTF modes.  Emits a reciprocal estimate
;; of operand 2, refines it with two vrecps/multiply steps and finally
;; multiplies operand 1 by the refined reciprocal.
(define_expand "div<mode>3"
  [(set (match_operand:VCVTF 0 "s_register_operand")
        (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
                   (match_operand:VCVTF 2 "s_register_operand")))]
  "TARGET_NEON && !optimize_size
   && flag_reciprocal_math"
  {
    rtx rec = gen_reg_rtx (<MODE>mode);
    rtx vrecps_temp = gen_reg_rtx (<MODE>mode);

    /* Reciprocal estimate.  */
    emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));

    /* Perform 2 iterations of Newton-Raphson method.  */
    for (int i = 0; i < 2; i++)
      {
        emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
        emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
      }

    /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
    emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
    DONE;
  }
)
626
627
;; Multiply-accumulate for the VDQW modes: operand 2 * operand 3 is
;; added to operand 1, which is tied to the destination.  Emitted as
;; vmla.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                              (match_operand:VDQW 3 "s_register_operand" "w"))
                   (match_operand:VDQW 1 "s_register_operand" "0")))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)
640
;; Multiply-accumulate for the VH modes: operand 2 * operand 3 is added
;; to operand 1, which is tied to the destination.  Emitted as
;; vmla.f16.
(define_insn "mul<mode>3add<mode>_neon"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                 (match_operand:VH 1 "s_register_operand" "0")))]
  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
650
;; Multiply-subtract for the VDQW modes: operand 2 * operand 3 is
;; subtracted from operand 1, which is tied to the destination.
;; Emitted as vmls.  Floating-point modes are only matched under
;; flag_unsafe_math_optimizations.
(define_insn "mul<mode>3neg<mode>add<mode>_neon"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                    (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                               (match_operand:VDQW 3 "s_register_operand" "w"))))]
  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
  "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set (attr "type")
      (if_then_else (match_test "<Is_float_mode>")
                    (const_string "neon_fp_mla_s<q>")
                    (const_string "neon_mla_<V_elem_ch><q>")))]
)
663
;; Fused multiply-accumulate
;; We define each insn twice here:
;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
;;       to be able to use when converting to FMA.
;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.

;; Fused multiply-add for the VCVTF modes (variant 1 above): operand 3
;; is the addend and is tied to the destination.  Emitted as vfma.
(define_insn "fma<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
678
;; Fused multiply-add for the VCVTF modes, with the same RTL as
;; fma<VCVTF:mode>4 but without the flag_unsafe_math_optimizations
;; requirement.  Operand 3 is the addend and is tied to the
;; destination.
(define_insn "fma<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
688
;; Fused multiply-add for the VH modes.  Requires TARGET_NEON_FP16INST
;; and flag_unsafe_math_optimizations.  Operand 3 is the addend and is
;; tied to the destination.  Emitted as vfma.
(define_insn "fma<VH:mode>4"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
         (match_operand:VH 1 "register_operand" "w")
         (match_operand:VH 2 "register_operand" "w")
         (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
699
;; Fused multiply-add for the VH modes, with the same RTL as
;; fma<VH:mode>4 but conditional on TARGET_NEON_FP16INST alone.
;; Operand 3 is the addend and is tied to the destination.
(define_insn "fma<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
         (match_operand:VH 1 "register_operand" "w")
         (match_operand:VH 2 "register_operand" "w")
         (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
710
;; Fused multiply-subtract for the VCVTF modes: an fma whose first
;; multiplicand is negated.  Operand 3 is the addend and is tied to the
;; destination.  Emitted as vfms.  Requires
;; flag_unsafe_math_optimizations.
(define_insn "*fmsub<VCVTF:mode>4"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
720
;; Fused multiply-subtract for the VCVTF modes, with the same RTL as
;; *fmsub<VCVTF:mode>4 but without the flag_unsafe_math_optimizations
;; requirement.  Emitted as vfms.
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
  [(set (match_operand:VCVTF 0 "register_operand" "=w")
        (fma:VCVTF
         (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
         (match_operand:VCVTF 2 "register_operand" "w")
         (match_operand:VCVTF 3 "register_operand" "0")))]
  "TARGET_NEON && TARGET_FMA"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
731
;; Fused multiply-subtract for the VH modes: an fma whose first
;; multiplicand is negated.  Operand 3 is the addend and is tied to the
;; destination.  Emitted as vfms; requires TARGET_NEON_FP16INST.
(define_insn "fmsub<VH:mode>4_intrinsic"
  [(set (match_operand:VH 0 "register_operand" "=w")
        (fma:VH
         (neg:VH (match_operand:VH 1 "register_operand" "w"))
         (match_operand:VH 2 "register_operand" "w")
         (match_operand:VH 3 "register_operand" "0")))]
  "TARGET_NEON_FP16INST"
  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
)
742
;; Rounding for the VCVTF modes, one pattern per NEON_VRINT unspec.
;; Emitted as vrint<nvrint_variant>.f32; requires TARGET_VFP5.
(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
        (unspec:VCVTF [(match_operand:VCVTF 1
                         "s_register_operand" "w")]
                      NEON_VRINT))]
  "TARGET_NEON && TARGET_VFP5"
  "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
752
;; Conversion of a VCVTF vector to the <V_cmp_result> mode via a
;; FIXUORS code applied to a NEON_VCVT unspec.  Emitted as
;; vcvt<nvrint_variant>.<su>32.f32; requires TARGET_VFP5.  The insn is
;; marked as not predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
        (FIXUORS:<V_cmp_result> (unspec:VCVTF
                                 [(match_operand:VCVTF 1 "register_operand" "w")]
                                 NEON_VCVT)))]
  "TARGET_NEON && TARGET_VFP5"
  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
   (set_attr "predicable" "no")]
)
763
;; Bitwise OR for the VDQ modes.  The first alternative is the
;; three-register vorr.  In the second, operand 1 is tied to the
;; destination and operand 2 is an immediate; the text is produced by
;; neon_output_logic_immediate.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vorr", &operands[2],
                     <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)
88f77cba 780
;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
;; immediate bitwise-ANDs.

;; Bitwise AND for the VDQ modes.  The first alternative is the
;; three-register vand.  In the second, operand 1 is tied to the
;; destination and operand 2 is an immediate; the text is produced by
;; neon_output_logic_immediate.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
                 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
  "TARGET_NEON"
{
  switch (which_alternative)
    {
    case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
    case 1: return neon_output_logic_immediate ("vand", &operands[2],
                     <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)
88f77cba 802
;; OR-NOT for the VDQ modes: operand 1 | ~operand 2, emitted as vorn.
;; Note that the complemented input is operand 2, which comes first in
;; the RTL but last in the assembly template.
(define_insn "orn<mode>3_neon"
  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
        (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
                 (match_operand:VDQ 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_logic<q>")]
)
88f77cba 811
88f77cba
JB
812(define_insn "bic<mode>3_neon"
813 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
50fed7bf
RR
814 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
815 (match_operand:VDQ 1 "s_register_operand" "w")))]
88f77cba 816 "TARGET_NEON"
c956e102 817 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 818 [(set_attr "type" "neon_logic<q>")]
c956e102 819)
88f77cba 820
88f77cba
JB
821(define_insn "xor<mode>3"
822 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
823 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
824 (match_operand:VDQ 2 "s_register_operand" "w")))]
825 "TARGET_NEON"
c956e102 826 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 827 [(set_attr "type" "neon_logic<q>")]
c956e102 828)
88f77cba 829
88f77cba
JB
830(define_insn "one_cmpl<mode>2"
831 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
832 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
833 "TARGET_NEON"
c956e102 834 "vmvn\t%<V_reg>0, %<V_reg>1"
f7379e5e 835 [(set_attr "type" "neon_move<q>")]
c956e102 836)
88f77cba
JB
837
838(define_insn "abs<mode>2"
839 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
840 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
841 "TARGET_NEON"
c956e102 842 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
003bb7f3 843 [(set (attr "type")
b75b1be2 844 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
845 (const_string "neon_fp_abs_s<q>")
846 (const_string "neon_abs<q>")))]
c956e102 847)
88f77cba
JB
848
849(define_insn "neg<mode>2"
850 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
851 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
852 "TARGET_NEON"
c956e102 853 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
003bb7f3 854 [(set (attr "type")
b75b1be2 855 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
856 (const_string "neon_fp_neg_s<q>")
857 (const_string "neon_neg<q>")))]
c956e102 858)
88f77cba 859
55a9b91b
MW
860(define_insn "<absneg_str><mode>2"
861 [(set (match_operand:VH 0 "s_register_operand" "=w")
862 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
863 "TARGET_NEON_FP16INST"
864 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
865 [(set_attr "type" "neon_abs<q>")]
866)
867
868(define_expand "neon_v<absneg_str><mode>"
869 [(set
870 (match_operand:VH 0 "s_register_operand")
871 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
872 "TARGET_NEON_FP16INST"
873{
874 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
875 DONE;
876})
877
878(define_insn "neon_v<fp16_rnd_str><mode>"
879 [(set (match_operand:VH 0 "s_register_operand" "=w")
880 (unspec:VH
881 [(match_operand:VH 1 "s_register_operand" "w")]
882 FP16_RND))]
883 "TARGET_NEON_FP16INST"
884 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
885 [(set_attr "type" "neon_fp_round_s<q>")]
886)
887
888(define_insn "neon_vrsqrte<mode>"
889 [(set (match_operand:VH 0 "s_register_operand" "=w")
890 (unspec:VH
891 [(match_operand:VH 1 "s_register_operand" "w")]
892 UNSPEC_VRSQRTE))]
893 "TARGET_NEON_FP16INST"
894 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
895 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
896)
897
88f77cba
JB
898(define_insn "*umin<mode>3_neon"
899 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
900 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
901 (match_operand:VDQIW 2 "s_register_operand" "w")))]
902 "TARGET_NEON"
c956e102 903 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 904 [(set_attr "type" "neon_minmax<q>")]
c956e102 905)
88f77cba
JB
906
907(define_insn "*umax<mode>3_neon"
908 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
909 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
910 (match_operand:VDQIW 2 "s_register_operand" "w")))]
911 "TARGET_NEON"
c956e102 912 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 913 [(set_attr "type" "neon_minmax<q>")]
c956e102 914)
88f77cba
JB
915
916(define_insn "*smin<mode>3_neon"
917 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
918 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
919 (match_operand:VDQW 2 "s_register_operand" "w")))]
920 "TARGET_NEON"
c956e102 921 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 922 [(set (attr "type")
b75b1be2 923 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
924 (const_string "neon_fp_minmax_s<q>")
925 (const_string "neon_minmax<q>")))]
c956e102 926)
88f77cba
JB
927
928(define_insn "*smax<mode>3_neon"
929 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
930 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
931 (match_operand:VDQW 2 "s_register_operand" "w")))]
932 "TARGET_NEON"
c956e102 933 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 934 [(set (attr "type")
b75b1be2 935 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
936 (const_string "neon_fp_minmax_s<q>")
937 (const_string "neon_minmax<q>")))]
c956e102 938)
88f77cba
JB
939
940; TODO: V2DI shifts are currently disabled because there are bugs in the
941; generic vectorizer code. It ends up creating a V2DI constructor with
942; SImode elements.
943
d44463a9 944(define_insn "vashl<mode>3"
31a0c825
DP
945 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
946 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
e009dfb3 947 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
31a0c825
DP
948 "TARGET_NEON"
949 {
950 switch (which_alternative)
951 {
952 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
953 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
954 <MODE>mode,
955 VALID_NEON_QREG_MODE (<MODE>mode),
956 true);
957 default: gcc_unreachable ();
958 }
959 }
f7379e5e 960 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
31a0c825
DP
961)
962
963(define_insn "vashr<mode>3_imm"
88f77cba 964 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
31a0c825 965 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
e009dfb3 966 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
88f77cba 967 "TARGET_NEON"
31a0c825
DP
968 {
969 return neon_output_shift_immediate ("vshr", 's', &operands[2],
970 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
971 false);
972 }
f7379e5e 973 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 974)
88f77cba 975
31a0c825
DP
976(define_insn "vlshr<mode>3_imm"
977 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
978 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
e009dfb3 979 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
31a0c825
DP
980 "TARGET_NEON"
981 {
982 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
983 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
984 false);
985 }
f7379e5e 986 [(set_attr "type" "neon_shift_imm<q>")]
31a0c825
DP
987)
988
88f77cba
JB
989; Used for implementing arithmetic shift-right, which is a left-shift by a
990; negative amount, with signed operands. This is essentially the same as
991; vashl<mode>3 above, but using an unspec in case GCC tries anything tricky
992; with negative shift amounts.
993
994(define_insn "ashl<mode>3_signed"
995 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
996 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
997 (match_operand:VDQI 2 "s_register_operand" "w")]
998 UNSPEC_ASHIFT_SIGNED))]
999 "TARGET_NEON"
c956e102 1000 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1001 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 1002)
88f77cba
JB
1003
1004; Used for implementing logical shift-right, which is a left-shift by a negative
1005; amount, with unsigned operands.
1006
1007(define_insn "ashl<mode>3_unsigned"
1008 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1009 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1010 (match_operand:VDQI 2 "s_register_operand" "w")]
1011 UNSPEC_ASHIFT_UNSIGNED))]
1012 "TARGET_NEON"
c956e102 1013 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1014 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 1015)
88f77cba 1016
d44463a9 1017(define_expand "vashr<mode>3"
cd65e265
DZ
1018 [(set (match_operand:VDQIW 0 "s_register_operand")
1019 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1020 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
88f77cba
JB
1021 "TARGET_NEON"
1022{
56b15099 1023 if (s_register_operand (operands[2], <MODE>mode))
31a0c825 1024 {
56b15099 1025 rtx neg = gen_reg_rtx (<MODE>mode);
31a0c825
DP
1026 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1027 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1028 }
1029 else
1030 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
88f77cba
JB
1031 DONE;
1032})
1033
d44463a9 1034(define_expand "vlshr<mode>3"
cd65e265
DZ
1035 [(set (match_operand:VDQIW 0 "s_register_operand")
1036 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1037 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
88f77cba
JB
1038 "TARGET_NEON"
1039{
56b15099 1040 if (s_register_operand (operands[2], <MODE>mode))
31a0c825 1041 {
56b15099 1042 rtx neg = gen_reg_rtx (<MODE>mode);
31a0c825
DP
1043 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1044 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1045 }
1046 else
1047 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
88f77cba
JB
1048 DONE;
1049})
1050
3f2dc806
AS
1051;; 64-bit shifts
1052
1053;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1054;; leaving the upper half uninitialized. This is OK since the shift
1055;; instruction only looks at the low 8 bits anyway. To avoid confusing
1056;; data flow analysis however, we pretend the full register is set
1057;; using an unspec.
1058(define_insn "neon_load_count"
1059 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1060 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1061 UNSPEC_LOAD_COUNT))]
1062 "TARGET_NEON"
1063 "@
1064 vld1.32\t{%P0[0]}, %A1
1065 vmov.32\t%P0[0], %1"
f7379e5e 1066 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
3f2dc806
AS
1067)
1068
88f77cba
JB
1069;; Widening operations
1070
93c590ee 1071(define_expand "widen_ssum<mode>3"
cd65e265 1072 [(set (match_operand:<V_double_width> 0 "s_register_operand")
93c590ee
MC
1073 (plus:<V_double_width>
1074 (sign_extend:<V_double_width>
cd65e265
DZ
1075 (match_operand:VQI 1 "s_register_operand"))
1076 (match_operand:<V_double_width> 2 "s_register_operand")))]
93c590ee
MC
1077 "TARGET_NEON"
1078 {
1079 machine_mode mode = GET_MODE (operands[1]);
1080 rtx p1, p2;
1081
1082 p1 = arm_simd_vect_par_cnst_half (mode, false);
1083 p2 = arm_simd_vect_par_cnst_half (mode, true);
1084
1085 if (operands[0] != operands[2])
1086 emit_move_insn (operands[0], operands[2]);
1087
1088 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1089 operands[1],
1090 p1,
1091 operands[0]));
1092 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1093 operands[1],
1094 p2,
1095 operands[0]));
1096 DONE;
1097 }
1098)
1099
b8c36603
KT
1100(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1101 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1102 (plus:<V_double_width>
1103 (sign_extend:<V_double_width>
1104 (vec_select:<V_HALF>
93c590ee
MC
1105 (match_operand:VQI 1 "s_register_operand" "%w")
1106 (match_operand:VQI 2 "vect_par_constant_low" "")))
b8c36603 1107 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1108 "TARGET_NEON"
1109{
1110 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1111 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1112}
1113 [(set_attr "type" "neon_add_widen")])
1114
b8c36603
KT
1115(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1116 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1117 (plus:<V_double_width>
1118 (sign_extend:<V_double_width>
1119 (vec_select:<V_HALF>
1120 (match_operand:VQI 1 "s_register_operand" "%w")
93c590ee 1121 (match_operand:VQI 2 "vect_par_constant_high" "")))
b8c36603 1122 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1123 "TARGET_NEON"
1124{
1125 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1126 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1127}
1128 [(set_attr "type" "neon_add_widen")])
1129
88f77cba
JB
1130(define_insn "widen_ssum<mode>3"
1131 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
93c590ee
MC
1132 (plus:<V_widen>
1133 (sign_extend:<V_widen>
1134 (match_operand:VW 1 "s_register_operand" "%w"))
1135 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
88f77cba 1136 "TARGET_NEON"
c956e102 1137 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
f7379e5e 1138 [(set_attr "type" "neon_add_widen")]
c956e102 1139)
88f77cba 1140
93c590ee 1141(define_expand "widen_usum<mode>3"
cd65e265 1142 [(set (match_operand:<V_double_width> 0 "s_register_operand")
93c590ee
MC
1143 (plus:<V_double_width>
1144 (zero_extend:<V_double_width>
cd65e265
DZ
1145 (match_operand:VQI 1 "s_register_operand"))
1146 (match_operand:<V_double_width> 2 "s_register_operand")))]
93c590ee
MC
1147 "TARGET_NEON"
1148 {
1149 machine_mode mode = GET_MODE (operands[1]);
1150 rtx p1, p2;
1151
1152 p1 = arm_simd_vect_par_cnst_half (mode, false);
1153 p2 = arm_simd_vect_par_cnst_half (mode, true);
1154
1155 if (operands[0] != operands[2])
1156 emit_move_insn (operands[0], operands[2]);
1157
1158 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1159 operands[1],
1160 p1,
1161 operands[0]));
1162 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1163 operands[1],
1164 p2,
1165 operands[0]));
1166 DONE;
1167 }
1168)
1169
b8c36603
KT
1170(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1171 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1172 (plus:<V_double_width>
1173 (zero_extend:<V_double_width>
1174 (vec_select:<V_HALF>
93c590ee
MC
1175 (match_operand:VQI 1 "s_register_operand" "%w")
1176 (match_operand:VQI 2 "vect_par_constant_low" "")))
b8c36603 1177 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1178 "TARGET_NEON"
1179{
1180 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1181 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1182}
1183 [(set_attr "type" "neon_add_widen")])
1184
b8c36603
KT
1185(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1186 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1187 (plus:<V_double_width>
1188 (zero_extend:<V_double_width>
1189 (vec_select:<V_HALF>
1190 (match_operand:VQI 1 "s_register_operand" "%w")
93c590ee 1191 (match_operand:VQI 2 "vect_par_constant_high" "")))
b8c36603 1192 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1193 "TARGET_NEON"
1194{
1195 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1196 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1197}
1198 [(set_attr "type" "neon_add_widen")])
1199
88f77cba
JB
1200(define_insn "widen_usum<mode>3"
1201 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1202 (plus:<V_widen> (zero_extend:<V_widen>
1203 (match_operand:VW 1 "s_register_operand" "%w"))
1204 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1205 "TARGET_NEON"
c956e102 1206 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
f7379e5e 1207 [(set_attr "type" "neon_add_widen")]
c956e102 1208)
88f77cba 1209
88f77cba
JB
1210;; Helpers for quad-word reduction operations
1211
1212; Add (or smin, smax...) the low N/2 elements of the N-element vector
1213; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1214; N/2-element vector.
1215
1216(define_insn "quad_halves_<code>v4si"
1217 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
728dc153 1218 (VQH_OPS:V2SI
88f77cba
JB
1219 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1220 (parallel [(const_int 0) (const_int 1)]))
1221 (vec_select:V2SI (match_dup 1)
1222 (parallel [(const_int 2) (const_int 3)]))))]
1223 "TARGET_NEON"
c956e102
MS
1224 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1225 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1226 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1227)
88f77cba
JB
1228
1229(define_insn "quad_halves_<code>v4sf"
1230 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
728dc153 1231 (VQHS_OPS:V2SF
88f77cba
JB
1232 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1233 (parallel [(const_int 0) (const_int 1)]))
1234 (vec_select:V2SF (match_dup 1)
1235 (parallel [(const_int 2) (const_int 3)]))))]
400cfcf5 1236 "TARGET_NEON && flag_unsafe_math_optimizations"
c956e102
MS
1237 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1238 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1239 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
c956e102 1240)
88f77cba
JB
1241
; V8HI variant: combine (VQH_OPS) elements 0-3 of operand 1 with elements 4-7
; of the same register, producing a V4HI result in operand 0.  Operand 0 is
; only ever written (it is the SET destination and is never an input), so it
; takes the "=" modifier, matching the v4si/v4sf variants.
1242(define_insn "quad_halves_<code>v8hi"
1243 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
728dc153 1244 (VQH_OPS:V4HI
88f77cba
JB
1245 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1246 (parallel [(const_int 0) (const_int 1)
1247 (const_int 2) (const_int 3)]))
1248 (vec_select:V4HI (match_dup 1)
1249 (parallel [(const_int 4) (const_int 5)
1250 (const_int 6) (const_int 7)]))))]
1251 "TARGET_NEON"
c956e102
MS
1252 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1253 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1254 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1255)
88f77cba
JB
1256
; V16QI variant: combine (VQH_OPS) elements 0-7 of operand 1 with elements
; 8-15 of the same register, producing a V8QI result in operand 0.  Operand 0
; is only ever written (it is the SET destination and is never an input), so
; it takes the "=" modifier, matching the v4si/v4sf variants.
1257(define_insn "quad_halves_<code>v16qi"
1258 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
728dc153 1259 (VQH_OPS:V8QI
88f77cba
JB
1260 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1261 (parallel [(const_int 0) (const_int 1)
1262 (const_int 2) (const_int 3)
1263 (const_int 4) (const_int 5)
1264 (const_int 6) (const_int 7)]))
1265 (vec_select:V8QI (match_dup 1)
1266 (parallel [(const_int 8) (const_int 9)
1267 (const_int 10) (const_int 11)
1268 (const_int 12) (const_int 13)
1269 (const_int 14) (const_int 15)]))))]
1270 "TARGET_NEON"
c956e102
MS
1271 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1272 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1273 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1274)
88f77cba 1275
0f38f229 1276(define_expand "move_hi_quad_<mode>"
cd65e265
DZ
1277 [(match_operand:ANY128 0 "s_register_operand")
1278 (match_operand:<V_HALF> 1 "s_register_operand")]
0f38f229 1279 "TARGET_NEON"
88f77cba 1280{
d92aed06
RS
1281 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1282 GET_MODE_SIZE (<V_HALF>mode)),
1283 operands[1]);
0f38f229
TB
1284 DONE;
1285})
1286
1287(define_expand "move_lo_quad_<mode>"
cd65e265
DZ
1288 [(match_operand:ANY128 0 "s_register_operand")
1289 (match_operand:<V_HALF> 1 "s_register_operand")]
0f38f229 1290 "TARGET_NEON"
88f77cba 1291{
d92aed06
RS
1292 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1293 <MODE>mode, 0),
1294 operands[1]);
0f38f229
TB
1295 DONE;
1296})
88f77cba
JB
1297
1298;; Reduction operations
1299
89edc986 1300(define_expand "reduc_plus_scal_<mode>"
cd65e265
DZ
1301 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1302 (match_operand:VD 1 "s_register_operand")]
400cfcf5 1303 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
88f77cba 1304{
89edc986
AL
1305 rtx vec = gen_reg_rtx (<MODE>mode);
1306 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1307 &gen_neon_vpadd_internal<mode>);
89edc986 1308 /* The same result is actually computed into every element. */
ff03930a 1309 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1310 DONE;
1311})
1312
89edc986 1313(define_expand "reduc_plus_scal_<mode>"
cd65e265
DZ
1314 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1315 (match_operand:VQ 1 "s_register_operand")]
0094f21b
JB
1316 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1317 && !BYTES_BIG_ENDIAN"
88f77cba
JB
1318{
1319 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1320
1321 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
89edc986 1322 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
88f77cba
JB
1323
1324 DONE;
1325})
1326
89edc986 1327(define_expand "reduc_plus_scal_v2di"
cd65e265
DZ
1328 [(match_operand:DI 0 "nonimmediate_operand")
1329 (match_operand:V2DI 1 "s_register_operand")]
89edc986
AL
1330 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1331{
1332 rtx vec = gen_reg_rtx (V2DImode);
1333
1334 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
ff03930a 1335 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
89edc986
AL
1336
1337 DONE;
1338})
1339
1340(define_insn "arm_reduc_plus_internal_v2di"
88f77cba
JB
1341 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1342 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1343 UNSPEC_VPADD))]
0094f21b 1344 "TARGET_NEON && !BYTES_BIG_ENDIAN"
c956e102 1345 "vadd.i64\t%e0, %e1, %f1"
f7379e5e 1346 [(set_attr "type" "neon_add_q")]
c956e102 1347)
88f77cba 1348
f5dcbee1 1349(define_expand "reduc_smin_scal_<mode>"
cd65e265
DZ
1350 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1351 (match_operand:VD 1 "s_register_operand")]
400cfcf5 1352 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
88f77cba 1353{
f5dcbee1
AL
1354 rtx vec = gen_reg_rtx (<MODE>mode);
1355
1356 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1357 &gen_neon_vpsmin<mode>);
f5dcbee1 1358 /* The result is computed into every element of the vector. */
ff03930a 1359 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1360 DONE;
1361})
1362
f5dcbee1 1363(define_expand "reduc_smin_scal_<mode>"
cd65e265
DZ
1364 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1365 (match_operand:VQ 1 "s_register_operand")]
0094f21b
JB
1366 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1367 && !BYTES_BIG_ENDIAN"
88f77cba
JB
1368{
1369 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1370
1371 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
f5dcbee1 1372 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
88f77cba
JB
1373
1374 DONE;
1375})
1376
f5dcbee1 1377(define_expand "reduc_smax_scal_<mode>"
cd65e265
DZ
1378 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1379 (match_operand:VD 1 "s_register_operand")]
400cfcf5 1380 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
88f77cba 1381{
f5dcbee1
AL
1382 rtx vec = gen_reg_rtx (<MODE>mode);
1383 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1384 &gen_neon_vpsmax<mode>);
f5dcbee1 1385 /* The result is computed into every element of the vector. */
ff03930a 1386 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1387 DONE;
1388})
1389
f5dcbee1 1390(define_expand "reduc_smax_scal_<mode>"
cd65e265
DZ
1391 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1392 (match_operand:VQ 1 "s_register_operand")]
0094f21b
JB
1393 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1394 && !BYTES_BIG_ENDIAN"
88f77cba
JB
1395{
1396 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1397
1398 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
f5dcbee1 1399 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
88f77cba
JB
1400
1401 DONE;
1402})
1403
f5dcbee1 1404(define_expand "reduc_umin_scal_<mode>"
cd65e265
DZ
1405 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1406 (match_operand:VDI 1 "s_register_operand")]
88f77cba
JB
1407 "TARGET_NEON"
1408{
f5dcbee1
AL
1409 rtx vec = gen_reg_rtx (<MODE>mode);
1410 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1411 &gen_neon_vpumin<mode>);
f5dcbee1 1412 /* The result is computed into every element of the vector. */
ff03930a 1413 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1414 DONE;
1415})
1416
f5dcbee1 1417(define_expand "reduc_umin_scal_<mode>"
cd65e265
DZ
1418 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1419 (match_operand:VQI 1 "s_register_operand")]
0094f21b 1420 "TARGET_NEON && !BYTES_BIG_ENDIAN"
88f77cba
JB
1421{
1422 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1423
1424 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
f5dcbee1 1425 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
88f77cba
JB
1426
1427 DONE;
1428})
1429
f5dcbee1 1430(define_expand "reduc_umax_scal_<mode>"
cd65e265
DZ
1431 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1432 (match_operand:VDI 1 "s_register_operand")]
88f77cba
JB
1433 "TARGET_NEON"
1434{
f5dcbee1
AL
1435 rtx vec = gen_reg_rtx (<MODE>mode);
1436 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1437 &gen_neon_vpumax<mode>);
f5dcbee1 1438 /* The result is computed into every element of the vector. */
ff03930a 1439 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1440 DONE;
1441})
1442
f5dcbee1 1443(define_expand "reduc_umax_scal_<mode>"
cd65e265
DZ
1444 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1445 (match_operand:VQI 1 "s_register_operand")]
0094f21b 1446 "TARGET_NEON && !BYTES_BIG_ENDIAN"
88f77cba
JB
1447{
1448 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1449
1450 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
f5dcbee1 1451 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
88f77cba
JB
1452
1453 DONE;
1454})
1455
1456(define_insn "neon_vpadd_internal<mode>"
1457 [(set (match_operand:VD 0 "s_register_operand" "=w")
1458 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1459 (match_operand:VD 2 "s_register_operand" "w")]
1460 UNSPEC_VPADD))]
1461 "TARGET_NEON"
c956e102
MS
1462 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1463 ;; Assume this schedules like vadd.
003bb7f3 1464 [(set (attr "type")
b75b1be2 1465 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1466 (const_string "neon_fp_reduc_add_s<q>")
1467 (const_string "neon_reduc_add<q>")))]
c956e102 1468)
88f77cba 1469
55a9b91b
MW
1470(define_insn "neon_vpaddv4hf"
1471 [(set
1472 (match_operand:V4HF 0 "s_register_operand" "=w")
1473 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1474 (match_operand:V4HF 2 "s_register_operand" "w")]
1475 UNSPEC_VPADD))]
1476 "TARGET_NEON_FP16INST"
1477 "vpadd.f16\t%P0, %P1, %P2"
1478 [(set_attr "type" "neon_reduc_add")]
1479)
1480
88f77cba
JB
1481(define_insn "neon_vpsmin<mode>"
1482 [(set (match_operand:VD 0 "s_register_operand" "=w")
1483 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1484 (match_operand:VD 2 "s_register_operand" "w")]
1485 UNSPEC_VPSMIN))]
1486 "TARGET_NEON"
c956e102 1487 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
003bb7f3 1488 [(set (attr "type")
b75b1be2 1489 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1490 (const_string "neon_fp_reduc_minmax_s<q>")
1491 (const_string "neon_reduc_minmax<q>")))]
c956e102 1492)
88f77cba
JB
1493
1494(define_insn "neon_vpsmax<mode>"
1495 [(set (match_operand:VD 0 "s_register_operand" "=w")
1496 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1497 (match_operand:VD 2 "s_register_operand" "w")]
1498 UNSPEC_VPSMAX))]
1499 "TARGET_NEON"
c956e102 1500 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
003bb7f3 1501 [(set (attr "type")
b75b1be2 1502 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1503 (const_string "neon_fp_reduc_minmax_s<q>")
1504 (const_string "neon_reduc_minmax<q>")))]
c956e102 1505)
88f77cba
JB
1506
1507(define_insn "neon_vpumin<mode>"
1508 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1509 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1510 (match_operand:VDI 2 "s_register_operand" "w")]
1511 UNSPEC_VPUMIN))]
1512 "TARGET_NEON"
c956e102 1513 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1514 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 1515)
88f77cba
JB
1516
1517(define_insn "neon_vpumax<mode>"
1518 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1519 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1520 (match_operand:VDI 2 "s_register_operand" "w")]
1521 UNSPEC_VPUMAX))]
1522 "TARGET_NEON"
c956e102 1523 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1524 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 1525)
88f77cba
JB
1526
1527;; Saturating arithmetic
1528
1529; NOTE: Neon supports many more saturating variants of instructions than the
1530; following, but these are all GCC currently understands.
1531; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1532; yet either, although these patterns may be used by intrinsics when they're
1533; added.
1534
1535(define_insn "*ss_add<mode>_neon"
1536 [(set (match_operand:VD 0 "s_register_operand" "=w")
1537 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1538 (match_operand:VD 2 "s_register_operand" "w")))]
1539 "TARGET_NEON"
c956e102 1540 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
f7379e5e 1541 [(set_attr "type" "neon_qadd<q>")]
c956e102 1542)
88f77cba
JB
1543
1544(define_insn "*us_add<mode>_neon"
1545 [(set (match_operand:VD 0 "s_register_operand" "=w")
1546 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1547 (match_operand:VD 2 "s_register_operand" "w")))]
1548 "TARGET_NEON"
c956e102 1549 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1550 [(set_attr "type" "neon_qadd<q>")]
c956e102 1551)
88f77cba
JB
1552
1553(define_insn "*ss_sub<mode>_neon"
1554 [(set (match_operand:VD 0 "s_register_operand" "=w")
1555 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1556 (match_operand:VD 2 "s_register_operand" "w")))]
1557 "TARGET_NEON"
c956e102 1558 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
f7379e5e 1559 [(set_attr "type" "neon_qsub<q>")]
c956e102 1560)
88f77cba
JB
1561
1562(define_insn "*us_sub<mode>_neon"
1563 [(set (match_operand:VD 0 "s_register_operand" "=w")
1564 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1565 (match_operand:VD 2 "s_register_operand" "w")))]
1566 "TARGET_NEON"
c956e102 1567 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1568 [(set_attr "type" "neon_qsub<q>")]
c956e102 1569)
88f77cba 1570
5bfc5baa
JB
1571;; Conditional instructions. These are comparisons with conditional moves for
1572;; vectors. They perform the assignment:
1573;;
1574;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1575;;
1576;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1577;; element-wise.
1578
e9e1d143 1579(define_expand "vcond<mode><mode>"
cd65e265 1580 [(set (match_operand:VDQW 0 "s_register_operand")
5bfc5baa 1581 (if_then_else:VDQW
f35c297f 1582 (match_operator 3 "comparison_operator"
cd65e265
DZ
1583 [(match_operand:VDQW 4 "s_register_operand")
1584 (match_operand:VDQW 5 "nonmemory_operand")])
1585 (match_operand:VDQW 1 "s_register_operand")
1586 (match_operand:VDQW 2 "s_register_operand")))]
5bfc5baa
JB
1587 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1588{
  /* Expand a vector conditional select.  A mask is computed from the
     comparison operands[3] applied to operands[4] and operands[5], and a
     vbsl is then emitted on that mask with operands[1] and operands[2] as
     the data inputs and operands[0] as the destination.

     inverse is set for LT, LE, UNLT and UNLE, which reuse the GE/GT
     generators.  use_zero_form is set when operands[5] is the zero
     constant and the code is one of GE, GT, LE, LT or EQ.
     swap_bsl_operands is set when the mask holds the opposite of the
     requested condition, so the two data inputs are passed to vbsl in
     swapped order.  */
f35c297f 1589 int inverse = 0;
ff522f7f 1590 int use_zero_form = 0;
f35c297f
KT
1591 int swap_bsl_operands = 0;
1592 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1593 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1594
94f0f2cc
JG
1595 rtx (*base_comparison) (rtx, rtx, rtx);
1596 rtx (*complimentary_comparison) (rtx, rtx, rtx);
f35c297f 1597
  /* Step 1: leave a zero constant in operands[5] untouched for the codes
     that have a compare-against-zero form; in every other case make sure
     operands[5] is a register.  */
5bfc5baa
JB
1598 switch (GET_CODE (operands[3]))
1599 {
1600 case GE:
ff522f7f 1601 case GT:
f35c297f 1602 case LE:
ff522f7f 1603 case LT:
f35c297f 1604 case EQ:
ff522f7f
ZC
1605 if (operands[5] == CONST0_RTX (<MODE>mode))
1606 {
1607 use_zero_form = 1;
1608 break;
1609 }
1610 /* Fall through. */
f35c297f
KT
1611 default:
1612 if (!REG_P (operands[5]))
1613 operands[5] = force_reg (<MODE>mode, operands[5]);
1614 }
1615
  /* Step 2: pick the pair of comparison generators for this code and
     record whether the code is one of the inverse (LT/LE style) forms.  */
1616 switch (GET_CODE (operands[3]))
1617 {
1618 case LT:
1619 case UNLT:
1620 inverse = 1;
1621 /* Fall through. */
1622 case GE:
1623 case UNGE:
1624 case ORDERED:
1625 case UNORDERED:
1626 base_comparison = gen_neon_vcge<mode>;
1627 complimentary_comparison = gen_neon_vcgt<mode>;
1628 break;
1629 case LE:
1630 case UNLE:
1631 inverse = 1;
1632 /* Fall through. */
5bfc5baa 1633 case GT:
f35c297f
KT
1634 case UNGT:
1635 base_comparison = gen_neon_vcgt<mode>;
1636 complimentary_comparison = gen_neon_vcge<mode>;
5bfc5baa 1637 break;
5bfc5baa 1638 case EQ:
f35c297f
KT
1639 case NE:
1640 case UNEQ:
1641 base_comparison = gen_neon_vceq<mode>;
1642 complimentary_comparison = gen_neon_vceq<mode>;
5bfc5baa 1643 break;
f35c297f
KT
1644 default:
1645 gcc_unreachable ();
1646 }
1647
  /* Step 3: emit the comparison instruction(s) that compute the mask.  */
1648 switch (GET_CODE (operands[3]))
1649 {
1650 case LT:
5bfc5baa 1651 case LE:
f35c297f
KT
1652 case GT:
1653 case GE:
1654 case EQ:
1655 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1656 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1657 a GE b -> a GE b
1658 a GT b -> a GT b
1659 a LE b -> b GE a
1660 a LT b -> b GT a
ff522f7f
ZC
1661 a EQ b -> a EQ b
1662 Note that there also exist direct comparison against 0 forms,
1663 so catch those as a special case. */
1664 if (use_zero_form)
1665 {
      /* With a zero operand the operands are not swapped; LT and LE use
         their own generators instead.  */
1666 inverse = 0;
1667 switch (GET_CODE (operands[3]))
1668 {
1669 case LT:
1670 base_comparison = gen_neon_vclt<mode>;
1671 break;
1672 case LE:
1673 base_comparison = gen_neon_vcle<mode>;
1674 break;
1675 default:
1676 /* Do nothing, other zero form cases already have the correct
1677 base_comparison. */
1678 break;
1679 }
1680 }
f35c297f
KT
1681
1682 if (!inverse)
94f0f2cc 1683 emit_insn (base_comparison (mask, operands[4], operands[5]));
5bfc5baa 1684 else
94f0f2cc 1685 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
5bfc5baa 1686 break;
f35c297f
KT
1687 case UNLT:
1688 case UNLE:
1689 case UNGT:
1690 case UNGE:
1691 case NE:
1692 /* Vector compare returns false for lanes which are unordered, so if we use
1693 the inverse of the comparison we actually want to emit, then
1694 swap the operands to BSL, we will end up with the correct result.
1695 Note that a NE NaN and NaN NE b are true for all a, b.
1696
1697 Our transformations are:
1698 a UNGE b -> !(b GT a)
1699 a UNGT b -> !(b GE a)
1700 a UNLE b -> !(a GT b)
1701 a UNLT b -> !(a GE b)
1702 a NE b -> !(a EQ b) */
1703
1704 if (inverse)
94f0f2cc 1705 emit_insn (base_comparison (mask, operands[4], operands[5]));
5bfc5baa 1706 else
94f0f2cc 1707 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
f35c297f
KT
1708
1709 swap_bsl_operands = 1;
5bfc5baa 1710 break;
f35c297f
KT
1711 case UNEQ:
1712 /* We check (a > b || b > a). Combining these comparisons gives us
1713 true iff (a != b && a ORDERED b); swapping the operands to BSL
1714 will then give us (a == b || a UNORDERED b) as intended. */
1715
94f0f2cc
JG
1716 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1717 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
f35c297f
KT
1718 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1719 swap_bsl_operands = 1;
1720 break;
1721 case UNORDERED:
1722 /* Operands are ORDERED iff (a > b || b >= a).
1723 Swapping the operands to BSL will give the UNORDERED case. */
1724 swap_bsl_operands = 1;
1725 /* Fall through. */
1726 case ORDERED:
94f0f2cc
JG
1727 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1728 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
f35c297f 1729 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
5bfc5baa 1730 break;
5bfc5baa
JB
1731 default:
1732 gcc_unreachable ();
1733 }
f35c297f
KT
1734
  /* Step 4: emit the select, passing operands[1] and operands[2] in
     swapped order when the mask holds the opposite condition.  */
1735 if (swap_bsl_operands)
5bfc5baa
JB
1736 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1737 operands[1]));
1738 else
1739 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1740 operands[2]));
5bfc5baa
JB
1741 DONE;
1742})
1743
e9e1d143 1744(define_expand "vcondu<mode><mode>"
cd65e265 1745 [(set (match_operand:VDQIW 0 "s_register_operand")
5bfc5baa
JB
1746 (if_then_else:VDQIW
1747 (match_operator 3 "arm_comparison_operator"
cd65e265
DZ
1748 [(match_operand:VDQIW 4 "s_register_operand")
1749 (match_operand:VDQIW 5 "s_register_operand")])
1750 (match_operand:VDQIW 1 "s_register_operand")
1751 (match_operand:VDQIW 2 "s_register_operand")))]
5bfc5baa
JB
1752 "TARGET_NEON"
1753{
  /* Expand a vector conditional select for the codes GEU, GTU, LEU, LTU,
     EQ and NE over the VDQIW modes.  A mask is computed from operands[3]
     applied to operands[4] and operands[5]; a vbsl on that mask then
     combines operands[1] and operands[2] into operands[0].  Any other
     comparison code hits gcc_unreachable.  */
1754 rtx mask;
1755 int inverse = 0, immediate_zero = 0;
1756
1757 mask = gen_reg_rtx (<V_cmp_result>mode);
1758
  /* NOTE(review): the predicate on operand 5 in the template above is
     s_register_operand, so it is unclear whether a zero constant can
     actually reach this point - confirm before relying on the
     immediate_zero paths below.  */
1759 if (operands[5] == CONST0_RTX (<MODE>mode))
1760 immediate_zero = 1;
1761 else if (!REG_P (operands[5]))
1762 operands[5] = force_reg (<MODE>mode, operands[5]);
1763
1764 switch (GET_CODE (operands[3]))
1765 {
1766 case GEU:
94f0f2cc 1767 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
5bfc5baa
JB
1768 break;
1769
1770 case GTU:
94f0f2cc 1771 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
5bfc5baa
JB
1772 break;
1773
1774 case EQ:
94f0f2cc 1775 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
5bfc5baa
JB
1776 break;
1777
1778 case LEU:
      /* a LEU b is emitted as b GEU a.  NOTE(review): the zero case uses
         gen_neon_vcle, the generator the vcond expander uses for LE
         against zero - confirm that is the intended meaning for an
         unsigned comparison.  */
1779 if (immediate_zero)
94f0f2cc 1780 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
5bfc5baa 1781 else
94f0f2cc 1782 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
5bfc5baa
JB
1783 break;
1784
1785 case LTU:
      /* a LTU b is emitted as b GTU a.  NOTE(review): the zero case uses
         gen_neon_vclt, the generator the vcond expander uses for LT
         against zero - confirm that is the intended meaning for an
         unsigned comparison.  */
1786 if (immediate_zero)
94f0f2cc 1787 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
5bfc5baa 1788 else
94f0f2cc 1789 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
5bfc5baa
JB
1790 break;
1791
1792 case NE:
      /* NE is computed as EQ; the data inputs are swapped at the vbsl
         below to compensate.  */
94f0f2cc 1793 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
5bfc5baa
JB
1794 inverse = 1;
1795 break;
1796
1797 default:
1798 gcc_unreachable ();
1799 }
1800
  /* Emit the select; for an inverted mask operands[2] and operands[1]
     trade places.  */
1801 if (inverse)
1802 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1803 operands[1]));
1804 else
1805 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1806 operands[2]));
1807
1808 DONE;
1809})
1810
88f77cba
JB
1811;; Patterns for builtins.
1812
1813; good for plain vadd, vaddq.
1814
bab53516 1815(define_expand "neon_vadd<mode>"
cd65e265
DZ
1816 [(match_operand:VCVTF 0 "s_register_operand")
1817 (match_operand:VCVTF 1 "s_register_operand")
1818 (match_operand:VCVTF 2 "s_register_operand")]
bab53516
SL
1819 "TARGET_NEON"
1820{
  /* Builtin expander for vadd over the VCVTF modes.  The canonical
     add<mode>3 pattern is used when <Is_float_mode> is false or when
     flag_unsafe_math_optimizations is set; otherwise the unspec pattern
     neon_vadd<mode>_unspec is used.  */
1821 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1822 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1823 else
1824 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1825 operands[2]));
1826 DONE;
1827})
1828
55a9b91b
MW
1829(define_expand "neon_vadd<mode>"
1830 [(match_operand:VH 0 "s_register_operand")
1831 (match_operand:VH 1 "s_register_operand")
1832 (match_operand:VH 2 "s_register_operand")]
1833 "TARGET_NEON_FP16INST"
1834{
  /* Builtin expander for vadd over the VH modes: forwards all three
     operands, unchanged, to the add<mode>3_fp16 pattern.  */
1835 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
1836 DONE;
1837})
1838
1839(define_expand "neon_vsub<mode>"
1840 [(match_operand:VH 0 "s_register_operand")
1841 (match_operand:VH 1 "s_register_operand")
1842 (match_operand:VH 2 "s_register_operand")]
1843 "TARGET_NEON_FP16INST"
1844{
  /* Builtin expander for vsub over the VH modes: forwards all three
     operands, unchanged, to the sub<mode>3_fp16 pattern.  */
1845 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
1846 DONE;
1847})
1848
bab53516
SL
1849; Note that NEON operations don't support the full IEEE 754 standard: in
1850; particular, denormal values are flushed to zero. This means that GCC cannot
1851; use those instructions for autovectorization, etc. unless
1852; -funsafe-math-optimizations is in effect (in which case flush-to-zero
9c582551 1853; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
bab53516
SL
1854; header) must work in either case: if -funsafe-math-optimizations is given,
1855; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1856; expand to unspecs (which may potentially limit the extent to which they might
1857; be optimized by generic code).
1858
1859; Used for intrinsics when flag_unsafe_math_optimizations is false.
1860
;; Unspec (UNSPEC_VADD) form of the vector add over the VCVTF modes.
;; Emits vadd.<V_if_elem> on three <V_reg> registers.  The "type"
;; attribute is neon_fp_addsub_s<q> when <Is_float_mode> holds and
;; neon_add<q> otherwise.
1861(define_insn "neon_vadd<mode>_unspec"
0d0b79a6
RR
1862 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1863 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1864 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
1865 UNSPEC_VADD))]
1866 "TARGET_NEON"
c956e102 1867 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 1868 [(set (attr "type")
b75b1be2 1869 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1870 (const_string "neon_fp_addsub_s<q>")
1871 (const_string "neon_add<q>")))]
c956e102 1872)
88f77cba 1873
;; vaddl: unspec over the VADDL iterator taking two VDI operands and
;; producing a <V_widen> result.  The destination is printed with %q and
;; both sources with %P; <sup> and <V_sz_elem> form the type suffix.
94f0f2cc 1874(define_insn "neon_vaddl<sup><mode>"
88f77cba
JB
1875 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1876 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
94f0f2cc
JG
1877 (match_operand:VDI 2 "s_register_operand" "w")]
1878 VADDL))]
88f77cba 1879 "TARGET_NEON"
94f0f2cc 1880 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 1881 [(set_attr "type" "neon_add_long")]
c956e102 1882)
88f77cba 1883
;; vaddw: unspec over the VADDW iterator.  Operand 1 and the result have
;; the <V_widen> mode (printed with %q); operand 2 is a VDI vector
;; (printed with %P).
94f0f2cc 1884(define_insn "neon_vaddw<sup><mode>"
88f77cba
JB
1885 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1886 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
94f0f2cc
JG
1887 (match_operand:VDI 2 "s_register_operand" "w")]
1888 VADDW))]
88f77cba 1889 "TARGET_NEON"
94f0f2cc 1890 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
f7379e5e 1891 [(set_attr "type" "neon_add_widen")]
c956e102 1892)
88f77cba
JB
1893
1894; vhadd and vrhadd.
1895
;; Single pattern for the vhadd and vrhadd forms over the VDQIW modes.
;; The <r> and <sup> substitutions in the name and in the output template
;; select the variant; all three operands share one mode.
94f0f2cc 1896(define_insn "neon_v<r>hadd<sup><mode>"
88f77cba
JB
1897 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1898 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc
JG
1899 (match_operand:VDQIW 2 "s_register_operand" "w")]
1900 VHADD))]
88f77cba 1901 "TARGET_NEON"
94f0f2cc 1902 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1903 [(set_attr "type" "neon_add_halve_q")]
c956e102 1904)
88f77cba 1905
;; vqadd: unspec over the VQADD iterator on the VDQIX modes.  All three
;; operands share one mode; <sup> and <V_sz_elem> form the type suffix.
94f0f2cc 1906(define_insn "neon_vqadd<sup><mode>"
88f77cba
JB
1907 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1908 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
1909 (match_operand:VDQIX 2 "s_register_operand" "w")]
1910 VQADD))]
88f77cba 1911 "TARGET_NEON"
94f0f2cc 1912 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1913 [(set_attr "type" "neon_qadd<q>")]
c956e102 1914)
88f77cba 1915
;; v<r>addhn: unspec over the VADDHN iterator.  Both sources are VN
;; vectors (printed with %q) and the result has the <V_narrow> mode
;; (printed with %P).
94f0f2cc 1916(define_insn "neon_v<r>addhn<mode>"
88f77cba
JB
1917 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1918 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
1919 (match_operand:VN 2 "s_register_operand" "w")]
1920 VADDHN))]
88f77cba 1921 "TARGET_NEON"
94f0f2cc 1922 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
f7379e5e 1923 [(set_attr "type" "neon_add_halve_narrow_q")]
c956e102 1924)
88f77cba 1925
94f0f2cc
JG
1926;; Polynomial and Float multiplication.
;; Unspec (UNSPEC_VMUL) multiply over the VPF modes.  The <pf> substitution
;; supplies the type letter in both the pattern name and the mnemonic
;; suffix.  The "type" attribute is neon_fp_mul_s<q> when <Is_float_mode>
;; holds and neon_mul_<V_elem_ch><q> otherwise.
1927(define_insn "neon_vmul<pf><mode>"
1928 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1929 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1930 (match_operand:VPF 2 "s_register_operand" "w")]
88f77cba
JB
1931 UNSPEC_VMUL))]
1932 "TARGET_NEON"
94f0f2cc 1933 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 1934 [(set (attr "type")
b75b1be2 1935 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1936 (const_string "neon_fp_mul_s<q>")
1937 (const_string "neon_mul_<V_elem_ch><q>")))]
c956e102 1938)
88f77cba 1939
;; Canonical (mult) multiply for the VH modes under the name mul<mode>3.
;; It is only enabled when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold; it emits vmul.f16.
6da37857
MW
1940(define_insn "mul<mode>3"
1941 [(set
1942 (match_operand:VH 0 "s_register_operand" "=w")
1943 (mult:VH
1944 (match_operand:VH 1 "s_register_operand" "w")
1945 (match_operand:VH 2 "s_register_operand" "w")))]
1946 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
1947 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1948 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1949)
1950
;; (mult) multiply for the VH modes under the name neon_vmulf<mode>.  Its
;; condition is TARGET_NEON_FP16INST alone, with no dependence on
;; flag_unsafe_math_optimizations; it emits vmul.f16.
55a9b91b
MW
1951(define_insn "neon_vmulf<mode>"
1952 [(set
1953 (match_operand:VH 0 "s_register_operand" "=w")
1954 (mult:VH
1955 (match_operand:VH 1 "s_register_operand" "w")
1956 (match_operand:VH 2 "s_register_operand" "w")))]
1957 "TARGET_NEON_FP16INST"
1958 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1959 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1960)
1961
bab53516 1962(define_expand "neon_vmla<mode>"
cd65e265
DZ
1963 [(match_operand:VDQW 0 "s_register_operand")
1964 (match_operand:VDQW 1 "s_register_operand")
1965 (match_operand:VDQW 2 "s_register_operand")
1966 (match_operand:VDQW 3 "s_register_operand")]
bab53516
SL
1967 "TARGET_NEON"
1968{
  /* Builtin expander for vmla over the VDQW modes.  The
     mul<mode>3add<mode>_neon pattern is used when <Is_float_mode> is false
     or when flag_unsafe_math_optimizations is set; otherwise the unspec
     pattern neon_vmla<mode>_unspec is used.  Both receive the four
     operands in the same order.  */
1969 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1970 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1971 operands[2], operands[3]));
1972 else
1973 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1974 operands[2], operands[3]));
1975 DONE;
1976})
1977
c4216388
MGD
1978(define_expand "neon_vfma<VCVTF:mode>"
1979 [(match_operand:VCVTF 0 "s_register_operand")
1980 (match_operand:VCVTF 1 "s_register_operand")
1981 (match_operand:VCVTF 2 "s_register_operand")
94f0f2cc 1982 (match_operand:VCVTF 3 "s_register_operand")]
c4216388
MGD
1983 "TARGET_NEON && TARGET_FMA"
1984{
  /* Builtin expander for vfma over the VCVTF modes.  The operands are
     reordered for fma<mode>4_intrinsic: operands[2] and operands[3] come
     first and operands[1] is passed last.  */
1985 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1986 operands[1]));
1987 DONE;
1988})
1989
55a9b91b
MW
1990(define_expand "neon_vfma<VH:mode>"
1991 [(match_operand:VH 0 "s_register_operand")
1992 (match_operand:VH 1 "s_register_operand")
1993 (match_operand:VH 2 "s_register_operand")
1994 (match_operand:VH 3 "s_register_operand")]
1995 "TARGET_NEON_FP16INST"
1996{
  /* Builtin expander for vfma over the VH modes.  The operands are
     reordered for fma<mode>4_intrinsic: operands[2] and operands[3] come
     first and operands[1] is passed last.  */
1997 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1998 operands[1]));
1999 DONE;
2000})
2001
c4216388
MGD
2002(define_expand "neon_vfms<VCVTF:mode>"
2003 [(match_operand:VCVTF 0 "s_register_operand")
2004 (match_operand:VCVTF 1 "s_register_operand")
2005 (match_operand:VCVTF 2 "s_register_operand")
94f0f2cc 2006 (match_operand:VCVTF 3 "s_register_operand")]
c4216388
MGD
2007 "TARGET_NEON && TARGET_FMA"
2008{
  /* Builtin expander for vfms over the VCVTF modes.  The operands are
     reordered for fmsub<mode>4_intrinsic: operands[2] and operands[3] come
     first and operands[1] is passed last.  */
2009 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2010 operands[1]));
2011 DONE;
2012})
2013
55a9b91b
MW
2014(define_expand "neon_vfms<VH:mode>"
2015 [(match_operand:VH 0 "s_register_operand")
2016 (match_operand:VH 1 "s_register_operand")
2017 (match_operand:VH 2 "s_register_operand")
2018 (match_operand:VH 3 "s_register_operand")]
2019 "TARGET_NEON_FP16INST"
2020{
  /* Builtin expander for vfms over the VH modes.  The operands are
     reordered for fmsub<mode>4_intrinsic: operands[2] and operands[3] come
     first and operands[1] is passed last.  */
2021 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2022 operands[1]));
2023 DONE;
2024})
2025
06e95715
KT
2026;; The expand RTL structure here is not important.
2027;; We use the gen_* functions anyway.
2028;; We just need something to wrap the iterators around.
2029
2030(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2031 [(set (match_operand:VCVTF 0 "s_register_operand")
2032 (unspec:VCVTF
2033 [(match_operand:VCVTF 1 "s_register_operand")
2034 (PLUSMINUS:<VFML>
2035 (match_operand:<VFML> 2 "s_register_operand")
2036 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2037 "TARGET_FP16FML"
2038{
  /* Dispatch to the matching vfm..l_..<mode>_intrinsic insn.  The value
     returned by arm_simd_vect_par_cnst_half is passed for both extra
     operands (4 and 5) of that insn - presumably the parallel selecting
     the requested half of a <VFML> vector; verify against the helper.  */
2039 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2040 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2041 operands[1],
2042 operands[2],
2043 operands[3],
2044 half, half));
2045 DONE;
2046})
2047
;; vfmal on the low halves.  The RTL is an fma whose two multiplicands are
;; float_extend of vec_select on operands 2 and 3, with the selections
;; given by operands 4 and 5 (both vect_par_constant_low).  The addend is
;; operand 1, which is tied to the output by the "0" constraint.
2048(define_insn "vfmal_low<mode>_intrinsic"
2049 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2050 (fma:VCVTF
2051 (float_extend:VCVTF
2052 (vec_select:<VFMLSEL>
2053 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2054 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2055 (float_extend:VCVTF
2056 (vec_select:<VFMLSEL>
2057 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2058 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2059 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2060 "TARGET_FP16FML"
2061 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2062 [(set_attr "type" "neon_fp_mla_s<q>")]
2063)
2064
;; vfmsl on the high halves.  Same shape as the vfmal patterns except that
;; the selected half of operand 2 is wrapped in neg before the
;; float_extend.  Operands 4 and 5 are vect_par_constant_high, and operand
;; 1 (the addend) is tied to the output by the "0" constraint.
2065(define_insn "vfmsl_high<mode>_intrinsic"
2066 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2067 (fma:VCVTF
2068 (float_extend:VCVTF
2069 (neg:<VFMLSEL>
2070 (vec_select:<VFMLSEL>
2071 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2072 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2073 (float_extend:VCVTF
2074 (vec_select:<VFMLSEL>
2075 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2076 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2077 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2078 "TARGET_FP16FML"
2079 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2080 [(set_attr "type" "neon_fp_mla_s<q>")]
2081)
2082
;; vfmal on the high halves.  The RTL is an fma whose two multiplicands are
;; float_extend of vec_select on operands 2 and 3, with the selections
;; given by operands 4 and 5 (both vect_par_constant_high).  The addend is
;; operand 1, which is tied to the output by the "0" constraint.
2083(define_insn "vfmal_high<mode>_intrinsic"
2084 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2085 (fma:VCVTF
2086 (float_extend:VCVTF
2087 (vec_select:<VFMLSEL>
2088 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2089 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2090 (float_extend:VCVTF
2091 (vec_select:<VFMLSEL>
2092 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2093 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2094 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2095 "TARGET_FP16FML"
2096 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2097 [(set_attr "type" "neon_fp_mla_s<q>")]
2098)
2099
;; vfmsl on the low halves.  Same shape as the vfmal patterns except that
;; the selected half of operand 2 is wrapped in neg before the
;; float_extend.  Operands 4 and 5 are vect_par_constant_low, and operand
;; 1 (the addend) is tied to the output by the "0" constraint.
2100(define_insn "vfmsl_low<mode>_intrinsic"
2101 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2102 (fma:VCVTF
2103 (float_extend:VCVTF
2104 (neg:<VFMLSEL>
2105 (vec_select:<VFMLSEL>
2106 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2107 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2108 (float_extend:VCVTF
2109 (vec_select:<VFMLSEL>
2110 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2111 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2112 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2113 "TARGET_FP16FML"
2114 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2115 [(set_attr "type" "neon_fp_mla_s<q>")]
2116)
2117
eccf4d70
KT
2118(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2119 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2120 (unspec:VCVTF
2121 [(match_operand:VCVTF 1 "s_register_operand")
2122 (PLUSMINUS:<VFML>
2123 (match_operand:<VFML> 2 "s_register_operand")
2124 (match_operand:<VFML> 3 "s_register_operand"))
2125 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2126 "TARGET_FP16FML"
2127{
  /* Lane variant where operands 2 and 3 share the <VFML> mode.  The lane
     number in operands[4] is mapped through NEON_ENDIAN_LANE_N for the
     <VFML> mode, and the half selector for operand 2 comes from
     arm_simd_vect_par_cnst_half; both are handed to the matching
     ..._lane_..._intrinsic insn as its operands 4 and 5.  */
2128 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2129 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2130 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2131 (operands[0], operands[1],
2132 operands[2], operands[3],
2133 half, lane));
2134 DONE;
2135})
2136
2137(define_insn "vfmal_lane_low<mode>_intrinsic"
2138 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2139 (fma:VCVTF
2140 (float_extend:VCVTF
2141 (vec_select:<VFMLSEL>
2142 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2143 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2144 (float_extend:VCVTF
2145 (vec_duplicate:<VFMLSEL>
2146 (vec_select:HF
2147 (match_operand:<VFML> 3 "s_register_operand" "x")
2148 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2149 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2150 "TARGET_FP16FML"
2151 {
    /* vfmal by lane, low half of operand 2.  The lane in operands[5] is
       mapped through NEON_ENDIAN_LANE_N.  A lane at or beyond the number
       of units in the <VFMLSEL> mode is rebased by that count and printed
       against %<V_hi>3; any other lane is printed against %<V_lo>3.
       operands[5] is overwritten with the index that is printed.  */
2152 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2153 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2154 {
2155 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2156 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2157 }
2158 else
2159 {
2160 operands[5] = GEN_INT (lane);
2161 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2162 }
2163 }
2164 [(set_attr "type" "neon_fp_mla_s<q>")]
2165)
2166
2167(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2168 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2169 (unspec:VCVTF
2170 [(match_operand:VCVTF 1 "s_register_operand")
2171 (PLUSMINUS:<VFML>
2172 (match_operand:<VFML> 2 "s_register_operand")
2173 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2174 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2175 "TARGET_FP16FML"
2176{
  /* Lane variant where operand 3 has the <VFMLSEL2> mode rather than the
     <VFML> mode of operand 2.  The lane number is therefore mapped through
     NEON_ENDIAN_LANE_N using <VFMLSEL2>mode, while the half selector for
     operand 2 is still built for <VFML>mode.  */
2177 rtx lane
2178 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2179 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2180 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2181 (operands[0], operands[1], operands[2], operands[3],
2182 half, lane));
2183 DONE;
2184})
2185
2186;; Used to implement the intrinsics:
99cf78cf
TC
2187;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2188;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2189;; Needs a bit of care to get the modes of the different sub-expressions right
2190;; due to 'a' and 'b' having different sizes and make sure we use the right
2191;; S or D subregister to select the appropriate lane from.
2192
2193(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2194 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2195 (fma:VCVTF
2196 (float_extend:VCVTF
2197 (vec_select:<VFMLSEL>
2198 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2199 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2200 (float_extend:VCVTF
2201 (vec_duplicate:<VFMLSEL>
2202 (vec_select:HF
2203 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2204 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2205 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2206 "TARGET_FP16FML"
2207 {
    /* Split the endian-adjusted lane into regdiff, the number of
       <VFMLSEL>-sized registers to step past from the start of operand 3,
       and new_lane, the index within that register.  */
2208 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2209 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2210 int new_lane = lane % elts_per_reg;
2211 int regdiff = lane / elts_per_reg;
2212 operands[5] = GEN_INT (new_lane);
2213 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2214 because we want the print_operand code to print the appropriate
2215 S or D register prefix. */
2216 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2217 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2218 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2219 }
2220 [(set_attr "type" "neon_fp_mla_s<q>")]
2221)
2222
2223;; Used to implement the intrinsics:
99cf78cf
TC
2224;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2225;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2226;; Needs a bit of care to get the modes of the different sub-expressions right
2227;; due to 'a' and 'b' having different sizes and make sure we use the right
2228;; S or D subregister to select the appropriate lane from.
2229
2230(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2231 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2232 (fma:VCVTF
2233 (float_extend:VCVTF
2234 (vec_select:<VFMLSEL>
2235 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2236 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2237 (float_extend:VCVTF
2238 (vec_duplicate:<VFMLSEL>
2239 (vec_select:HF
2240 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2241 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2242 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2243 "TARGET_FP16FML"
2244 {
    /* Split the endian-adjusted lane into regdiff, the number of
       <VFMLSEL>-sized registers to step past from the start of operand 3,
       and new_lane, the index within that register.  Operand 2 is left
       as-is and printed through %<V_hi>2.  */
2245 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2246 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2247 int new_lane = lane % elts_per_reg;
2248 int regdiff = lane / elts_per_reg;
2249 operands[5] = GEN_INT (new_lane);
2250 /* We re-create operands[3] in the halved VFMLSEL mode
2251 because we've calculated the correct half-width subreg to extract
2252 the lane from and we want to print *that* subreg instead. */
2253 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2254 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2255 }
2256 [(set_attr "type" "neon_fp_mla_s<q>")]
2257)
2258
2259(define_insn "vfmal_lane_high<mode>_intrinsic"
2260 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2261 (fma:VCVTF
2262 (float_extend:VCVTF
2263 (vec_select:<VFMLSEL>
2264 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2265 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2266 (float_extend:VCVTF
2267 (vec_duplicate:<VFMLSEL>
2268 (vec_select:HF
2269 (match_operand:<VFML> 3 "s_register_operand" "x")
2270 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2271 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2272 "TARGET_FP16FML"
2273 {
    /* vfmal by lane, high half of operand 2.  The lane in operands[5] is
       mapped through NEON_ENDIAN_LANE_N.  A lane at or beyond the number
       of units in the <VFMLSEL> mode is rebased by that count and printed
       against %<V_hi>3; any other lane is printed against %<V_lo>3.
       operands[5] is overwritten with the index that is printed.  */
2274 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2275 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2276 {
2277 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2278 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2279 }
2280 else
2281 {
2282 operands[5] = GEN_INT (lane);
2283 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2284 }
2285 }
2286 [(set_attr "type" "neon_fp_mla_s<q>")]
2287)
2288
2289(define_insn "vfmsl_lane_low<mode>_intrinsic"
2290 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2291 (fma:VCVTF
2292 (float_extend:VCVTF
2293 (neg:<VFMLSEL>
2294 (vec_select:<VFMLSEL>
2295 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2296 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2297 (float_extend:VCVTF
2298 (vec_duplicate:<VFMLSEL>
2299 (vec_select:HF
2300 (match_operand:<VFML> 3 "s_register_operand" "x")
2301 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2302 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2303 "TARGET_FP16FML"
2304 {
    /* vfmsl by lane, low half of operand 2.  The lane in operands[5] is
       mapped through NEON_ENDIAN_LANE_N.  A lane at or beyond the number
       of units in the <VFMLSEL> mode is rebased by that count and printed
       against %<V_hi>3; any other lane is printed against %<V_lo>3.
       operands[5] is overwritten with the index that is printed.  */
2305 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2306 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2307 {
2308 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2309 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2310 }
2311 else
2312 {
2313 operands[5] = GEN_INT (lane);
2314 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2315 }
2316 }
2317 [(set_attr "type" "neon_fp_mla_s<q>")]
2318)
2319
2320;; Used to implement the intrinsics:
99cf78cf
TC
2321;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2322;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2323;; Needs a bit of care to get the modes of the different sub-expressions right
2324;; due to 'a' and 'b' having different sizes and make sure we use the right
2325;; S or D subregister to select the appropriate lane from.
2326
2327(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2328 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2329 (fma:VCVTF
2330 (float_extend:VCVTF
2331 (neg:<VFMLSEL>
2332 (vec_select:<VFMLSEL>
2333 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2334 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2335 (float_extend:VCVTF
2336 (vec_duplicate:<VFMLSEL>
2337 (vec_select:HF
2338 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2339 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2340 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2341 "TARGET_FP16FML"
2342 {
    /* Split the endian-adjusted lane into regdiff, the number of
       <VFMLSEL>-sized registers to step past from the start of operand 3,
       and new_lane, the index within that register.  */
2343 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2344 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2345 int new_lane = lane % elts_per_reg;
2346 int regdiff = lane / elts_per_reg;
2347 operands[5] = GEN_INT (new_lane);
2348 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2349 because we want the print_operand code to print the appropriate
2350 S or D register prefix. */
2351 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2352 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2353 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2354 }
2355 [(set_attr "type" "neon_fp_mla_s<q>")]
2356)
2357
2358;; Used to implement the intrinsics:
99cf78cf
TC
2359;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2360;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2361;; Needs a bit of care to get the modes of the different sub-expressions right
2362;; due to 'a' and 'b' having different sizes and make sure we use the right
2363;; S or D subregister to select the appropriate lane from.
2364
2365(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2366 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2367 (fma:VCVTF
2368 (float_extend:VCVTF
2369 (neg:<VFMLSEL>
2370 (vec_select:<VFMLSEL>
2371 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2372 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2373 (float_extend:VCVTF
2374 (vec_duplicate:<VFMLSEL>
2375 (vec_select:HF
2376 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2377 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2378 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2379 "TARGET_FP16FML"
2380 {
    /* Split the endian-adjusted lane into regdiff, the number of
       <VFMLSEL>-sized registers to step past from the start of operand 3,
       and new_lane, the index within that register.  Operand 2 is left
       as-is and printed through %<V_hi>2.  */
2381 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2382 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2383 int new_lane = lane % elts_per_reg;
2384 int regdiff = lane / elts_per_reg;
2385 operands[5] = GEN_INT (new_lane);
2386 /* We re-create operands[3] in the halved VFMLSEL mode
2387 because we've calculated the correct half-width subreg to extract
2388 the lane from and we want to print *that* subreg instead. */
2389 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2390 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2391 }
2392 [(set_attr "type" "neon_fp_mla_s<q>")]
2393)
2394
2395(define_insn "vfmsl_lane_high<mode>_intrinsic"
2396 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2397 (fma:VCVTF
2398 (float_extend:VCVTF
2399 (neg:<VFMLSEL>
2400 (vec_select:<VFMLSEL>
2401 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2402 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2403 (float_extend:VCVTF
2404 (vec_duplicate:<VFMLSEL>
2405 (vec_select:HF
2406 (match_operand:<VFML> 3 "s_register_operand" "x")
2407 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2408 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2409 "TARGET_FP16FML"
2410 {
    /* vfmsl by lane, high half of operand 2.  The lane in operands[5] is
       mapped through NEON_ENDIAN_LANE_N.  A lane at or beyond the number
       of units in the <VFMLSEL> mode is rebased by that count and printed
       against %<V_hi>3; any other lane is printed against %<V_lo>3.
       operands[5] is overwritten with the index that is printed.  */
2411 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2412 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2413 {
2414 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2415 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2416 }
2417 else
2418 {
2419 operands[5] = GEN_INT (lane);
2420 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2421 }
2422 }
2423 [(set_attr "type" "neon_fp_mla_s<q>")]
2424)
2425
bab53516
SL
2426; Used for intrinsics when flag_unsafe_math_optimizations is false.
2427
;; Unspec (UNSPEC_VMLA) form of vmla over the VDQW modes.  Operand 1 is
;; tied to the output by the "0" constraint, so only operands 2 and 3
;; appear as sources in the output template.  The "type" attribute is
;; neon_fp_mla_s<q> when <Is_float_mode> holds and
;; neon_mla_<V_elem_ch><q> otherwise.
2428(define_insn "neon_vmla<mode>_unspec"
f7379e5e
JG
2429 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2430 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2431 (match_operand:VDQW 2 "s_register_operand" "w")
2432 (match_operand:VDQW 3 "s_register_operand" "w")]
bab53516 2433 UNSPEC_VMLA))]
88f77cba 2434 "TARGET_NEON"
c956e102 2435 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 2436 [(set (attr "type")
b75b1be2 2437 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2438 (const_string "neon_fp_mla_s<q>")
2439 (const_string "neon_mla_<V_elem_ch><q>")))]
c956e102 2440)
88f77cba 2441
;; vmlal: unspec over the VMLAL iterator.  Operand 1 and the result have
;; the <V_widen> mode, with operand 1 tied to the output by the "0"
;; constraint; operands 2 and 3 are VW vectors printed with %P.
94f0f2cc 2442(define_insn "neon_vmlal<sup><mode>"
88f77cba
JB
2443 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2444 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2445 (match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2446 (match_operand:VW 3 "s_register_operand" "w")]
2447 VMLAL))]
88f77cba 2448 "TARGET_NEON"
94f0f2cc 2449 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2450 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
c956e102 2451)
88f77cba 2452
;; Expander for neon_vmls<mode>.  For non-float modes, or whenever
;; flag_unsafe_math_optimizations is set, the operation is emitted through
;; the mul<mode>3neg<mode>add<mode>_neon pattern; otherwise the
;; neon_vmls<mode>_unspec pattern is emitted instead.
bab53516 2453(define_expand "neon_vmls<mode>"
cd65e265
DZ
2454 [(match_operand:VDQW 0 "s_register_operand")
2455 (match_operand:VDQW 1 "s_register_operand")
2456 (match_operand:VDQW 2 "s_register_operand")
2457 (match_operand:VDQW 3 "s_register_operand")]
bab53516
SL
2458 "TARGET_NEON"
2459{
2460 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2461 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2462 operands[1], operands[2], operands[3]));
2463 else
2464 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2465 operands[2], operands[3]));
2466 DONE;
2467})
2468
2469; Used for intrinsics when flag_unsafe_math_optimizations is false.
; Operand 1 has the "0" constraint, so it is allocated to the same register
; as the result; the assembler template therefore prints only operands 0, 2
; and 3.  The "type" attribute is neon_fp_mla_s<q> when <Is_float_mode>
; holds and neon_mla_<V_elem_ch><q> otherwise.
2470
2471(define_insn "neon_vmls<mode>_unspec"
f7379e5e
JG
2472 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2473 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2474 (match_operand:VDQW 2 "s_register_operand" "w")
2475 (match_operand:VDQW 3 "s_register_operand" "w")]
bab53516 2476 UNSPEC_VMLS))]
88f77cba 2477 "TARGET_NEON"
c956e102 2478 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 2479 [(set (attr "type")
b75b1be2 2480 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2481 (const_string "neon_fp_mla_s<q>")
2482 (const_string "neon_mla_<V_elem_ch><q>")))]
c956e102 2483)
88f77cba 2484
94f0f2cc 2485(define_insn "neon_vmlsl<sup><mode>"
88f77cba
JB
2486 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2487 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2488 (match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2489 (match_operand:VW 3 "s_register_operand" "w")]
2490 VMLSL))]
88f77cba 2491 "TARGET_NEON"
94f0f2cc 2492 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2493 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
c956e102 2494)
88f77cba 2495
94f0f2cc
JG
2496;; vqdmulh, vqrdmulh
2497(define_insn "neon_vq<r>dmulh<mode>"
88f77cba
JB
2498 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2499 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
94f0f2cc
JG
2500 (match_operand:VMDQI 2 "s_register_operand" "w")]
2501 VQDMULH))]
88f77cba 2502 "TARGET_NEON"
94f0f2cc 2503 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2504 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
c956e102 2505)
88f77cba 2506
5f2ca3b2
MW
2507;; vqrdmlah, vqrdmlsh
2508(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2509 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2510 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2511 (match_operand:VMDQI 2 "s_register_operand" "w")
2512 (match_operand:VMDQI 3 "s_register_operand" "w")]
2513 VQRDMLH_AS))]
2514 "TARGET_NEON_RDMA"
2515 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2516 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2517)
2518
88f77cba
JB
2519(define_insn "neon_vqdmlal<mode>"
2520 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2521 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2522 (match_operand:VMDI 2 "s_register_operand" "w")
94f0f2cc 2523 (match_operand:VMDI 3 "s_register_operand" "w")]
88f77cba
JB
2524 UNSPEC_VQDMLAL))]
2525 "TARGET_NEON"
c956e102 2526 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
f7379e5e 2527 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
c956e102 2528)
88f77cba
JB
2529
2530(define_insn "neon_vqdmlsl<mode>"
2531 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2532 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2533 (match_operand:VMDI 2 "s_register_operand" "w")
94f0f2cc 2534 (match_operand:VMDI 3 "s_register_operand" "w")]
88f77cba
JB
2535 UNSPEC_VQDMLSL))]
2536 "TARGET_NEON"
c956e102 2537 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
f7379e5e 2538 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
c956e102 2539)
88f77cba 2540
94f0f2cc 2541(define_insn "neon_vmull<sup><mode>"
88f77cba
JB
2542 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2543 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
2544 (match_operand:VW 2 "s_register_operand" "w")]
2545 VMULL))]
88f77cba 2546 "TARGET_NEON"
94f0f2cc 2547 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2548 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
c956e102 2549)
88f77cba
JB
2550
2551(define_insn "neon_vqdmull<mode>"
2552 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2553 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
94f0f2cc 2554 (match_operand:VMDI 2 "s_register_operand" "w")]
88f77cba
JB
2555 UNSPEC_VQDMULL))]
2556 "TARGET_NEON"
c956e102 2557 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
f7379e5e 2558 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
c956e102 2559)
88f77cba 2560
;; Expander for neon_vsub<mode>.  Emits the generic sub<mode>3 pattern when
;; the mode is not a float mode or flag_unsafe_math_optimizations is set;
;; otherwise emits neon_vsub<mode>_unspec.
bab53516 2561(define_expand "neon_vsub<mode>"
cd65e265
DZ
2562 [(match_operand:VCVTF 0 "s_register_operand")
2563 (match_operand:VCVTF 1 "s_register_operand")
2564 (match_operand:VCVTF 2 "s_register_operand")]
bab53516
SL
2565 "TARGET_NEON"
2566{
2567 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2568 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2569 else
2570 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2571 operands[2]));
2572 DONE;
2573})
2574
2575; Used for intrinsics when flag_unsafe_math_optimizations is false.
2576
2577(define_insn "neon_vsub<mode>_unspec"
0d0b79a6
RR
2578 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2580 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
2581 UNSPEC_VSUB))]
2582 "TARGET_NEON"
c956e102 2583 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 2584 [(set (attr "type")
b75b1be2 2585 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2586 (const_string "neon_fp_addsub_s<q>")
2587 (const_string "neon_sub<q>")))]
c956e102 2588)
88f77cba 2589
94f0f2cc 2590(define_insn "neon_vsubl<sup><mode>"
88f77cba
JB
2591 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2592 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
94f0f2cc
JG
2593 (match_operand:VDI 2 "s_register_operand" "w")]
2594 VSUBL))]
88f77cba 2595 "TARGET_NEON"
94f0f2cc 2596 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2597 [(set_attr "type" "neon_sub_long")]
c956e102 2598)
88f77cba 2599
94f0f2cc 2600(define_insn "neon_vsubw<sup><mode>"
88f77cba
JB
2601 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2602 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
94f0f2cc
JG
2603 (match_operand:VDI 2 "s_register_operand" "w")]
2604 VSUBW))]
88f77cba 2605 "TARGET_NEON"
94f0f2cc 2606 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
f7379e5e 2607 [(set_attr "type" "neon_sub_widen")]
c956e102 2608)
88f77cba 2609
94f0f2cc 2610(define_insn "neon_vqsub<sup><mode>"
88f77cba
JB
2611 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2612 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
2613 (match_operand:VDQIX 2 "s_register_operand" "w")]
2614 VQSUB))]
88f77cba 2615 "TARGET_NEON"
94f0f2cc 2616 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2617 [(set_attr "type" "neon_qsub<q>")]
c956e102 2618)
88f77cba 2619
94f0f2cc 2620(define_insn "neon_vhsub<sup><mode>"
88f77cba
JB
2621 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2622 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc
JG
2623 (match_operand:VDQIW 2 "s_register_operand" "w")]
2624 VHSUB))]
88f77cba 2625 "TARGET_NEON"
94f0f2cc 2626 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2627 [(set_attr "type" "neon_sub_halve<q>")]
c956e102 2628)
88f77cba 2629
94f0f2cc 2630(define_insn "neon_v<r>subhn<mode>"
88f77cba
JB
2631 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2632 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
2633 (match_operand:VN 2 "s_register_operand" "w")]
2634 VSUBHN))]
88f77cba 2635 "TARGET_NEON"
94f0f2cc 2636 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
f7379e5e 2637 [(set_attr "type" "neon_sub_halve_narrow_q")]
c956e102 2638)
88f77cba 2639
381811fa
KT
2640;; These may expand to an UNSPEC pattern when a floating point mode is used
2641;; without unsafe math optimizations.
;; In every other case the neg/COMPARISONS form, neon_vc<cmp_op><mode>_insn,
;; is emitted.  Operand 2 may be a register or zero (reg_or_zero_operand).
2642(define_expand "neon_vc<cmp_op><mode>"
cd65e265 2643 [(match_operand:<V_cmp_result> 0 "s_register_operand")
381811fa 2644 (neg:<V_cmp_result>
cd65e265
DZ
2645 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2646 (match_operand:VDQW 2 "reg_or_zero_operand")))]
88f77cba 2647 "TARGET_NEON"
381811fa
KT
2648 {
2649 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2650 is enabled. */
2651 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2652 && !flag_unsafe_math_optimizations)
2653 {
2654 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2655 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2656 whereas this expander iterates over the integer modes as well,
2657 but we will never expand to UNSPECs for the integer comparisons. */
2658 switch (<MODE>mode)
2659 {
4e10a5a7 2660 case E_V2SFmode:
381811fa
KT
2661 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2662 operands[1],
2663 operands[2]));
2664 break;
4e10a5a7 2665 case E_V4SFmode:
381811fa
KT
2666 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2667 operands[1],
2668 operands[2]));
2669 break;
2670 default:
2671 gcc_unreachable ();
2672 }
2673 }
2674 else
2675 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2676 operands[1],
2677 operands[2]));
2678 DONE;
2679 }
c956e102 2680)
88f77cba 2681
;; Vector compare in neg/COMPARISONS form.  The insn condition disables it
;; for float vector modes unless flag_unsafe_math_optimizations is set.
;; Alternative 0 compares against a register; alternative 1 (constraint Dz)
;; compares against zero and prints "#0" as the last assembler operand.
381811fa 2682(define_insn "neon_vc<cmp_op><mode>_insn"
5bfc5baa 2683 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
381811fa
KT
2684 (neg:<V_cmp_result>
2685 (COMPARISONS:<V_cmp_result>
2686 (match_operand:VDQW 1 "s_register_operand" "w,w")
2687 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2688 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2689 && !flag_unsafe_math_optimizations)"
2690 {
2691 char pattern[100];
/* Build the assembler template at output time.  The first %s becomes the
   element type letter ("f" for float vector modes, <cmp_type> otherwise)
   and the second %s the final operand.  "%%" produces a literal '%' so
   that output_asm_insn performs the operand substitution afterwards.  */
2692 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2693 " %%<V_reg>1, %s",
2694 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2695 ? "f" : "<cmp_type>",
2696 which_alternative == 0
2697 ? "%<V_reg>2" : "#0");
2698 output_asm_insn (pattern, operands);
2699 return "";
2700 }
003bb7f3 2701 [(set (attr "type")
381811fa 2702 (if_then_else (match_operand 2 "zero_operand")
f7379e5e 2703 (const_string "neon_compare_zero<q>")
381811fa 2704 (const_string "neon_compare<q>")))]
c956e102 2705)
88f77cba 2706
381811fa 2707(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
5bfc5baa
JB
2708 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2709 (unspec:<V_cmp_result>
381811fa
KT
2710 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2711 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2712 NEON_VCMP))]
88f77cba 2713 "TARGET_NEON"
381811fa
KT
2714 {
2715 char pattern[100];
2716 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2717 " %%<V_reg>1, %s",
2718 which_alternative == 0
2719 ? "%<V_reg>2" : "#0");
2720 output_asm_insn (pattern, operands);
2721 return "";
2722}
2723 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2724)
88f77cba 2725
55a9b91b
MW
;; VH (half-precision vector) counterpart of the neon_vc<cmp_op><mode>
;; expander, available under TARGET_NEON_FP16INST.  It dispatches to the
;; _fp16insn_unspec pattern for float vector modes without
;; flag_unsafe_math_optimizations and to the _fp16insn pattern otherwise.
2726(define_expand "neon_vc<cmp_op><mode>"
2727 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2728 (neg:<V_cmp_result>
2729 (COMPARISONS:VH
2730 (match_operand:VH 1 "s_register_operand")
2731 (match_operand:VH 2 "reg_or_zero_operand")))]
2732 "TARGET_NEON_FP16INST"
2733{
2734 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2735 is enabled. */
2736 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2737 && !flag_unsafe_math_optimizations)
2738 emit_insn
2739 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2740 (operands[0], operands[1], operands[2]));
2741 else
2742 emit_insn
2743 (gen_neon_vc<cmp_op><mode>_fp16insn
2744 (operands[0], operands[1], operands[2]));
2745 DONE;
2746})
2747
2748(define_insn "neon_vc<cmp_op><mode>_fp16insn"
2749 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2750 (neg:<V_cmp_result>
2751 (COMPARISONS:<V_cmp_result>
2752 (match_operand:VH 1 "s_register_operand" "w,w")
2753 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2754 "TARGET_NEON_FP16INST
2755 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2756 && !flag_unsafe_math_optimizations)"
2757{
2758 char pattern[100];
2759 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2760 " %%<V_reg>1, %s",
2761 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2762 ? "f" : "<cmp_type>",
2763 which_alternative == 0
2764 ? "%<V_reg>2" : "#0");
2765 output_asm_insn (pattern, operands);
2766 return "";
2767}
2768 [(set (attr "type")
2769 (if_then_else (match_operand 2 "zero_operand")
2770 (const_string "neon_compare_zero<q>")
2771 (const_string "neon_compare<q>")))])
2772
2773(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2774 [(set
2775 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2776 (unspec:<V_cmp_result>
2777 [(match_operand:VH 1 "s_register_operand" "w,w")
2778 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2779 NEON_VCMP))]
2780 "TARGET_NEON_FP16INST"
2781{
2782 char pattern[100];
2783 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2784 " %%<V_reg>1, %s",
2785 which_alternative == 0
2786 ? "%<V_reg>2" : "#0");
2787 output_asm_insn (pattern, operands);
2788 return "";
2789}
2790 [(set_attr "type" "neon_fp_compare_s<q>")])
2791
381811fa 2792(define_insn "neon_vc<cmp_op>u<mode>"
fd92bb80 2793 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
381811fa
KT
2794 (neg:<V_cmp_result>
2795 (GTUGEU:<V_cmp_result>
2796 (match_operand:VDQIW 1 "s_register_operand" "w")
2797 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
fd92bb80 2798 "TARGET_NEON"
381811fa 2799 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2800 [(set_attr "type" "neon_compare<q>")]
fd92bb80
MGD
2801)
2802
381811fa
KT
;; Expander for the absolute-value compares (GTGE applied to abs of both
;; operands) on VCVTF modes.  With flag_unsafe_math_optimizations the
;; RTL-described neon_vca<cmp_op><mode>_insn is emitted; without it the
;; neon_vca<cmp_op><mode>_insn_unspec form is emitted instead.
2803(define_expand "neon_vca<cmp_op><mode>"
2804 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2805 (neg:<V_cmp_result>
2806 (GTGE:<V_cmp_result>
2807 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2808 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
5bfc5baa 2809 "TARGET_NEON"
381811fa
KT
2810 {
2811 if (flag_unsafe_math_optimizations)
2812 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2813 operands[2]));
2814 else
2815 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2816 operands[1],
2817 operands[2]));
2818 DONE;
2819 }
5bfc5baa
JB
2820)
2821
381811fa 2822(define_insn "neon_vca<cmp_op><mode>_insn"
88f77cba 2823 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
381811fa
KT
2824 (neg:<V_cmp_result>
2825 (GTGE:<V_cmp_result>
2826 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2827 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2828 "TARGET_NEON && flag_unsafe_math_optimizations"
2829 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2830 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2831)
88f77cba 2832
381811fa 2833(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
88f77cba
JB
2834 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2835 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 2836 (match_operand:VCVTF 2 "s_register_operand" "w")]
381811fa 2837 NEON_VACMP))]
88f77cba 2838 "TARGET_NEON"
381811fa 2839 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2840 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2841)
88f77cba 2842
55a9b91b
MW
;; VH counterpart of the absolute-value compare expander, iterating over
;; GLTE and available under TARGET_NEON_FP16INST.  Emits the _fp16insn
;; pattern when flag_unsafe_math_optimizations is set and the
;; _fp16insn_unspec pattern otherwise.
2843(define_expand "neon_vca<cmp_op><mode>"
2844 [(set
2845 (match_operand:<V_cmp_result> 0 "s_register_operand")
2846 (neg:<V_cmp_result>
2847 (GLTE:<V_cmp_result>
2848 (abs:VH (match_operand:VH 1 "s_register_operand"))
2849 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2850 "TARGET_NEON_FP16INST"
2851{
2852 if (flag_unsafe_math_optimizations)
2853 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2854 (operands[0], operands[1], operands[2]));
2855 else
2856 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2857 (operands[0], operands[1], operands[2]));
2858 DONE;
2859})
2860
2861(define_insn "neon_vca<cmp_op><mode>_fp16insn"
2862 [(set
2863 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2864 (neg:<V_cmp_result>
2865 (GLTE:<V_cmp_result>
2866 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2867 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2868 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2869 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2870 [(set_attr "type" "neon_fp_compare_s<q>")]
2871)
2872
;; UNSPEC form of the half-precision (VH) absolute-value compare, emitted by
;; the VH neon_vca<cmp_op><mode> expander when flag_unsafe_math_optimizations
;; is not set.  The insn condition is TARGET_NEON_FP16INST, matching that
;; expander and the other VH compare patterns, so the pattern is never
;; enabled on a target without the FP16 vector instructions.
2873(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2874 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2875 (unspec:<V_cmp_result>
2876 [(match_operand:VH 1 "s_register_operand" "w")
2877 (match_operand:VH 2 "s_register_operand" "w")]
2878 NEON_VAGLTE))]
2879 "TARGET_NEON_FP16INST"
2880 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2881 [(set_attr "type" "neon_fp_compare_s<q>")]
2882)
2883
;; Compare-against-zero expander for VH modes.  It forwards to
;; gen_neon_vc<cmp_op><mode>, passing CONST0_RTX (<MODE>mode) as the second
;; comparand.
2884(define_expand "neon_vc<cmp_op>z<mode>"
2885 [(set
2886 (match_operand:<V_cmp_result> 0 "s_register_operand")
2887 (COMPARISONS:<V_cmp_result>
2888 (match_operand:VH 1 "s_register_operand")
2889 (const_int 0)))]
2890 "TARGET_NEON_FP16INST"
2891 {
2892 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2893 CONST0_RTX (<MODE>mode)));
2894 DONE;
2895})
2896
88f77cba
JB
2897(define_insn "neon_vtst<mode>"
2898 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2899 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc 2900 (match_operand:VDQIW 2 "s_register_operand" "w")]
88f77cba
JB
2901 UNSPEC_VTST))]
2902 "TARGET_NEON"
c956e102 2903 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2904 [(set_attr "type" "neon_tst<q>")]
c956e102 2905)
88f77cba 2906
94f0f2cc
JG
2907(define_insn "neon_vabd<sup><mode>"
2908 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2909 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2910 (match_operand:VDQIW 2 "s_register_operand" "w")]
2911 VABD))]
88f77cba 2912 "TARGET_NEON"
94f0f2cc
JG
2913 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2914 [(set_attr "type" "neon_abd<q>")]
c956e102 2915)
88f77cba 2916
55a9b91b
MW
;; Half-precision (VH) floating-point absolute difference, UNSPEC_VABD_F.
;; Classified with the floating-point "neon_fp_abd_s<q>" type, as the VCVTF
;; neon_vabdf<mode> pattern is, rather than the integer "neon_abd<q>" type.
2917(define_insn "neon_vabd<mode>"
2918 [(set (match_operand:VH 0 "s_register_operand" "=w")
2919 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2920 (match_operand:VH 2 "s_register_operand" "w")]
2921 UNSPEC_VABD_F))]
2922 "TARGET_NEON_FP16INST"
2923 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2924 [(set_attr "type" "neon_fp_abd_s<q>")]
2925)
2926
94f0f2cc
JG
2927(define_insn "neon_vabdf<mode>"
2928 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2929 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2930 (match_operand:VCVTF 2 "s_register_operand" "w")]
2931 UNSPEC_VABD_F))]
2932 "TARGET_NEON"
2933 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2934 [(set_attr "type" "neon_fp_abd_s<q>")]
2935)
2936
2937(define_insn "neon_vabdl<sup><mode>"
88f77cba
JB
2938 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2939 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
2940 (match_operand:VW 2 "s_register_operand" "w")]
2941 VABDL))]
88f77cba 2942 "TARGET_NEON"
94f0f2cc 2943 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2944 [(set_attr "type" "neon_abd_long")]
c956e102 2945)
88f77cba 2946
94f0f2cc 2947(define_insn "neon_vaba<sup><mode>"
88f77cba 2948 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
5b28d821 2949 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
94f0f2cc
JG
2950 (match_operand:VDQIW 3 "s_register_operand" "w")]
2951 VABD)
5b28d821 2952 (match_operand:VDQIW 1 "s_register_operand" "0")))]
88f77cba 2953 "TARGET_NEON"
94f0f2cc 2954 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
f7379e5e 2955 [(set_attr "type" "neon_arith_acc<q>")]
c956e102 2956)
88f77cba 2957
94f0f2cc 2958(define_insn "neon_vabal<sup><mode>"
88f77cba 2959 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5b28d821 2960 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2961 (match_operand:VW 3 "s_register_operand" "w")]
2962 VABDL)
5b28d821 2963 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
88f77cba 2964 "TARGET_NEON"
94f0f2cc 2965 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2966 [(set_attr "type" "neon_arith_acc<q>")]
c956e102 2967)
88f77cba 2968
84ae7213
PW
;; Expander <sup>sadv16qi: operands 1 and 2 are V16QI inputs, operand 3 is a
;; V4SI value that is accumulated into, and operand 0 is the V4SI result.
2969(define_expand "<sup>sadv16qi"
2970 [(use (match_operand:V4SI 0 "register_operand"))
2971 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2972 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2973 (use (match_operand:V4SI 3 "register_operand"))]
2974 "TARGET_NEON"
2975 {
2976 rtx reduc = gen_reg_rtx (V8HImode);
2977 rtx op1_highpart = gen_reg_rtx (V8QImode);
2978 rtx op2_highpart = gen_reg_rtx (V8QImode);
2979
/* Start REDUC (V8HI) with the vabdl result of the low V8QI parts of the
   two inputs.  */
2980 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2981 gen_lowpart (V8QImode, operands[1]),
2982 gen_lowpart (V8QImode, operands[2])));
2983
/* Fetch the high V8QI parts and fold them into REDUC with vabal.  */
2984 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2985 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2986 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2987 op1_highpart, op2_highpart));
/* vpadal accumulates REDUC into operand 3 in place; the result is then
   copied to operand 0.  */
2988 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2989
2990 emit_move_insn (operands[0], operands[3]);
2991 DONE;
2992 }
2993)
2994
94f0f2cc
JG
2995(define_insn "neon_v<maxmin><sup><mode>"
2996 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2997 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2998 (match_operand:VDQIW 2 "s_register_operand" "w")]
2999 VMAXMIN))]
88f77cba 3000 "TARGET_NEON"
94f0f2cc
JG
3001 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3002 [(set_attr "type" "neon_minmax<q>")]
c956e102 3003)
88f77cba 3004
94f0f2cc
JG
3005(define_insn "neon_v<maxmin>f<mode>"
3006 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3007 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3008 (match_operand:VCVTF 2 "s_register_operand" "w")]
3009 VMAXMINF))]
88f77cba 3010 "TARGET_NEON"
94f0f2cc 3011 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
0a18c19f
DS
3012 [(set_attr "type" "neon_fp_minmax_s<q>")]
3013)
3014
55a9b91b
MW
3015(define_insn "neon_v<maxmin>f<mode>"
3016 [(set (match_operand:VH 0 "s_register_operand" "=w")
3017 (unspec:VH
3018 [(match_operand:VH 1 "s_register_operand" "w")
3019 (match_operand:VH 2 "s_register_operand" "w")]
3020 VMAXMINF))]
3021 "TARGET_NEON_FP16INST"
3022 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3023 [(set_attr "type" "neon_fp_minmax_s<q>")]
3024)
3025
3026(define_insn "neon_vp<maxmin>fv4hf"
3027 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3028 (unspec:V4HF
3029 [(match_operand:V4HF 1 "s_register_operand" "w")
3030 (match_operand:V4HF 2 "s_register_operand" "w")]
3031 VPMAXMINF))]
3032 "TARGET_NEON_FP16INST"
3033 "vp<maxmin>.f16\t%P0, %P1, %P2"
3034 [(set_attr "type" "neon_reduc_minmax")]
3035)
3036
3037(define_insn "neon_<fmaxmin_op><mode>"
3038 [(set
3039 (match_operand:VH 0 "s_register_operand" "=w")
3040 (unspec:VH
3041 [(match_operand:VH 1 "s_register_operand" "w")
3042 (match_operand:VH 2 "s_register_operand" "w")]
3043 VMAXMINFNM))]
3044 "TARGET_NEON_FP16INST"
3045 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3046 [(set_attr "type" "neon_fp_minmax_s<q>")]
3047)
9fc158eb
BB
3048
3049;; v<maxmin>nm intrinsics.
3050(define_insn "neon_<fmaxmin_op><mode>"
3051 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3052 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3053 (match_operand:VCVTF 2 "s_register_operand" "w")]
3054 VMAXMINFNM))]
c8d61ab8 3055 "TARGET_NEON && TARGET_VFP5"
9fc158eb
BB
3056 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3057 [(set_attr "type" "neon_fp_minmax_s<q>")]
3058)
55a9b91b 3059
0a18c19f
DS
3060;; Vector forms for the IEEE-754 fmax()/fmin() functions
3061(define_insn "<fmaxmin><mode>3"
3062 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3063 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3064 (match_operand:VCVTF 2 "s_register_operand" "w")]
3065 VMAXMINFNM))]
c8d61ab8 3066 "TARGET_NEON && TARGET_VFP5"
0a18c19f 3067 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
94f0f2cc 3068 [(set_attr "type" "neon_fp_minmax_s<q>")]
c956e102 3069)
88f77cba
JB
3070
3071(define_expand "neon_vpadd<mode>"
cd65e265
DZ
3072 [(match_operand:VD 0 "s_register_operand")
3073 (match_operand:VD 1 "s_register_operand")
3074 (match_operand:VD 2 "s_register_operand")]
88f77cba
JB
3075 "TARGET_NEON"
3076{
3077 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3078 operands[2]));
3079 DONE;
3080})
3081
94f0f2cc 3082(define_insn "neon_vpaddl<sup><mode>"
88f77cba 3083 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
94f0f2cc
JG
3084 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3085 VPADDL))]
88f77cba 3086 "TARGET_NEON"
94f0f2cc 3087 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3088 [(set_attr "type" "neon_reduc_add_long")]
c956e102 3089)
88f77cba 3090
94f0f2cc 3091(define_insn "neon_vpadal<sup><mode>"
88f77cba
JB
3092 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3093 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
94f0f2cc
JG
3094 (match_operand:VDQIW 2 "s_register_operand" "w")]
3095 VPADAL))]
88f77cba 3096 "TARGET_NEON"
94f0f2cc 3097 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
f7379e5e 3098 [(set_attr "type" "neon_reduc_add_acc")]
c956e102 3099)
88f77cba 3100
94f0f2cc
JG
3101(define_insn "neon_vp<maxmin><sup><mode>"
3102 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3103 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3104 (match_operand:VDI 2 "s_register_operand" "w")]
3105 VPMAXMIN))]
88f77cba 3106 "TARGET_NEON"
94f0f2cc
JG
3107 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3108 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 3109)
88f77cba 3110
94f0f2cc
JG
3111(define_insn "neon_vp<maxmin>f<mode>"
3112 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3113 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3114 (match_operand:VCVTF 2 "s_register_operand" "w")]
3115 VPMAXMINF))]
88f77cba 3116 "TARGET_NEON"
94f0f2cc
JG
3117 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3118 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
c956e102 3119)
88f77cba
JB
3120
3121(define_insn "neon_vrecps<mode>"
3122 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3123 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 3124 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
3125 UNSPEC_VRECPS))]
3126 "TARGET_NEON"
c956e102 3127 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 3128 [(set_attr "type" "neon_fp_recps_s<q>")]
c956e102 3129)
88f77cba 3130
55a9b91b
MW
3131(define_insn "neon_vrecps<mode>"
3132 [(set
3133 (match_operand:VH 0 "s_register_operand" "=w")
3134 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3135 (match_operand:VH 2 "s_register_operand" "w")]
3136 UNSPEC_VRECPS))]
3137 "TARGET_NEON_FP16INST"
3138 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3139 [(set_attr "type" "neon_fp_recps_s<q>")]
3140)
3141
88f77cba
JB
3142(define_insn "neon_vrsqrts<mode>"
3143 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3144 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 3145 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
3146 UNSPEC_VRSQRTS))]
3147 "TARGET_NEON"
c956e102 3148 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 3149 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
c956e102 3150)
88f77cba 3151
55a9b91b
MW
3152(define_insn "neon_vrsqrts<mode>"
3153 [(set
3154 (match_operand:VH 0 "s_register_operand" "=w")
3155 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3156 (match_operand:VH 2 "s_register_operand" "w")]
3157 UNSPEC_VRSQRTS))]
3158 "TARGET_NEON_FP16INST"
3159 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3160 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3161)
3162
bab53516 3163(define_expand "neon_vabs<mode>"
cd65e265
DZ
3164 [(match_operand:VDQW 0 "s_register_operand")
3165 (match_operand:VDQW 1 "s_register_operand")]
88f77cba 3166 "TARGET_NEON"
bab53516
SL
3167{
3168 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3169 DONE;
3170})
88f77cba
JB
3171
3172(define_insn "neon_vqabs<mode>"
3173 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
94f0f2cc 3174 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
88f77cba
JB
3175 UNSPEC_VQABS))]
3176 "TARGET_NEON"
c956e102 3177 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3178 [(set_attr "type" "neon_qabs<q>")]
c956e102 3179)
88f77cba 3180
7a10ea9f
KT
3181(define_insn "neon_bswap<mode>"
3182 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3183 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3184 "TARGET_NEON"
3185 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3186 [(set_attr "type" "neon_rev<q>")]
3187)
3188
88f77cba 3189(define_expand "neon_vneg<mode>"
cd65e265
DZ
3190 [(match_operand:VDQW 0 "s_register_operand")
3191 (match_operand:VDQW 1 "s_register_operand")]
88f77cba
JB
3192 "TARGET_NEON"
3193{
3194 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3195 DONE;
3196})
3197
c2b7062d
TC
3198
3199;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
3200;; fact that their usage needs to guarantee that the source vectors are
3201;; contiguous. It would be wrong to describe the operation without being able
3202;; to describe the permute that is also required, but even if that is done
3203;; the permute would have been created as a LOAD_LANES which means the values
3204;; in the registers are in the wrong order.
3205(define_insn "neon_vcadd<rot><mode>"
3206 [(set (match_operand:VF 0 "register_operand" "=w")
3207 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3208 (match_operand:VF 2 "register_operand" "w")]
3209 VCADD))]
3210 "TARGET_COMPLEX"
3211 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3212 [(set_attr "type" "neon_fcadd")]
3213)
3214
3215(define_insn "neon_vcmla<rot><mode>"
3216 [(set (match_operand:VF 0 "register_operand" "=w")
3217 (plus:VF (match_operand:VF 1 "register_operand" "0")
3218 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3219 (match_operand:VF 3 "register_operand" "w")]
3220 VCMLA)))]
3221 "TARGET_COMPLEX"
3222 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3223 [(set_attr "type" "neon_fcmla")]
3224)
3225
3226(define_insn "neon_vcmla_lane<rot><mode>"
3227 [(set (match_operand:VF 0 "s_register_operand" "=w")
3228 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3229 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3230 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3231 (match_operand:SI 4 "const_int_operand" "n")]
3232 VCMLA)))]
3233 "TARGET_COMPLEX"
3234 {
ee8045e5 3235 operands = neon_vcmla_lane_prepare_operands (operands);
c2b7062d
TC
3236 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3237 }
3238 [(set_attr "type" "neon_fcmla")]
3239)
3240
;; Lane form of vcmla on VDF vectors whose indexed operand (3) has the
;; <V_DOUBLE> mode.  Operand 4 is the constant lane index.
(define_insn "neon_vcmla_laneq<rot><mode>"
  [(set (match_operand:VDF 0 "s_register_operand" "=w")
        (plus:VDF
          (match_operand:VDF 1 "s_register_operand" "0")
          (unspec:VDF
            [(match_operand:VDF 2 "s_register_operand" "w")
             (match_operand:<V_DOUBLE> 3 "s_register_operand"
                                         "<VF_constraint>")
             (match_operand:SI 4 "const_int_operand" "n")]
            VCMLA)))]
  "TARGET_COMPLEX"
{
  /* Print from the operand array handed back by the helper.  */
  operands = neon_vcmla_lane_prepare_operands (operands);
  return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)
3255
;; Lane form of vcmla on VQ_HSF vectors whose indexed operand (3) has the
;; <V_HALF> mode.  Operand 4 is the constant lane index.
(define_insn "neon_vcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
        (plus:VQ_HSF
          (match_operand:VQ_HSF 1 "s_register_operand" "0")
          (unspec:VQ_HSF
            [(match_operand:VQ_HSF 2 "s_register_operand" "w")
             (match_operand:<V_HALF> 3 "s_register_operand"
                                       "<VF_constraint>")
             (match_operand:SI 4 "const_int_operand" "n")]
            VCMLA)))]
  "TARGET_COMPLEX"
{
  /* Print from the operand array handed back by the helper.  */
  operands = neon_vcmla_lane_prepare_operands (operands);
  return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)
3270
3271
f8e109ba
TC
;; Builtin-facing Dot Product instruction.  Operand 1 is the accumulator
;; and is tied to the destination; operands 2 and 3 are the multiplicands.
(define_insn "neon_<sup>dot<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (match_operand:VCVTI 1 "register_operand" "0")
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:<VSI2QI> 3 "register_operand" "w")]
            DOTPROD)))]
  "TARGET_DOTPROD"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)
3285
f348846e
SMW
;; Builtin-facing vusdot instruction (requires TARGET_I8MM).  The
;; accumulator is operand 1, tied to the destination, and appears as the
;; second arm of the PLUS.
(define_insn "neon_usdot<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:<VSI2QI> 3 "register_operand" "w")]
            UNSPEC_DOT_US)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
  [(set_attr "type" "neon_dot<q>")]
)
3299
f8e109ba
TC
;; Builtin-facing indexed Dot Product instruction.  Operand 3 is the V8QI
;; vector that is indexed and operand 4 is the lane number.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (match_operand:VCVTI 1 "register_operand" "0")
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:V8QI 3 "register_operand" "t")
             (match_operand:SI 4 "immediate_operand" "i")]
            DOTPROD)))]
  "TARGET_DOTPROD"
{
  /* Map the requested lane through NEON_ENDIAN_LANE_N for V8QImode before
     it is printed as the index.  */
  const HOST_WIDE_INT requested_lane = INTVAL (operands[4]);
  operands[4] = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, requested_lane));
  return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
}
  [(set_attr "type" "neon_dot<q>")]
)
3318
f348846e
SMW
;; Builtin-facing indexed Dot Product instructions of the v8.6 I8MM
;; extension.  Operand 1 is the accumulator (tied to the destination),
;; operand 3 the V8QI vector that is indexed and operand 4 the lane number,
;; which is printed unchanged.
;; NOTE(review): unlike the TARGET_DOTPROD lane pattern, no endian lane
;; adjustment is applied here; the former
;; "operands[4] = GEN_INT (INTVAL (operands[4]))" only rebuilt the same
;; constant and has been dropped.  Confirm whether big-endian needs a
;; remapping.
(define_insn "neon_<sup>dot_lane<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand" "=w")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 2 "register_operand" "w")
             (match_operand:V8QI 3 "register_operand" "t")
             (match_operand:SI 4 "immediate_operand" "i")]
            DOTPROD_I8MM)
          (match_operand:VCVTI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
  [(set_attr "type" "neon_dot<q>")]
)
3337
f8e109ba
TC
3338;; These expands map to the Dot Product optab the vectorizer checks for.
3339;; The auto-vectorizer expects a dot product builtin that also does an
3340;; accumulation into the provided register.
3341;; Given the following pattern
3342;;
3343;; for (i=0; i<len; i++) {
3344;; c = a[i] * b[i];
3345;; r += c;
3346;; }
3347;; return r;
3348;;
3349;; This can be auto-vectorized to
3350;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3351;;
3352;; given enough iterations. However the vectorizer can keep unrolling the loop
3353;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3354;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3355;; ...
3356;;
3357;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sup>dot_prod<vsi2qi>"
  [(set (match_operand:VCVTI 0 "register_operand")
        (plus:VCVTI
          (unspec:VCVTI
            [(match_operand:<VSI2QI> 1 "register_operand")
             (match_operand:<VSI2QI> 2 "register_operand")]
            DOTPROD)
          (match_operand:VCVTI 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  /* Accumulate in place into operand 3 (it is both the destination and
     the tied accumulator of the insn), then copy it to operand 0.  */
  rtx accum = operands[3];
  emit_insn (gen_neon_<sup>dot<vsi2qi> (accum, accum,
                                         operands[1], operands[2]));
  emit_insn (gen_rtx_SET (operands[0], accum));
  DONE;
})
3374
97f518b3
JW
;; Expand copysign with a bit-select: operand 0 is first loaded with
;; operand 2, then vbsl is applied under a mask that has 0x80000000 in
;; every element, with operand 0 and operand 1 as its two data inputs.
(define_expand "neon_copysignf<mode>"
  [(match_operand:VCVTF 0 "register_operand")
   (match_operand:VCVTF 1 "register_operand")
   (match_operand:VCVTF 2 "register_operand")]
  "TARGET_NEON"
{
  rtx mask_reg = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
  rtx top_bit = gen_int_mode (0x80000000, SImode);

  /* Broadcast the 0x80000000 constant across the integer mask vector.  */
  emit_move_insn (mask_reg,
                  gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode,
                                           top_bit));
  emit_move_insn (operands[0], operands[2]);

  /* View the integer mask in the floating-point vector mode for vbsl.  */
  rtx mask_as_fp = simplify_gen_subreg (<MODE>mode, mask_reg,
                                        <VCVTF:V_cmp_result>mode, 0);
  emit_insn (gen_neon_vbsl<mode> (operands[0], mask_as_fp, operands[0],
                                  operands[1]));
  DONE;
})
3396
88f77cba
JB
;; vqneg on a single source vector, modelled as UNSPEC_VQNEG.
(define_insn "neon_vqneg<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VQNEG))]
  "TARGET_NEON"
  "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_qneg<q>")]
)
88f77cba
JB
3405
;; vcls on a single source vector, modelled as UNSPEC_VCLS.
(define_insn "neon_vcls<mode>"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW
          [(match_operand:VDQIW 1 "s_register_operand" "w")]
          UNSPEC_VCLS))]
  "TARGET_NEON"
  "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cls<q>")]
)
88f77cba 3414
;; Vector count-leading-zeros, expressed with the generic CLZ rtx code.
(define_insn "clz<mode>2"
  [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (clz:VDQIW
          (match_operand:VDQIW 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
88f77cba 3422
;; neon_vclz<mode> simply forwards to the clz<mode>2 pattern.
(define_expand "neon_vclz<mode>"
  [(match_operand:VDQIW 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_insn (gen_clz<mode>2 (dest, src));
  DONE;
})
3431
;; Vector population count, expressed with the generic POPCOUNT rtx code.
(define_insn "popcount<mode>2"
  [(set (match_operand:VE 0 "s_register_operand" "=w")
        (popcount:VE
          (match_operand:VE 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_cnt<q>")]
)
88f77cba 3439
;; neon_vcnt<mode> simply forwards to the popcount<mode>2 pattern.
(define_expand "neon_vcnt<mode>"
  [(match_operand:VE 0 "s_register_operand")
   (match_operand:VE 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_insn (gen_popcount<mode>2 (dest, src));
  DONE;
})
3448
55a9b91b
MW
;; vrecpe.f16 for the VH modes; requires TARGET_NEON_FP16INST.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3457
88f77cba
JB
;; vrecpe for the V32 modes; the element suffix comes from <V_u_elem>.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
88f77cba
JB
3466
;; vrsqrte for the V32 modes; the element suffix comes from <V_u_elem>.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
88f77cba
JB
3475
;; neon_vmvn<mode> simply forwards to the one_cmpl<mode>2 pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_insn (gen_one_cmpl<mode>2 (dest, src));
  DONE;
})
3484
89ffa8fc
JB
;; Sign-extending extraction of one element of a VD vector into a core
;; register.  Operand 2 is the RTL element index.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index within the vector before printing it.  */
      const int rtl_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - rtl_lane);
    }
  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
88f77cba 3503
89ffa8fc
JB
;; Zero-extending extraction of one element of a VD vector into a core
;; register.  Operand 2 is the RTL element index.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index within the vector before printing it.  */
      const int rtl_lane = INTVAL (operands[2]);
      operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - rtl_lane);
    }
  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
88f77cba 3522
89ffa8fc
JB
;; Sign-extending extraction of one element of a VQ2 vector into a core
;; register.  The vmov is printed against a <V_HALF>-mode register built
;; from the source register number, so the index is first reduced to a
;; position within that half.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  /* Pick the half holding the lane: register numbers advance by two per
     half.  */
  rtx xops[3];
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         REGNO (operands[1]) + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything was already emitted above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
88f77cba 3549
89ffa8fc
JB
;; Zero-extending extraction of one element of a VQ2 vector into a core
;; register.  The vmov is printed against a <V_HALF>-mode register built
;; from the source register number, so the index is first reduced to a
;; position within that half.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  const unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  const unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;

  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  /* Pick the half holding the lane: register numbers advance by two per
     half.  */
  rtx xops[3];
  xops[0] = operands[0];
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         REGNO (operands[1]) + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything was already emitted above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3576
;; Lane read for the builtins: 32-bit elements go through vec_extract,
;; every other element size through the sign-extending internal insn.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  const bool is_32bit_elt = GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32;

  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element order in memory, while
         generic RTL assumes array order; translate by flipping the index
         within each 64-bit register.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (is_32bit_elt)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                 operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})
3606
;; Unsigned lane read for the builtins: 32-bit elements go through
;; vec_extract, every other element size through the zero-extending
;; internal insn.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  const bool is_32bit_elt = GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32;

  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element order in memory, while
         generic RTL assumes array order; translate by flipping the index
         within each 64-bit register.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  if (is_32bit_elt)
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                 operands[2]));
  else
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  DONE;
})
3636
;; DImode lane read: the lane operand (2) is not consulted; operand 1 is
;; copied to operand 0.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_move_insn (dest, src);
  DONE;
})
88f77cba 3646
;; V2DI lane read: lane 0 is taken with gen_lowpart and lane 1 with
;; gen_highpart, after the big-endian lane translation.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Intrinsic lane numbers assume vldm element order in memory, while
         generic RTL assumes array order.  With two elements the
         translation is a flip of the low index bit.  */
      const unsigned int nelts = 2;
      unsigned int flipped = INTVAL (operands[2]);
      flipped ^= nelts - 1;
      operands[2] = GEN_INT (flipped);
    }

  const int lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  rtx half;
  if (lane == 0)
    half = gen_lowpart (DImode, operands[1]);
  else
    half = gen_highpart (DImode, operands[1]);
  emit_move_insn (operands[0], half);
  DONE;
})
b617fc71 3675
;; Lane write for the builtins.  Operand 1 is the scalar, operand 2 the
;; vector being updated and operand 3 the lane; the lane is handed to
;; vec_set<mode>_internal as a one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the index within each 64-bit register.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      lane ^= lanes_per_dreg - 1;
    }

  rtx lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
88f77cba 3696
a277dd9b 3697; For DImode, operands 2 (the vector being updated) and 3 (the lane index) are ignored: the expander below just copies operand 1 to operand 0.
88f77cba 3698
(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  /* Only operands 0 and 1 take part: the new value replaces the whole
     destination.  */
  rtx dest = operands[0];
  rtx new_value = operands[1];
  emit_move_insn (dest, new_value);
  DONE;
})
88f77cba
JB
3709
;; Build a VD_RE vector from a DImode value by taking the value's lowpart
;; in the vector mode and moving it into the destination.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  rtx as_vector = gen_lowpart (<MODE>mode, operands[1]);
  emit_move_insn (operands[0], as_vector);
  DONE;
})
3719
;; Duplicate a scalar held in a core register ("r") across a VX vector.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
88f77cba 3727
92422235
CL
;; Duplicate an HFmode scalar held in a core register across a V4HF vector.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3735
;; Duplicate an HFmode scalar held in a core register across a V8HF vector.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3743
17a13507
MI
;; Duplicate a BFmode scalar held in a core register across a V4BF vector.
(define_insn "neon_vdup_nv4bf"
  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
        (vec_duplicate:V4BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3751
;; Duplicate a BFmode scalar held in a core register across a V8BF vector.
(define_insn "neon_vdup_nv8bf"
  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
        (vec_duplicate:V8BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3759
814a4c3b
DJ
;; Duplicate a scalar across a V32 vector.  Alternative 0 takes the scalar
;; from a core register ("r"); alternative 1 takes it from a "t" register
;; and prints it with the %y modifier.
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3769
;; DImode "duplicate" is just a move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_move_insn (dest, src);
  DONE;
}
)
88f77cba
JB
3779
;; Duplicate a DImode value into both halves of a V2DI vector using two
;; vmov instructions (hence length 8).  Alternative 0 reads the value from
;; core registers, alternative 1 from a "w" register.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
88f77cba 3790
a277dd9b
SL
;; Duplicate one element of operand 1 (a <V_double_vector_mode> vector)
;; across a VDQW destination.  Operand 2 is the RTL element index.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index within the source vector before printing it.  */
      const int rtl_lane = INTVAL (operands[2]);
      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1
                   - rtl_lane);
    }

  /* The destination is printed with %P or %q depending on <Is_d_reg>.  */
  return <Is_d_reg>
         ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
         : "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
88f77cba 3812
;; Duplicate one element of operand 1 (a <V_double_vector_mode> vector)
;; across a VHFBF destination; needs TARGET_FP16 or TARGET_BF16_SIMD in
;; addition to TARGET_NEON.  Operand 2 is the RTL element index.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
        (vec_duplicate:VHFBF
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the index within the source vector before printing it.  */
      const int rtl_lane = INTVAL (operands[2]);
      operands[2]
        = GEN_INT (GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1
                   - rtl_lane);
    }

  /* The destination is printed with %P or %q depending on <Is_d_reg>.  */
  return <Is_d_reg>
         ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
         : "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
  [(set_attr "type" "neon_dup<q>")]
)
3834
;; Builtin entry point for vdup_lane on VDQW modes: translate the lane on
;; big-endian, then emit the internal insn.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the index within a 64-bit register of the source mode.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
88f77cba 3853
;; Builtin entry point for vdup_lane on VHFBF modes: translate the lane on
;; big-endian, then emit the internal insn.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VHFBF 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the index within a 64-bit register of the source mode.  */
      const unsigned int lanes_per_dreg
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);
      lane ^= lanes_per_dreg - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3872
88f77cba
JB
; The scalar index (operand 2) is ignored, since only zero is valid here;
; the expansion is a plain move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];
  emit_move_insn (dest, src);
  DONE;
})
3883
a277dd9b
SL
; The index is likewise ignored for v2di, as the DImode second operand has
; only a single element; the expansion defers to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx scalar = operands[1];
  emit_insn (gen_neon_vdup_nv2di (dest, scalar));
  DONE;
})
88f77cba 3894
b440f324
RH
; Register swap.  The condition keeps it disabled before reload because we
; don't want combine doing something silly; it is used by the post-reload
; expansion of neon_vcombine.  Both operands are read-write ("+w").
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3905
88f77cba
JB
;; In this insn, operand 1 should be the low part and operand 2 the high
;; part of the destination vector.  The insn itself prints "#" and is split
;; after reload by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could
;; make it so that the reg allocator puts things in the right places
;; magically instead).  Lack of subregs for vectors makes that tricky
;; though, I think.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* All the work is delegated to the helper.  */
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
88f77cba 3928
ddfd2edf
RS
;; Move the <V_HALF>-mode subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx upper_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode,
                                        GET_MODE_SIZE (<V_HALF>mode));
  emit_move_insn (operands[0], upper_half);
  DONE;
})
a277dd9b 3939
ddfd2edf
RS
;; Move the <V_HALF>-mode subreg of operand 1 at byte offset 0 into
;; operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx lower_half = simplify_gen_subreg (<V_HALF>mode, operands[1],
                                        <MODE>mode, 0);
  emit_move_insn (operands[0], lower_half);
  DONE;
})
88f77cba 3950
5bf4dcf2
DP
;; Signed integer vector to float vector.  Disabled when
;; flag_rounding_math is set.
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3958
;; Unsigned integer vector to float vector.  Disabled when
;; flag_rounding_math is set.
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3966
;; Float vector to signed integer vector (FIX rtx code).
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3974
;; Float vector to unsigned integer vector (UNSIGNED_FIX rtx code).
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3982
;; Builtin vcvt from a VCVTF (f32) vector to the <V_CVTTO> mode; <sup>
;; supplies the signedness prefix of the 32-bit integer type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba 3991
;; Builtin vcvt from a VCVTI (32-bit integer) vector to the <V_CVTTO>
;; mode; <sup> supplies the signedness prefix of the integer type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba 4000
5819f96f
KT
;; Widen a V4HF vector to V4SF; needs TARGET_FP16 as well as TARGET_NEON.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
4009
;; Narrow a V4SF vector to V4HF; needs TARGET_FP16 as well as TARGET_NEON.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
4018
55a9b91b
MW
;; Builtin vcvt from a VCVTHI (16-bit integer) vector to the <VH_CVTTO>
;; mode, printed as a conversion to f16.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4029
;; Builtin vcvt from a VH (f16) vector to the <VH_CVTTO> mode, printed as
;; a conversion to a 16-bit integer type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4040
;; vcvt from f32 with an extra immediate operand (2), printed as the last
;; instruction operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  rtx imm = operands[2];
  /* Check the immediate against the bounds 1 and 33.  */
  arm_const_bounds (imm, 1, 33);
  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba 4053
55a9b91b
MW
;; Fixed-point vcvt from f16 (VH modes); operand 2 is the number of
;; fraction bits, printed as the last instruction operand.
;; For 16-bit elements the instruction only encodes 1 to 16 fraction bits,
;; so the lower bound is 1 (it used to be 0, which let an unencodable #0
;; through to the assembler).
;; NOTE(review): the upper bound is taken to be exclusive, by analogy with
;; the (1, 33) bounds used by the 32-bit patterns -- confirm against
;; arm_const_bounds.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 1, 17);
  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4067
;; vcvt to f32 from a 32-bit integer vector with an extra immediate
;; operand (2), printed as the last instruction operand.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  rtx imm = operands[2];
  /* Check the immediate against the bounds 1 and 33.  */
  arm_const_bounds (imm, 1, 33);
  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba 4080
55a9b91b
MW
;; Fixed-point vcvt to f16 from a 16-bit integer vector (VCVTHI modes);
;; operand 2 is the number of fraction bits, printed as the last
;; instruction operand.
;; For 16-bit elements the instruction only encodes 1 to 16 fraction bits,
;; so the lower bound is 1 (it used to be 0, which let an unencodable #0
;; through to the assembler).
;; NOTE(review): the upper bound is taken to be exclusive, by analogy with
;; the (1, 33) bounds used by the 32-bit patterns -- confirm against
;; arm_const_bounds.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  arm_const_bounds (operands[2], 1, 17);
  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
4094
;; f16 to 16-bit integer conversions whose mnemonic carries the
;; <vcvth_op> suffix selected by the VCVT_HF_US iterator.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
4105
88f77cba
JB
;; vmovn: the <V_narrow> result is printed with %P and the VN source
;; with %q.
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
88f77cba 4114
;; Saturating narrowing move (VQMOVN iterator): a VN vector is written to a
;; register of its <V_narrow> mode using "vqmovn.<sup><V_sz_elem>".
94f0f2cc 4115(define_insn "neon_vqmovn<sup><mode>"
88f77cba 4116 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
94f0f2cc
JG
4117 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4118 VQMOVN))]
88f77cba 4119 "TARGET_NEON"
94f0f2cc 4120 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
f7379e5e 4121 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4122)
88f77cba
JB
4123
;; Narrowing move emitted as "vqmovun.<V_s_elem>" (UNSPEC_VQMOVUN): a VN
;; vector is written to a register of its <V_narrow> mode.
4124(define_insn "neon_vqmovun<mode>"
4125 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
94f0f2cc 4126 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
88f77cba
JB
4127 UNSPEC_VQMOVUN))]
4128 "TARGET_NEON"
c956e102 4129 "vqmovun.<V_s_elem>\t%P0, %q1"
f7379e5e 4130 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4131)
88f77cba 4132
;; Widening move (VMOVL iterator): a VW vector is written to a register of
;; its <V_widen> mode using "vmovl.<sup><V_sz_elem>".
94f0f2cc 4133(define_insn "neon_vmovl<sup><mode>"
88f77cba 4134 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
94f0f2cc
JG
4135 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4136 VMOVL))]
88f77cba 4137 "TARGET_NEON"
94f0f2cc 4138 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
f7379e5e 4139 [(set_attr "type" "neon_shift_imm_long")]
c956e102 4140)
88f77cba
JB
4141
;; Multiply a VMD vector (operand 1) by one lane of operand 2; operand 3 is
;; the immediate lane index, printed as "%P2[%c3]".  The "type" attribute is
;; selected by <Is_float_mode>.
4142(define_insn "neon_vmul_lane<mode>"
4143 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4144 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4145 (match_operand:VMD 2 "s_register_operand"
4146 "<scalar_mul_constraint>")
94f0f2cc 4147 (match_operand:SI 3 "immediate_operand" "i")]
88f77cba
JB
4148 UNSPEC_VMUL_LANE))]
4149 "TARGET_NEON"
b617fc71 4150{
b617fc71
JB
4151 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4152}
003bb7f3 4153 [(set (attr "type")
b75b1be2 4154 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4155 (const_string "neon_fp_mul_s_scalar<q>")
4156 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
c956e102 4157)
88f77cba
JB
4158
;; Multiply a VMQ vector (operand 1) by one lane of operand 2, which has the
;; <V_HALF> mode; operand 3 is the immediate lane index ("%P2[%c3]").  The
;; "type" attribute is selected by <Is_float_mode>.
4159(define_insn "neon_vmul_lane<mode>"
4160 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4161 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4162 (match_operand:<V_HALF> 2 "s_register_operand"
4163 "<scalar_mul_constraint>")
94f0f2cc 4164 (match_operand:SI 3 "immediate_operand" "i")]
88f77cba
JB
4165 UNSPEC_VMUL_LANE))]
4166 "TARGET_NEON"
b617fc71 4167{
b617fc71
JB
4168 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4169}
003bb7f3 4170 [(set (attr "type")
b75b1be2 4171 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4172 (const_string "neon_fp_mul_s_scalar<q>")
4173 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
c956e102 4174)
88f77cba 4175
55a9b91b
MW
;; Half-precision multiply by lane: a VH vector (operand 1) times one lane of
;; the V4HF operand 2, selected by immediate operand 3; emits "vmul.f16".
;; Only available with TARGET_NEON_FP16INST.
4176(define_insn "neon_vmul_lane<mode>"
4177 [(set (match_operand:VH 0 "s_register_operand" "=w")
4178 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4179 (match_operand:V4HF 2 "s_register_operand"
4180 "<scalar_mul_constraint>")
4181 (match_operand:SI 3 "immediate_operand" "i")]
4182 UNSPEC_VMUL_LANE))]
4183 "TARGET_NEON_FP16INST"
4184 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4185 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4186)
4187
;; Widening multiply by lane (VMULL_LANE iterator): VMDI operand 1 times the
;; lane of operand 2 selected by immediate operand 3, producing a <V_widen>
;; result; emits "vmull.<sup><V_sz_elem>".
94f0f2cc 4188(define_insn "neon_vmull<sup>_lane<mode>"
88f77cba
JB
4189 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4190 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4191 (match_operand:VMDI 2 "s_register_operand"
4192 "<scalar_mul_constraint>")
94f0f2cc
JG
4193 (match_operand:SI 3 "immediate_operand" "i")]
4194 VMULL_LANE))]
88f77cba 4195 "TARGET_NEON"
b617fc71 4196{
94f0f2cc 4197 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
b617fc71 4198}
f7379e5e 4199 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
c956e102 4200)
88f77cba
JB
4201
;; "vqdmull" by lane (UNSPEC_VQDMULL_LANE): VMDI operand 1 times the lane of
;; operand 2 selected by immediate operand 3, producing a <V_widen> result.
4202(define_insn "neon_vqdmull_lane<mode>"
4203 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4204 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4205 (match_operand:VMDI 2 "s_register_operand"
4206 "<scalar_mul_constraint>")
94f0f2cc 4207 (match_operand:SI 3 "immediate_operand" "i")]
88f77cba
JB
4208 UNSPEC_VQDMULL_LANE))]
4209 "TARGET_NEON"
b617fc71 4210{
b617fc71
JB
4211 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4212}
f7379e5e 4213 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
c956e102 4214)
88f77cba 4215
;; "vq<r>dmulh" by lane for VMQI vectors (VQDMULH_LANE iterator): operand 1
;; times the lane of the <V_HALF> operand 2 selected by immediate operand 3.
94f0f2cc 4216(define_insn "neon_vq<r>dmulh_lane<mode>"
88f77cba
JB
4217 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4218 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4219 (match_operand:<V_HALF> 2 "s_register_operand"
4220 "<scalar_mul_constraint>")
94f0f2cc
JG
4221 (match_operand:SI 3 "immediate_operand" "i")]
4222 VQDMULH_LANE))]
88f77cba 4223 "TARGET_NEON"
b617fc71 4224{
94f0f2cc 4225 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
b617fc71 4226}
f7379e5e 4227 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
c956e102 4228)
88f77cba 4229
;; "vq<r>dmulh" by lane for VMDI vectors (VQDMULH_LANE iterator): operand 1
;; times the lane of operand 2 selected by immediate operand 3.  All
;; registers are printed with %P, so the "type" attribute uses <q> instead of
;; a hard-coded "_q" suffix, as the other D/Q pattern pairs in this file do.
94f0f2cc 4230(define_insn "neon_vq<r>dmulh_lane<mode>"
88f77cba
JB
4231 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4232 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4233 (match_operand:VMDI 2 "s_register_operand"
4234 "<scalar_mul_constraint>")
94f0f2cc
JG
4235 (match_operand:SI 3 "immediate_operand" "i")]
4236 VQDMULH_LANE))]
88f77cba 4237 "TARGET_NEON"
b617fc71 4238{
94f0f2cc 4239 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
b617fc71 4240}
f7379e5e 4241 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar<q>")]
c956e102 4242)
88f77cba 4243
5f2ca3b2
MW
4244;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the output ("0" constraint); operand 2 is
;; combined with the lane of the <V_HALF> operand 3 selected by immediate
;; operand 4.  Only available with TARGET_NEON_RDMA.
4245(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4246 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4247 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4248 (match_operand:VMQI 2 "s_register_operand" "w")
4249 (match_operand:<V_HALF> 3 "s_register_operand"
4250 "<scalar_mul_constraint>")
4251 (match_operand:SI 4 "immediate_operand" "i")]
4252 VQRDMLH_AS))]
4253 "TARGET_NEON_RDMA"
4254{
4255 return
4256 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4257}
4258 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4259)
4260
;; VMDI form of vqrdmlah_lane / vqrdmlsh_lane.  Operand 1 is tied to the
;; output ("0" constraint); operand 2 is combined with the lane of operand 3
;; selected by immediate operand 4.  Only available with TARGET_NEON_RDMA.
4261(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4262 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4263 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4264 (match_operand:VMDI 2 "s_register_operand" "w")
4265 (match_operand:VMDI 3 "s_register_operand"
4266 "<scalar_mul_constraint>")
4267 (match_operand:SI 4 "immediate_operand" "i")]
4268 VQRDMLH_AS))]
4269 "TARGET_NEON_RDMA"
4270{
4271 return
4272 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4273}
4274 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4275)
4276
88f77cba
JB
;; Multiply-accumulate by lane for VMD vectors.  Operand 1 is tied to the
;; output ("0" constraint); operand 2 is multiplied by the lane of operand 3
;; selected by immediate operand 4.  "type" is selected by <Is_float_mode>.
4277(define_insn "neon_vmla_lane<mode>"
4278 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4279 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4280 (match_operand:VMD 2 "s_register_operand" "w")
4281 (match_operand:VMD 3 "s_register_operand"
4282 "<scalar_mul_constraint>")
94f0f2cc 4283 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4284 UNSPEC_VMLA_LANE))]
4285 "TARGET_NEON"
b617fc71 4286{
b617fc71
JB
4287 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4288}
003bb7f3 4289 [(set (attr "type")
b75b1be2 4290 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4291 (const_string "neon_fp_mla_s_scalar<q>")
4292 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4293)
88f77cba
JB
4294
;; Multiply-accumulate by lane for VMQ vectors.  Operand 1 is tied to the
;; output ("0" constraint); operand 2 is multiplied by the lane of the
;; <V_HALF> operand 3 selected by immediate operand 4.
4295(define_insn "neon_vmla_lane<mode>"
4296 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4297 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4298 (match_operand:VMQ 2 "s_register_operand" "w")
4299 (match_operand:<V_HALF> 3 "s_register_operand"
4300 "<scalar_mul_constraint>")
94f0f2cc 4301 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4302 UNSPEC_VMLA_LANE))]
4303 "TARGET_NEON"
b617fc71 4304{
b617fc71
JB
4305 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4306}
003bb7f3 4307 [(set (attr "type")
b75b1be2 4308 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4309 (const_string "neon_fp_mla_s_scalar<q>")
4310 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4311)
88f77cba 4312
;; Widening multiply-accumulate by lane (VMLAL_LANE iterator).  The <V_widen>
;; operand 1 is tied to the output; VMDI operand 2 is multiplied by the lane
;; of operand 3 selected by immediate operand 4.
94f0f2cc 4313(define_insn "neon_vmlal<sup>_lane<mode>"
88f77cba
JB
4314 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4315 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4316 (match_operand:VMDI 2 "s_register_operand" "w")
4317 (match_operand:VMDI 3 "s_register_operand"
4318 "<scalar_mul_constraint>")
94f0f2cc
JG
4319 (match_operand:SI 4 "immediate_operand" "i")]
4320 VMLAL_LANE))]
88f77cba 4321 "TARGET_NEON"
b617fc71 4322{
94f0f2cc 4323 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
b617fc71 4324}
f7379e5e 4325 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
c956e102 4326)
88f77cba
JB
4327
;; "vqdmlal" by lane (UNSPEC_VQDMLAL_LANE).  The <V_widen> operand 1 is tied
;; to the output; VMDI operand 2 is multiplied by the lane of operand 3
;; selected by immediate operand 4.
4328(define_insn "neon_vqdmlal_lane<mode>"
4329 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4330 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4331 (match_operand:VMDI 2 "s_register_operand" "w")
4332 (match_operand:VMDI 3 "s_register_operand"
4333 "<scalar_mul_constraint>")
94f0f2cc 4334 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4335 UNSPEC_VQDMLAL_LANE))]
4336 "TARGET_NEON"
b617fc71 4337{
b617fc71
JB
4338 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4339}
f7379e5e 4340 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
c956e102 4341)
88f77cba
JB
4342
;; Multiply-subtract by lane for VMD vectors.  Operand 1 is tied to the
;; output ("0" constraint); operand 2 is multiplied by the lane of operand 3
;; selected by immediate operand 4.  "type" is selected by <Is_float_mode>.
4343(define_insn "neon_vmls_lane<mode>"
4344 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4345 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4346 (match_operand:VMD 2 "s_register_operand" "w")
4347 (match_operand:VMD 3 "s_register_operand"
4348 "<scalar_mul_constraint>")
94f0f2cc 4349 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4350 UNSPEC_VMLS_LANE))]
4351 "TARGET_NEON"
b617fc71 4352{
b617fc71
JB
4353 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4354}
003bb7f3 4355 [(set (attr "type")
b75b1be2 4356 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4357 (const_string "neon_fp_mla_s_scalar<q>")
4358 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4359)
88f77cba
JB
4360
;; Multiply-subtract by lane for VMQ vectors.  Operand 1 is tied to the
;; output ("0" constraint); operand 2 is multiplied by the lane of the
;; <V_HALF> operand 3 selected by immediate operand 4.
4361(define_insn "neon_vmls_lane<mode>"
4362 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4363 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4364 (match_operand:VMQ 2 "s_register_operand" "w")
4365 (match_operand:<V_HALF> 3 "s_register_operand"
4366 "<scalar_mul_constraint>")
94f0f2cc 4367 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4368 UNSPEC_VMLS_LANE))]
4369 "TARGET_NEON"
b617fc71 4370{
b617fc71
JB
4371 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4372}
003bb7f3 4373 [(set (attr "type")
b75b1be2 4374 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4375 (const_string "neon_fp_mla_s_scalar<q>")
4376 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4377)
88f77cba 4378
;; Widening multiply-subtract by lane (VMLSL_LANE iterator).  The <V_widen>
;; operand 1 is tied to the output; VMDI operand 2 is multiplied by the lane
;; of operand 3 selected by immediate operand 4.
94f0f2cc 4379(define_insn "neon_vmlsl<sup>_lane<mode>"
88f77cba
JB
4380 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4381 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4382 (match_operand:VMDI 2 "s_register_operand" "w")
4383 (match_operand:VMDI 3 "s_register_operand"
4384 "<scalar_mul_constraint>")
94f0f2cc
JG
4385 (match_operand:SI 4 "immediate_operand" "i")]
4386 VMLSL_LANE))]
88f77cba 4387 "TARGET_NEON"
b617fc71 4388{
94f0f2cc 4389 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
b617fc71 4390}
f7379e5e 4391 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
c956e102 4392)
88f77cba
JB
4393
;; "vqdmlsl" by lane (UNSPEC_VQDMLSL_LANE).  The <V_widen> operand 1 is tied
;; to the output; VMDI operand 2 is multiplied by the lane of operand 3
;; selected by immediate operand 4.
4394(define_insn "neon_vqdmlsl_lane<mode>"
4395 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4396 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4397 (match_operand:VMDI 2 "s_register_operand" "w")
4398 (match_operand:VMDI 3 "s_register_operand"
4399 "<scalar_mul_constraint>")
94f0f2cc 4400 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4401 UNSPEC_VQDMLSL_LANE))]
4402 "TARGET_NEON"
b617fc71 4403{
b617fc71
JB
4404 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4405}
f7379e5e 4406 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
c956e102 4407)
88f77cba
JB
4408
4409; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4410; core register into a temp register, then use a scalar taken from that. This
4411; isn't an optimal solution if e.g. the scalar has just been read from memory
4412; or extracted from another vector. In the latter case it's currently better to
4413; use the "_lane" variant, and the former case can probably be implemented
4414; using vld1_lane, but that hasn't been done yet.
4415
;; Expand the scalar ("_n") multiply for VMD modes through the lane form:
;; operand 2 is placed into lane 0 of a fresh <MODE> temporary with
;; gen_neon_vset_lane<mode>, then neon_vmul_lane<mode> is emitted with lane
;; index const0_rtx.  The temporary is passed to vset_lane both as its first
;; argument and as its third (presumably destination and input vector --
;; TODO confirm against the vset_lane pattern).
4416(define_expand "neon_vmul_n<mode>"
cd65e265
DZ
4417 [(match_operand:VMD 0 "s_register_operand")
4418 (match_operand:VMD 1 "s_register_operand")
4419 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4420 "TARGET_NEON"
4421{
4422 rtx tmp = gen_reg_rtx (<MODE>mode);
4423 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4424 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4425 const0_rtx));
88f77cba
JB
4426 DONE;
4427})
4428
;; Expand the scalar ("_n") multiply for VMQ modes through the lane form:
;; operand 2 is placed into lane 0 of a fresh <V_HALF> temporary with
;; gen_neon_vset_lane<V_half>, then neon_vmul_lane<mode> is emitted with lane
;; index const0_rtx.
4429(define_expand "neon_vmul_n<mode>"
cd65e265
DZ
4430 [(match_operand:VMQ 0 "s_register_operand")
4431 (match_operand:VMQ 1 "s_register_operand")
4432 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4433 "TARGET_NEON"
4434{
4435 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4436 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4437 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4438 const0_rtx));
88f77cba
JB
4439 DONE;
4440})
4441
55a9b91b
MW
;; Expand the scalar ("_n") multiply for VH modes through the lane form:
;; operand 2 is placed into lane 0 of a fresh V4HF temporary with
;; gen_neon_vset_lanev4hf, then neon_vmul_lane<mode> is emitted with lane
;; index const0_rtx.  Only available with TARGET_NEON_FP16INST.
4442(define_expand "neon_vmul_n<mode>"
4443 [(match_operand:VH 0 "s_register_operand")
4444 (match_operand:VH 1 "s_register_operand")
4445 (match_operand:<V_elem> 2 "s_register_operand")]
4446 "TARGET_NEON_FP16INST"
4447{
4448 rtx tmp = gen_reg_rtx (V4HFmode);
4449 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4450 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4451 const0_rtx));
4452 DONE;
4453})
4454
;; Expand vmulls_n for VMDI modes through the lane form: operand 2 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vmulls_lane<mode> is
;; emitted with lane index const0_rtx.
94f0f2cc 4455(define_expand "neon_vmulls_n<mode>"
cd65e265
DZ
4456 [(match_operand:<V_widen> 0 "s_register_operand")
4457 (match_operand:VMDI 1 "s_register_operand")
4458 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4459 "TARGET_NEON"
4460{
4461 rtx tmp = gen_reg_rtx (<MODE>mode);
4462 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
94f0f2cc
JG
4463 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4464 const0_rtx));
4465 DONE;
4466})
4467
;; Expand vmullu_n for VMDI modes through the lane form: operand 2 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vmullu_lane<mode> is
;; emitted with lane index const0_rtx.
4468(define_expand "neon_vmullu_n<mode>"
cd65e265
DZ
4469 [(match_operand:<V_widen> 0 "s_register_operand")
4470 (match_operand:VMDI 1 "s_register_operand")
4471 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4472 "TARGET_NEON"
4473{
4474 rtx tmp = gen_reg_rtx (<MODE>mode);
4475 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4476 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4477 const0_rtx));
88f77cba
JB
4478 DONE;
4479})
4480
;; Expand vqdmull_n for VMDI modes through the lane form: operand 2 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vqdmull_lane<mode> is
;; emitted with lane index const0_rtx.
4481(define_expand "neon_vqdmull_n<mode>"
cd65e265
DZ
4482 [(match_operand:<V_widen> 0 "s_register_operand")
4483 (match_operand:VMDI 1 "s_register_operand")
4484 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4485 "TARGET_NEON"
4486{
4487 rtx tmp = gen_reg_rtx (<MODE>mode);
4488 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4489 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4490 const0_rtx));
88f77cba
JB
4491 DONE;
4492})
4493
;; Expand vqdmulh_n for VMDI modes through the lane form: operand 2 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vqdmulh_lane<mode> is
;; emitted with lane index const0_rtx.
4494(define_expand "neon_vqdmulh_n<mode>"
cd65e265
DZ
4495 [(match_operand:VMDI 0 "s_register_operand")
4496 (match_operand:VMDI 1 "s_register_operand")
4497 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4498 "TARGET_NEON"
4499{
4500 rtx tmp = gen_reg_rtx (<MODE>mode);
4501 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4502 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc
JG
4503 const0_rtx));
4504 DONE;
4505})
4506
;; Expand vqrdmulh_n for VMDI modes through the lane form: operand 2 is
;; placed into lane 0 of a fresh <MODE> temporary, then
;; neon_vqrdmulh_lane<mode> is emitted with lane index const0_rtx.
4507(define_expand "neon_vqrdmulh_n<mode>"
cd65e265
DZ
4508 [(match_operand:VMDI 0 "s_register_operand")
4509 (match_operand:VMDI 1 "s_register_operand")
4510 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4511 "TARGET_NEON"
4512{
4513 rtx tmp = gen_reg_rtx (<MODE>mode);
4514 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4515 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4516 const0_rtx));
88f77cba
JB
4517 DONE;
4518})
4519
;; Expand vqdmulh_n for VMQI modes through the lane form: operand 2 is placed
;; into lane 0 of a fresh <V_HALF> temporary, then neon_vqdmulh_lane<mode> is
;; emitted with lane index const0_rtx.
4520(define_expand "neon_vqdmulh_n<mode>"
cd65e265
DZ
4521 [(match_operand:VMQI 0 "s_register_operand")
4522 (match_operand:VMQI 1 "s_register_operand")
4523 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4524 "TARGET_NEON"
4525{
4526 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4527 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4528 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc
JG
4529 const0_rtx));
4530 DONE;
4531})
4532
;; Expand vqrdmulh_n for VMQI modes through the lane form: operand 2 is
;; placed into lane 0 of a fresh <V_HALF> temporary, then
;; neon_vqrdmulh_lane<mode> is emitted with lane index const0_rtx.
4533(define_expand "neon_vqrdmulh_n<mode>"
cd65e265
DZ
4534 [(match_operand:VMQI 0 "s_register_operand")
4535 (match_operand:VMQI 1 "s_register_operand")
4536 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4537 "TARGET_NEON"
4538{
4539 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4540 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4541 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4542 const0_rtx));
88f77cba
JB
4543 DONE;
4544})
4545
;; Expand vmla_n for VMD modes through the lane form: the scalar operand 3 is
;; placed into lane 0 of a fresh <MODE> temporary, then neon_vmla_lane<mode>
;; is emitted on operands 0-2 with that temporary and lane index const0_rtx.
4546(define_expand "neon_vmla_n<mode>"
cd65e265
DZ
4547 [(match_operand:VMD 0 "s_register_operand")
4548 (match_operand:VMD 1 "s_register_operand")
4549 (match_operand:VMD 2 "s_register_operand")
4550 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4551 "TARGET_NEON"
4552{
4553 rtx tmp = gen_reg_rtx (<MODE>mode);
4554 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4555 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4556 tmp, const0_rtx));
88f77cba
JB
4557 DONE;
4558})
4559
;; Expand vmla_n for VMQ modes through the lane form: the scalar operand 3 is
;; placed into lane 0 of a fresh <V_HALF> temporary, then
;; neon_vmla_lane<mode> is emitted on operands 0-2 with that temporary and
;; lane index const0_rtx.
4560(define_expand "neon_vmla_n<mode>"
cd65e265
DZ
4561 [(match_operand:VMQ 0 "s_register_operand")
4562 (match_operand:VMQ 1 "s_register_operand")
4563 (match_operand:VMQ 2 "s_register_operand")
4564 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4565 "TARGET_NEON"
4566{
4567 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4568 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4569 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4570 tmp, const0_rtx));
88f77cba
JB
4571 DONE;
4572})
4573
;; Expand vmlals_n through the lane form: the scalar operand 3 is placed into
;; lane 0 of a fresh <MODE> temporary, then neon_vmlals_lane<mode> is emitted
;; on operands 0-2 with that temporary and lane index const0_rtx.
94f0f2cc 4574(define_expand "neon_vmlals_n<mode>"
cd65e265
DZ
4575 [(match_operand:<V_widen> 0 "s_register_operand")
4576 (match_operand:<V_widen> 1 "s_register_operand")
4577 (match_operand:VMDI 2 "s_register_operand")
4578 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4579 "TARGET_NEON"
4580{
4581 rtx tmp = gen_reg_rtx (<MODE>mode);
4582 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
94f0f2cc
JG
4583 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4584 tmp, const0_rtx));
4585 DONE;
4586})
4587
;; Expand vmlalu_n through the lane form: the scalar operand 3 is placed into
;; lane 0 of a fresh <MODE> temporary, then neon_vmlalu_lane<mode> is emitted
;; on operands 0-2 with that temporary and lane index const0_rtx.
4588(define_expand "neon_vmlalu_n<mode>"
cd65e265
DZ
4589 [(match_operand:<V_widen> 0 "s_register_operand")
4590 (match_operand:<V_widen> 1 "s_register_operand")
4591 (match_operand:VMDI 2 "s_register_operand")
4592 (match_operand:<V_elem> 3 "s_register_operand")]
94f0f2cc
JG
4593 "TARGET_NEON"
4594{
4595 rtx tmp = gen_reg_rtx (<MODE>mode);
4596 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4597 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4598 tmp, const0_rtx));
88f77cba
JB
4599 DONE;
4600})
4601
;; Expand vqdmlal_n through the lane form: the scalar operand 3 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vqdmlal_lane<mode> is
;; emitted on operands 0-2 with that temporary and lane index const0_rtx.
4602(define_expand "neon_vqdmlal_n<mode>"
cd65e265
DZ
4603 [(match_operand:<V_widen> 0 "s_register_operand")
4604 (match_operand:<V_widen> 1 "s_register_operand")
4605 (match_operand:VMDI 2 "s_register_operand")
4606 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4607 "TARGET_NEON"
4608{
4609 rtx tmp = gen_reg_rtx (<MODE>mode);
4610 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4611 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4612 tmp, const0_rtx));
88f77cba
JB
4613 DONE;
4614})
4615
;; Expand vmls_n for VMD modes through the lane form: the scalar operand 3 is
;; placed into lane 0 of a fresh <MODE> temporary, then neon_vmls_lane<mode>
;; is emitted on operands 0-2 with that temporary and lane index const0_rtx.
4616(define_expand "neon_vmls_n<mode>"
cd65e265
DZ
4617 [(match_operand:VMD 0 "s_register_operand")
4618 (match_operand:VMD 1 "s_register_operand")
4619 (match_operand:VMD 2 "s_register_operand")
4620 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4621 "TARGET_NEON"
4622{
4623 rtx tmp = gen_reg_rtx (<MODE>mode);
4624 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4625 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4626 tmp, const0_rtx));
88f77cba
JB
4627 DONE;
4628})
4629
;; Expand vmls_n for VMQ modes through the lane form: the scalar operand 3 is
;; placed into lane 0 of a fresh <V_HALF> temporary, then
;; neon_vmls_lane<mode> is emitted on operands 0-2 with that temporary and
;; lane index const0_rtx.
4630(define_expand "neon_vmls_n<mode>"
cd65e265
DZ
4631 [(match_operand:VMQ 0 "s_register_operand")
4632 (match_operand:VMQ 1 "s_register_operand")
4633 (match_operand:VMQ 2 "s_register_operand")
4634 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4635 "TARGET_NEON"
4636{
4637 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4638 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4639 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc
JG
4640 tmp, const0_rtx));
4641 DONE;
4642})
4643
;; Expand vmlsls_n through the lane form: the scalar operand 3 is placed into
;; lane 0 of a fresh <MODE> temporary, then neon_vmlsls_lane<mode> is emitted
;; on operands 0-2 with that temporary and lane index const0_rtx.
4644(define_expand "neon_vmlsls_n<mode>"
cd65e265
DZ
4645 [(match_operand:<V_widen> 0 "s_register_operand")
4646 (match_operand:<V_widen> 1 "s_register_operand")
4647 (match_operand:VMDI 2 "s_register_operand")
4648 (match_operand:<V_elem> 3 "s_register_operand")]
94f0f2cc
JG
4649 "TARGET_NEON"
4650{
4651 rtx tmp = gen_reg_rtx (<MODE>mode);
4652 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4653 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4654 tmp, const0_rtx));
88f77cba
JB
4655 DONE;
4656})
4657
;; Expand vmlslu_n through the lane form: the scalar operand 3 is placed into
;; lane 0 of a fresh <MODE> temporary, then neon_vmlslu_lane<mode> is emitted
;; on operands 0-2 with that temporary and lane index const0_rtx.
94f0f2cc 4658(define_expand "neon_vmlslu_n<mode>"
cd65e265
DZ
4659 [(match_operand:<V_widen> 0 "s_register_operand")
4660 (match_operand:<V_widen> 1 "s_register_operand")
4661 (match_operand:VMDI 2 "s_register_operand")
4662 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4663 "TARGET_NEON"
4664{
4665 rtx tmp = gen_reg_rtx (<MODE>mode);
4666 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
94f0f2cc
JG
4667 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4668 tmp, const0_rtx));
88f77cba
JB
4669 DONE;
4670})
4671
;; Expand vqdmlsl_n through the lane form: the scalar operand 3 is placed
;; into lane 0 of a fresh <MODE> temporary, then neon_vqdmlsl_lane<mode> is
;; emitted on operands 0-2 with that temporary and lane index const0_rtx.
4672(define_expand "neon_vqdmlsl_n<mode>"
cd65e265
DZ
4673 [(match_operand:<V_widen> 0 "s_register_operand")
4674 (match_operand:<V_widen> 1 "s_register_operand")
4675 (match_operand:VMDI 2 "s_register_operand")
4676 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4677 "TARGET_NEON"
4678{
4679 rtx tmp = gen_reg_rtx (<MODE>mode);
4680 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4681 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4682 tmp, const0_rtx));
88f77cba
JB
4683 DONE;
4684})
4685
;; "vext.<V_sz_elem>" on two VDQX vectors (UNSPEC_VEXT) with immediate
;; operand 3, which is checked with
;; arm_const_bounds (0, GET_MODE_NUNITS (<MODE>mode)) before the template is
;; returned.
ff03f2d1 4686(define_insn "@neon_vext<mode>"
88f77cba
JB
4687 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4688 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4689 (match_operand:VDQX 2 "s_register_operand" "w")
4690 (match_operand:SI 3 "immediate_operand" "i")]
4691 UNSPEC_VEXT))]
4692 "TARGET_NEON"
b617fc71 4693{
d57daa0c 4694 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
b617fc71
JB
4695 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4696}
f7379e5e 4697 [(set_attr "type" "neon_ext<q>")]
c956e102 4698)
88f77cba 4699
;; "vrev64.<V_sz_elem>" on a VDQ vector (UNSPEC_VREV64).
ff03f2d1 4700(define_insn "@neon_vrev64<mode>"
88f77cba 4701 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
94f0f2cc 4702 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
88f77cba
JB
4703 UNSPEC_VREV64))]
4704 "TARGET_NEON"
c956e102 4705 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4706 [(set_attr "type" "neon_rev<q>")]
c956e102 4707)
88f77cba 4708
;; "vrev32.<V_sz_elem>" on a VX vector (UNSPEC_VREV32).
ff03f2d1 4709(define_insn "@neon_vrev32<mode>"
88f77cba 4710 [(set (match_operand:VX 0 "s_register_operand" "=w")
94f0f2cc 4711 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
88f77cba
JB
4712 UNSPEC_VREV32))]
4713 "TARGET_NEON"
c956e102 4714 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4715 [(set_attr "type" "neon_rev<q>")]
c956e102 4716)
88f77cba 4717
;; "vrev16.<V_sz_elem>" on a VE vector (UNSPEC_VREV16).
ff03f2d1 4718(define_insn "@neon_vrev16<mode>"
88f77cba 4719 [(set (match_operand:VE 0 "s_register_operand" "=w")
94f0f2cc 4720 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
88f77cba
JB
4721 UNSPEC_VREV16))]
4722 "TARGET_NEON"
c956e102 4723 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4724 [(set_attr "type" "neon_rev<q>")]
c956e102 4725)
88f77cba
JB
4726
4727; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4728; allocation. For an intrinsic of form:
4729; rD = vbsl_* (rS, rN, rM)
4730; We can use any of:
4731; vbsl rS, rN, rM (if D = S)
4732; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4733; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4734
;; Three-alternative bitwise select (UNSPEC_VBSL).  Each alternative ties a
;; different input to the output register and picks the matching mnemonic:
;;   alt 0: operand 1 tied ("0") -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied ("0") -> vbit %0, %2, %1
;;   alt 2: operand 2 tied ("0") -> vbif %0, %3, %1
4735(define_insn "neon_vbsl<mode>_internal"
4736 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4737 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4738 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4739 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4740 UNSPEC_VBSL))]
4741 "TARGET_NEON"
4742 "@
4743 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4744 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
c956e102 4745 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
f7379e5e 4746 [(set_attr "type" "neon_bsl<q>")]
c956e102 4747)
88f77cba
JB
4748
;; Expander for vbsl.  Operand 1 arrives in the <V_cmp_result> mode and is
;; re-expressed in <MODE> with gen_lowpart, so that all three inputs of the
;; UNSPEC_VBSL share one mode.
4749(define_expand "neon_vbsl<mode>"
cd65e265
DZ
4750 [(set (match_operand:VDQX 0 "s_register_operand")
4751 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4752 (match_operand:VDQX 2 "s_register_operand")
4753 (match_operand:VDQX 3 "s_register_operand")]
88f77cba
JB
4754 UNSPEC_VBSL))]
4755 "TARGET_NEON"
4756{
4757 /* We can't alias operands together if they have different modes. */
4758 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4759})
4760
94f0f2cc
JG
4761;; vshl, vrshl
;; Shift of a VDQIX vector by a register operand (VSHL iterator); <shift_op>
;; supplies the mnemonic stem and <sup> the signedness letter.
4762(define_insn "neon_v<shift_op><sup><mode>"
88f77cba
JB
4763 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4764 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4765 (match_operand:VDQIX 2 "s_register_operand" "w")]
4766 VSHL))]
88f77cba 4767 "TARGET_NEON"
94f0f2cc 4768 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 4769 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4770)
88f77cba 4771
94f0f2cc
JG
4772;; vqshl, vqrshl
;; Shift of a VDQIX vector by a register operand (VQSHL iterator); <shift_op>
;; supplies the mnemonic stem and <sup> the signedness letter.
4773(define_insn "neon_v<shift_op><sup><mode>"
88f77cba
JB
4774 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4775 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4776 (match_operand:VDQIX 2 "s_register_operand" "w")]
4777 VQSHL))]
88f77cba 4778 "TARGET_NEON"
94f0f2cc 4779 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 4780 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4781)
88f77cba 4782
94f0f2cc
JG
4783;; vshr_n, vrshr_n
;; Shift of a VDQIX vector by immediate operand 2 (VSHR_N iterator).  The
;; immediate is checked with
;; arm_const_bounds (1, neon_element_bits (<MODE>mode) + 1).
4784(define_insn "neon_v<shift_op><sup>_n<mode>"
88f77cba
JB
4785 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4786 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4787 (match_operand:SI 2 "immediate_operand" "i")]
4788 VSHR_N))]
88f77cba 4789 "TARGET_NEON"
b617fc71 4790{
d57daa0c 4791 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
94f0f2cc 4792 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4793}
f7379e5e 4794 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4795)
88f77cba 4796
94f0f2cc
JG
4797;; vshrn_n, vrshrn_n
;; Narrowing shift of a VN vector by immediate operand 2 (VSHRN_N iterator),
;; producing a <V_narrow> result.  The immediate is checked with
;; arm_const_bounds (1, neon_element_bits (<MODE>mode) / 2 + 1).
4798(define_insn "neon_v<shift_op>_n<mode>"
88f77cba
JB
4799 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4800 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4801 (match_operand:SI 2 "immediate_operand" "i")]
4802 VSHRN_N))]
88f77cba 4803 "TARGET_NEON"
b617fc71 4804{
d57daa0c 4805 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4806 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
b617fc71 4807}
f7379e5e 4808 [(set_attr "type" "neon_shift_imm_narrow_q")]
c956e102 4809)
88f77cba 4810
94f0f2cc
JG
4811;; vqshrn_n, vqrshrn_n
;; Narrowing shift of a VN vector by immediate operand 2 (VQSHRN_N iterator),
;; producing a <V_narrow> result.  The immediate is checked with
;; arm_const_bounds (1, neon_element_bits (<MODE>mode) / 2 + 1).
4812(define_insn "neon_v<shift_op><sup>_n<mode>"
88f77cba
JB
4813 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4814 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4815 (match_operand:SI 2 "immediate_operand" "i")]
4816 VQSHRN_N))]
88f77cba 4817 "TARGET_NEON"
b617fc71 4818{
d57daa0c 4819 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4820 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
b617fc71 4821}
f7379e5e 4822 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4823)
88f77cba 4824
94f0f2cc
JG
4825;; vqshrun_n, vqrshrun_n
;; Narrowing shift of a VN vector by immediate operand 2 (VQSHRUN_N
;; iterator), producing a <V_narrow> result.  The immediate is checked with
;; arm_const_bounds (1, neon_element_bits (<MODE>mode) / 2 + 1).
4826(define_insn "neon_v<shift_op>_n<mode>"
88f77cba
JB
4827 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4828 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4829 (match_operand:SI 2 "immediate_operand" "i")]
4830 VQSHRUN_N))]
88f77cba 4831 "TARGET_NEON"
b617fc71 4832{
d57daa0c 4833 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4834 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
b617fc71 4835}
f7379e5e 4836 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4837)
88f77cba
JB
4838
;; "vshl.<V_if_elem>" of a VDQIX vector by immediate operand 2
;; (UNSPEC_VSHL_N).  The immediate is checked with
;; arm_const_bounds (0, neon_element_bits (<MODE>mode)).
4839(define_insn "neon_vshl_n<mode>"
4840 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4841 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc 4842 (match_operand:SI 2 "immediate_operand" "i")]
88f77cba
JB
4843 UNSPEC_VSHL_N))]
4844 "TARGET_NEON"
b617fc71 4845{
d57daa0c 4846 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
b617fc71
JB
4847 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4848}
f7379e5e 4849 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4850)
88f77cba 4851
;; "vqshl.<sup><V_sz_elem>" of a VDQIX vector by immediate operand 2
;; (VQSHL_N iterator).  The immediate is checked with
;; arm_const_bounds (0, neon_element_bits (<MODE>mode)).
94f0f2cc 4852(define_insn "neon_vqshl_<sup>_n<mode>"
88f77cba
JB
4853 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4854 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4855 (match_operand:SI 2 "immediate_operand" "i")]
4856 VQSHL_N))]
88f77cba 4857 "TARGET_NEON"
b617fc71 4858{
d57daa0c 4859 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc 4860 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4861}
f7379e5e 4862 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4863)
88f77cba
JB
4864
;; "vqshlu.<V_s_elem>" of a VDQIX vector by immediate operand 2
;; (UNSPEC_VQSHLU_N).  The immediate is checked with
;; arm_const_bounds (0, neon_element_bits (<MODE>mode)).
4865(define_insn "neon_vqshlu_n<mode>"
4866 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4867 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc 4868 (match_operand:SI 2 "immediate_operand" "i")]
88f77cba
JB
4869 UNSPEC_VQSHLU_N))]
4870 "TARGET_NEON"
b617fc71 4871{
d57daa0c 4872 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc 4873 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4874}
f7379e5e 4875 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4876)
88f77cba 4877
;; Widening shift left of a VW vector by immediate operand 2 (VSHLL_N
;; iterator), producing a <V_widen> result.
;; NOTE(review): the C comment below states "0 < imm", but the lower bound
;; passed to arm_const_bounds is 0, whereas patterns that exclude zero pass
;; 1 -- confirm which of the two is intended.
94f0f2cc 4878(define_insn "neon_vshll<sup>_n<mode>"
88f77cba
JB
4879 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4880 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
4881 (match_operand:SI 2 "immediate_operand" "i")]
4882 VSHLL_N))]
88f77cba 4883 "TARGET_NEON"
b617fc71 4884{
8cb32ff2 4885 /* The boundaries are: 0 < imm <= size. */
d57daa0c 4886 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
94f0f2cc 4887 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
b617fc71 4888}
f7379e5e 4889 [(set_attr "type" "neon_shift_imm_long")]
c956e102 4890)
88f77cba 4891
94f0f2cc
JG
;; vsra_n, vrsra_n
;; Operand 1 is tied to the destination register ("0"); the assembler
;; template therefore names only operands 0, 2 and 3.
(define_insn "neon_v<shift_op><sup>_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          VSRA_N))]
  "TARGET_NEON"
{
  /* Bounds passed to the check: low = 1, high = element bits + 1.  */
  HOST_WIDE_INT upper = neon_element_bits (<MODE>mode) + 1;
  arm_const_bounds (operands[3], 1, upper);
  return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_acc<q>")]
)
88f77cba
JB
4906
;; vsri by immediate.  Operand 1 is tied to the destination register.
(define_insn "neon_vsri_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSRI))]
  "TARGET_NEON"
{
  /* Bounds passed to the check: low = 1, high = element bits + 1.  */
  HOST_WIDE_INT upper = neon_element_bits (<MODE>mode) + 1;
  arm_const_bounds (operands[3], 1, upper);
  return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
88f77cba
JB
4920
;; vsli by immediate.  Operand 1 is tied to the destination register.
(define_insn "neon_vsli_n<mode>"
  [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
        (unspec:VDQIX
          [(match_operand:VDQIX 1 "s_register_operand" "0")
           (match_operand:VDQIX 2 "s_register_operand" "w")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VSLI))]
  "TARGET_NEON"
{
  /* Bounds passed to the check: low = 0, high = element bits.  */
  HOST_WIDE_INT elem_bits = neon_element_bits (<MODE>mode);
  arm_const_bounds (operands[3], 0, elem_bits);
  return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
}
  [(set_attr "type" "neon_shift_reg<q>")]
)
88f77cba
JB
4934
;; vtbl.8 with a single-register table list (operand 1) and the index
;; vector in operand 2.
(define_insn "neon_vtbl1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "vtbl.8\t%P0, {%P1}, %P2"
  [(set_attr "type" "neon_tbl1")]
)
88f77cba
JB
4944
;; vtbl.8 with a two-register table list.  The table arrives as one TImode
;; operand and is printed as two V8QImode registers.
(define_insn "neon_vtbl2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Table registers are named by hard register number: the first one and
     the one two numbers further on.  */
  int first_reg = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[3] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
88f77cba
JB
4965
;; vtbl.8 with a three-register table list.  The table arrives as one
;; EImode operand and is printed as three V8QImode registers.
(define_insn "neon_vtbl3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:EI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Table registers sit at hard register numbers first_reg, +2 and +4.  */
  int first_reg = REGNO (operands[1]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[4] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
88f77cba
JB
4987
;; vtbl.8 with a four-register table list.  The table arrives as one
;; OImode operand and is printed as four V8QImode registers.
(define_insn "neon_vtbl4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V8QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
{
  /* Table registers sit at hard register numbers first_reg, +2, +4, +6.  */
  int first_reg = REGNO (operands[1]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[5] = operands[2];

  output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
88f77cba 5010
b440f324
RH
;; These three are used by the vec_perm infrastructure for V16QImode.
;; This one is split after reload into two neon_vtbl2v8qi insns, one per
;; V8QImode half of the destination and of the selector; the V16QImode
;; table is reinterpreted as TImode and shared by both.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = gen_lowpart (TImode, operands[1]);

  /* Low half first, then high half.  */
  for (int half = 0; half < 2; half++)
    {
      unsigned ofs;
      if (half == 0)
        ofs = subreg_lowpart_offset (V8QImode, V16QImode);
      else
        ofs = subreg_highpart_offset (V8QImode, V16QImode);

      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, ofs);
      rtx sel_half = simplify_subreg (V8QImode, operands[2], V16QImode, ofs);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
b440f324
RH
5042
;; V16QImode table lookup with an OImode table.  Split after reload into
;; two neon_vtbl2v8qi insns, one per V8QImode half of the destination and
;; of the selector.
;; NOTE(review): the OImode table is handed unchanged to
;; gen_neon_vtbl2v8qi, whose table operand is declared TImode -- confirm
;; this is intended.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
        (unspec:V16QI
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx table = operands[1];

  /* Low half first, then high half.  */
  for (int half = 0; half < 2; half++)
    {
      unsigned ofs;
      if (half == 0)
        ofs = subreg_lowpart_offset (V8QImode, V16QImode);
      else
        ofs = subreg_highpart_offset (V8QImode, V16QImode);

      rtx dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, ofs);
      rtx sel_half = simplify_subreg (V8QImode, operands[2], V16QImode, ofs);
      emit_insn (gen_neon_vtbl2v8qi (dest_half, table, sel_half));
    }
  DONE;
}
  [(set_attr "type" "multiple")]
)
b440f324
RH
5073
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes.  But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;;
;; Combines two V16QImode registers into one OImode value; the actual
;; code is produced after reload by neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V16QI 1 "s_register_operand" "w")
           (match_operand:V16QI 2 "s_register_operand" "w")]
          UNSPEC_VCONCAT))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
b440f324 5093
88f77cba
JB
;; vtbx.8 with a single-register table list.  Operand 1 is tied to the
;; destination; operand 2 is the table and operand 3 the index vector.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:V8QI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
88f77cba
JB
5104
;; vtbx.8 with a two-register table list.  Operand 1 is tied to the
;; destination; the TImode table (operand 2) is printed as two V8QImode
;; registers.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:TI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Table registers sit at hard register numbers first_reg and +2.  */
  int first_reg = REGNO (operands[2]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[3] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
88f77cba
JB
5126
;; vtbx.8 with a three-register table list.  Operand 1 is tied to the
;; destination; the EImode table (operand 2) is printed as three V8QImode
;; registers.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:EI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Table registers sit at hard register numbers first_reg, +2 and +4.  */
  int first_reg = REGNO (operands[2]);
  rtx asm_ops[5];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 3; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[4] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
88f77cba
JB
5149
;; vtbx.8 with a four-register table list.  Operand 1 is tied to the
;; destination; the OImode table (operand 2) is printed as four V8QImode
;; registers.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI
          [(match_operand:V8QI 1 "s_register_operand" "0")
           (match_operand:OI 2 "s_register_operand" "w")
           (match_operand:V8QI 3 "s_register_operand" "w")]
          UNSPEC_VTBX))]
  "TARGET_NEON"
{
  /* Table registers sit at hard register numbers first_reg, +2, +4, +6.  */
  int first_reg = REGNO (operands[2]);
  rtx asm_ops[6];

  asm_ops[0] = operands[0];
  for (int i = 0; i < 4; i++)
    asm_ops[1 + i] = gen_rtx_REG (V8QImode, first_reg + 2 * i);
  asm_ops[5] = operands[3];

  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", asm_ops);
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
88f77cba 5173
;; Expander producing both vtrn results at once: operand 0 receives the
;; UNSPEC_VTRN1 result and operand 3 the UNSPEC_VTRN2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VTRN1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
5185
;; Note: Different operand numbering to handle tied registers correctly.
;; Input operand 1 is tied to output 0 and input operand 3 to output 2;
;; both outputs are earlyclobber.
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
88f77cba 5199
;; Expander producing both vzip results at once: operand 0 receives the
;; UNSPEC_VZIP1 result and operand 3 the UNSPEC_VZIP2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VZIP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
5211
;; Note: Different operand numbering to handle tied registers correctly.
;; Input operand 1 is tied to output 0 and input operand 3 to output 2;
;; both outputs are earlyclobber.
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
88f77cba 5225
;; Expander producing both vuzp results at once: operand 0 receives the
;; UNSPEC_VUZP1 result and operand 3 the UNSPEC_VUZP2 result of the same
;; two inputs (operands 1 and 2).
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
     [(set (match_operand:VDQWH 0 "s_register_operand")
           (unspec:VDQWH
             [(match_operand:VDQWH 1 "s_register_operand")
              (match_operand:VDQWH 2 "s_register_operand")]
             UNSPEC_VUZP1))
      (set (match_operand:VDQWH 3 "s_register_operand")
           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
5237
;; Note: Different operand numbering to handle tied registers correctly.
;; Input operand 1 is tied to output 0 and input operand 3 to output 2;
;; both outputs are earlyclobber.
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH
          [(match_operand:VDQWH 1 "s_register_operand" "0")
           (match_operand:VDQWH 3 "s_register_operand" "2")]
          UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)] UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
88f77cba 5251
3188ed59
RS
;; vec_load_lanes for a single VDQX vector: expands to the same
;; UNSPEC_VLD1 RTL shape as neon_vld1<mode>.
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON")
5257
88f77cba
JB
;; vld1 of a whole VDQX vector from a neon_struct_operand memory operand.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX
          [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
88f77cba 5266
22f9db64
CB
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;; D-register (VDX) form: operand 2 is tied to the destination and
;; operand 3 is the lane index.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VDX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));

  operands[3] = GEN_INT (arch_lane);

  /* A one-element vector is loaded whole, without a lane suffix.  */
  return (nunits == 1
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
88f77cba 5288
22f9db64
CB
;; See the comment on the VDX neon_vld1_lane pattern for the reason why
;; the lane numbers are reversed here on big endian targets.
;; Q-register (VQX) form: the lane is mapped onto one <V_HALF>mode half of
;; the destination, which is what gets printed.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX
          [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
           (match_operand:VQX 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int half_regno = REGNO (operands[0]);

  /* Lanes in the upper half live in the register two numbers up.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }

  operands[3] = GEN_INT (arch_lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds one, so no lane suffix is printed.  */
  return (nunits == 2
          ? "vld1.<V_sz_elem>\t%P0, %A1"
          : "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1");
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
88f77cba
JB
5317
;; vld1 all-lanes load for VD_LANE modes: one <V_elem> memory element is
;; duplicated (vec_duplicate) across the destination.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5325
;; Special case for DImode.  Treat it exactly like a simple load: the
;; expander emits the UNSPEC_VLD1 RTL shape rather than a vec_duplicate.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI
          [(match_operand:DI 1 "neon_struct_operand")]
          UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
88f77cba
JB
5334
;; vld1 all-lanes load for VQ2 modes: both registers named by %e0 and %f0
;; are filled from the single <V_elem> memory element.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2
          (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5344
;; V2DI all-lanes load.  Split after reload into a DImode load of the low
;; half (via neon_vld1_dupdi) followed by a move of that half into the
;; high half.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx low_half = gen_lowpart (DImode, operands[0]);
  emit_insn (gen_neon_vld1_dupdi (low_half, operands[1]));

  rtx high_half = gen_highpart (DImode, operands[0]);
  emit_move_insn (high_half, low_half);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
88f77cba 5361
3188ed59
RS
;; vec_store_lanes for a single VDQX vector: expands to the same
;; UNSPEC_VST1 RTL shape as neon_vst1<mode>.
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand")]
          UNSPEC_VST1))]
  "TARGET_NEON")
5367
;; vst1 of a whole VDQX vector to a neon_struct_operand memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX
          [(match_operand:VDQX 1 "s_register_operand" "w")]
          UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
88f77cba 5375
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; D-register (VDX) form: stores the lane selected by operand 2.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));

  operands[2] = GEN_INT (arch_lane);

  /* A one-element vector is stored whole, without a lane suffix.  */
  return (nunits == 1
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
88f77cba 5396
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; Q-register (VQX) form: the lane is mapped onto one <V_HALF>mode half of
;; the source register, which is what gets printed.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT nunits = GET_MODE_NUNITS (<MODE>mode);
  HOST_WIDE_INT half = nunits / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int half_regno = REGNO (operands[1]);

  /* Lanes in the upper half live in the register two numbers up.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      half_regno += 2;
    }

  operands[2] = GEN_INT (arch_lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, half_regno);

  /* With two elements each half holds one, so no lane suffix is printed.  */
  return (nunits == 2
          ? "vst1.<V_sz_elem>\t{%P1}, %A0"
          : "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0");
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
88f77cba 5424
3188ed59
RS
;; vec_load_lanes for a TImode structure of VDX vectors: expands to the
;; UNSPEC_VLD2 RTL shape.  The dummy unspec only carries the vector mode.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5431
88f77cba
JB
;; Two-vector structure load into a TImode register group (VDXBF modes).
;; For 64-bit elements the pattern emits vld1.64 instead of vld2, and the
;; "type" attribute follows the same split.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:TI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld2.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_2reg<q>")
                      (const_string "neon_load2_2reg<q>")))]
)
88f77cba 5449
3188ed59
RS
;; vec_load_lanes for an OImode structure of VQ2 vectors: expands to the
;; UNSPEC_VLD2 RTL shape.  The dummy unspec only carries the vector mode.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON")
5456
88f77cba
JB
;; Two-vector structure load into an OImode register group (VQ2BF modes).
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:OI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")]
)
88f77cba 5465
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; TImode (VD_LANE) form: operand 2 is tied to the destination and
;; operand 3 is the lane index.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:TI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  /* The two registers printed are base_regno and base_regno + 2.  */
  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
88f77cba 5489
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; OImode (VQ_HS) form: operand 2 is tied to the destination and
;; operand 3 is the lane index.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (match_operand:OI 2 "s_register_operand" "0")
           (match_operand:SI 3 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  /* Lanes in the upper half are addressed through the registers two
     numbers up, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      base_regno += 2;
    }

  /* The two registers printed are base_regno and base_regno + 4.  */
  for (int i = 0; i < 2; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[2] = operands[1];
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
88f77cba
JB
5519
;; vld2 all-lanes load into a TImode register group (VDXBF modes).  Modes
;; with a single element emit a plain vld1 instead, and the "type"
;; attribute follows the same split.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI
          [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  return (GET_MODE_NUNITS (<MODE>mode) > 1
          ? "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1"
          : "vld1.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                      (const_string "neon_load2_all_lanes<q>")
                      (const_string "neon_load1_1reg<q>")))]
)
88f77cba 5537
eb637e76
DB
;; V8BF variant of vld2_dup, enabled under TARGET_BF16_SIMD.  The OImode
;; destination is printed as four V4BFmode registers.
;; NOTE(review): unlike neon_vld2_dup<mode>, the template below has no
;; "[]" all-lanes syntax on its registers -- confirm this is the intended
;; instruction form for a dup load from a V2BF memory operand.
(define_insn "neon_vld2_dupv8bf"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI
          [(match_operand:V2BF 1 "neon_struct_operand" "Um")
           (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD2_DUP))]
  "TARGET_BF16_SIMD"
{
  int first_reg = REGNO (operands[0]);
  rtx asm_ops[5];

  asm_ops[4] = operands[1];
  /* Registers printed: first_reg, +2, +4 and +6.  */
  for (int i = 0; i < 4; i++)
    asm_ops[i] = gen_rtx_REG (V4BFmode, first_reg + 2 * i);

  output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load2_all_lanes_q")]
)
5558
3188ed59
RS
;; vec_store_lanes for a TImode structure of VDX vectors: expands to the
;; UNSPEC_VST2 RTL shape.  The dummy unspec only carries the vector mode.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5565
;; Two-vector structure store from a TImode register group (VDXBF modes).
;; For 64-bit elements the pattern emits vst1.64 instead of vst2, and the
;; "type" attribute follows the same split.
;;
;; The vst2 case is a whole two-register structure store, so it is typed
;; neon_store2_2reg<q>.  It was previously typed neon_store2_one_lane<q>,
;; which did not agree with neon_load2_2reg<q> on the matching load or
;; with neon_store2_4reg<q> on the OImode store.  Only the scheduling
;; classification changes; the RTL and the emitted assembly do not.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI
          [(match_operand:TI 1 "s_register_operand" "w")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_store1_2reg<q>")
                      (const_string "neon_store2_2reg<q>")))]
)
88f77cba 5583
3188ed59
RS
;; vec_store_lanes for an OImode structure of VQ2 vectors: expands to the
;; UNSPEC_VST2 RTL shape.  The dummy unspec only carries the vector mode.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand")
           (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON")
5590
;; Two-vector structure store from an OImode register group (VQ2BF modes).
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI
          [(match_operand:OI 1 "s_register_operand" "w")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
88f77cba 5600
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; TImode (VD_LANE) form: stores the lane selected by operand 2 from each
;; of the two registers.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  asm_ops[0] = operands[0];
  /* The two registers printed are base_regno and base_regno + 2.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, base_regno + 2 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
88f77cba 5624
22f9db64
CB
;; See the comment on neon_vld1_lane for the reason why the lane numbers
;; are reversed here on big endian targets.
;; OImode (VQ_HS) form: stores the lane selected by operand 2 from each of
;; the two registers.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT half = GET_MODE_NUNITS (<MODE>mode) / 2;
  HOST_WIDE_INT arch_lane
    = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int base_regno = REGNO (operands[1]);
  rtx asm_ops[4];

  /* Lanes in the upper half are addressed through the registers two
     numbers up, with the lane index rebased.  */
  if (arch_lane >= half)
    {
      arch_lane -= half;
      base_regno += 2;
    }

  asm_ops[0] = operands[0];
  /* The two registers printed are base_regno and base_regno + 4.  */
  for (int i = 0; i < 2; i++)
    asm_ops[1 + i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = GEN_INT (arch_lane);

  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", asm_ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
88f77cba 5654
3188ed59
RS
;; vec_load_lanes for an EImode structure of VDX vectors: expands to the
;; UNSPEC_VLD3 RTL shape.  The dummy unspec only carries the vector mode.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand")
           (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON")
5661
88f77cba
JB
;; Three-vector structure load into an EImode register group (VDXBF
;; modes).  For 64-bit elements the pattern emits vld1.64 instead of vld3,
;; and the "type" attribute follows the same split.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3))]
  "TARGET_NEON"
{
  return (<V_sz_elem> == 64
          ? "vld1.64\t%h0, %A1"
          : "vld3.<V_sz_elem>\t%h0, %A1");
}
  [(set (attr "type")
        (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                      (const_string "neon_load1_3reg<q>")
                      (const_string "neon_load3_3reg<q>")))]
)
88f77cba 5679
3188ed59
RS
;; vec_load_lanes for a CImode structure of VQ2 vectors: simply forwards
;; both operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src_mem = operands[1];

  emit_insn (gen_neon_vld3<mode> (dest, src_mem));
  DONE;
})
5689
;; Three-vector structure load into a CImode register group (VQ2BF modes).
;; Expanded as two insns, each reading one EImode-sized piece of the
;; memory operand: neon_vld3qa on the first piece, then neon_vld3qb on the
;; piece that follows it.  The second insn also takes the destination as
;; an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx dest = operands[0];

  rtx first_piece = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (dest, first_piece));

  rtx second_piece
    = adjust_address (first_piece, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (dest, second_piece, dest));
  DONE;
})
5704
;; First half of the CImode vld3 expansion: a vld3 from an EImode memory
;; operand into the registers at REGNO, REGNO + 4 and REGNO + 8 of the
;; destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI
          [(match_operand:EI 1 "neon_struct_operand" "Um")
           (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[0]);
  rtx asm_ops[4];

  for (int i = 0; i < 3; i++)
    asm_ops[i] = gen_rtx_REG (DImode, base_regno + 4 * i);
  asm_ops[3] = operands[1];

  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", asm_ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
88f77cba
JB
5723
;; Second insn of the CI-mode vld3 pair.  Operand 2 is constrained to the same
;; register as the destination ("0").  Prints a vld3.<size> whose register
;; list uses DImode registers numbered REGNO + 2, REGNO + 6 and REGNO + 10 of
;; operand 0, reading from the EI memory operand 1.
5724(define_insn "neon_vld3qb<mode>"
5725 [(set (match_operand:CI 0 "s_register_operand" "=w")
6308e208
RS
5726 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5727 (match_operand:CI 2 "s_register_operand" "0")
eb637e76 5728 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5729 UNSPEC_VLD3B))]
88f77cba
JB
5730 "TARGET_NEON"
5731{
5732 int regno = REGNO (operands[0]);
5733 rtx ops[4];
5734 ops[0] = gen_rtx_REG (DImode, regno + 2);
5735 ops[1] = gen_rtx_REG (DImode, regno + 6);
5736 ops[2] = gen_rtx_REG (DImode, regno + 10);
6308e208
RS
5737 ops[3] = operands[1];
5738 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
88f77cba 5739 return "";
c956e102 5740}
f7379e5e 5741 [(set_attr "type" "neon_load3_3reg<q>")]
c956e102 5742)
88f77cba 5743
22f9db64
CB
5744;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5745;; here on big endian targets.
88f77cba
JB
;; Single-lane vld3 into an EI register group, for the VD_LANE modes.
;; Operand 2 is tied to the destination and operand 3 is the immediate lane
;; number, which is passed through NEON_ENDIAN_LANE_N before being printed.
;; The register list uses DImode registers REGNO, REGNO + 2 and REGNO + 4.
5746(define_insn "neon_vld3_lane<mode>"
5747 [(set (match_operand:EI 0 "s_register_operand" "=w")
6308e208 5748 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
5749 (match_operand:EI 2 "s_register_operand" "0")
5750 (match_operand:SI 3 "immediate_operand" "i")
4b644867 5751 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5752 UNSPEC_VLD3_LANE))]
5753 "TARGET_NEON"
5754{
22f9db64 5755 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
5756 int regno = REGNO (operands[0]);
5757 rtx ops[5];
88f77cba
JB
5758 ops[0] = gen_rtx_REG (DImode, regno);
5759 ops[1] = gen_rtx_REG (DImode, regno + 2);
5760 ops[2] = gen_rtx_REG (DImode, regno + 4);
5761 ops[3] = operands[1];
22f9db64 5762 ops[4] = GEN_INT (lane);
9594fe5e 5763 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
88f77cba
JB
5764 ops);
5765 return "";
c956e102 5766}
f7379e5e 5767 [(set_attr "type" "neon_load3_one_lane<q>")]
c956e102 5768)
88f77cba 5769
22f9db64
CB
5770;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5771;; here on big endian targets.
88f77cba
JB
;; Single-lane vld3 into a CI register group, for the VQ_HS modes.  The lane
;; number (after NEON_ENDIAN_LANE_N) is compared with half the element count
;; of the mode: lanes in the upper half are printed relative to that half and
;; use register numbers 2 higher.  The register list uses DImode registers
;; at the resulting base, base + 4 and base + 8.
5772(define_insn "neon_vld3_lane<mode>"
5773 [(set (match_operand:CI 0 "s_register_operand" "=w")
6308e208 5774 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
5775 (match_operand:CI 2 "s_register_operand" "0")
5776 (match_operand:SI 3 "immediate_operand" "i")
4b644867 5777 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5778 UNSPEC_VLD3_LANE))]
5779 "TARGET_NEON"
5780{
22f9db64 5781 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
5782 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5783 int regno = REGNO (operands[0]);
5784 rtx ops[5];
e68ffe57 5785 if (lane >= max / 2)
88f77cba
JB
5786 {
5787 lane -= max / 2;
5788 regno += 2;
5789 }
5790 ops[0] = gen_rtx_REG (DImode, regno);
5791 ops[1] = gen_rtx_REG (DImode, regno + 4);
5792 ops[2] = gen_rtx_REG (DImode, regno + 8);
5793 ops[3] = operands[1];
5794 ops[4] = GEN_INT (lane);
9594fe5e 5795 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
88f77cba
JB
5796 ops);
5797 return "";
c956e102 5798}
f7379e5e 5799 [(set_attr "type" "neon_load3_one_lane<q>")]
c956e102 5800)
88f77cba
JB
5801
;; All-lanes ("[]") vld3 into an EI register group, for the VDXBF modes.
;; Modes with more than one element print vld3.<size> with an empty lane
;; specifier on DImode registers REGNO, REGNO + 2 and REGNO + 4; single-element
;; modes print vld1.<size> instead.  The "type" attribute makes the same split
;; on <V_mode_nunits>.
5802(define_insn "neon_vld3_dup<mode>"
5803 [(set (match_operand:EI 0 "s_register_operand" "=w")
6308e208 5804 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
eb637e76 5805 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5806 UNSPEC_VLD3_DUP))]
5807 "TARGET_NEON"
5808{
5809 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5810 {
5811 int regno = REGNO (operands[0]);
5812 rtx ops[4];
5813 ops[0] = gen_rtx_REG (DImode, regno);
5814 ops[1] = gen_rtx_REG (DImode, regno + 2);
5815 ops[2] = gen_rtx_REG (DImode, regno + 4);
5816 ops[3] = operands[1];
9594fe5e 5817 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
88f77cba
JB
5818 return "";
5819 }
5820 else
6308e208 5821 return "vld1.<V_sz_elem>\t%h0, %A1";
c956e102 5822}
003bb7f3 5823 [(set (attr "type")
c956e102 5824 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
f7379e5e
JG
5825 (const_string "neon_load3_all_lanes<q>")
5826 (const_string "neon_load1_1reg<q>")))])
88f77cba 5827
eb637e76
DB
;; All-lanes vld3.16 for V8BF into a CI register group, enabled by
;; TARGET_BF16_SIMD.  The register list uses V4BFmode registers numbered
;; REGNO, REGNO + 2 and REGNO + 4 of operand 0.  The unspec is tagged
;; UNSPEC_VLD3_DUP so that it agrees with neon_vld3_dup<mode> rather than with
;; the vld2 patterns.
5828(define_insn "neon_vld3_dupv8bf"
5829 [(set (match_operand:CI 0 "s_register_operand" "=w")
5830 (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5831 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5832 UNSPEC_VLD3_DUP))]
5833 "TARGET_BF16_SIMD"
5834 {
5835 rtx ops[4];
5836 int tabbase = REGNO (operands[0]);
5837
5838 ops[3] = operands[1];
5839 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5840 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5841 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5842 output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
5843 return "";
5844 }
5845 [(set_attr "type" "neon_load3_all_lanes_q")]
5846)
5847
3188ed59
RS
;; vec_store_lanes expander for EI mode over the VDX modes.  It has no
;; preparation code: the RTL template (an UNSPEC_VST3 set) is emitted as is.
5848(define_expand "vec_store_lanesei<mode>"
5849 [(set (match_operand:EI 0 "neon_struct_operand")
5850 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5851 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5852 UNSPEC_VST3))]
5853 "TARGET_NEON")
5854
;; Three-element structure store from an EI register group, for the VDXBF
;; modes.  When the element size is 64 the body prints vst1.64, otherwise
;; vst3.<size>.  The "type" attribute follows the same split; the vst3 case
;; is classified as a three-register structure store (neon_store3_3reg<q>),
;; consistent with neon_vst3qa/neon_vst3qb and with the load counterpart
;; neon_vld3<mode>, since no lane is selected here.
88f77cba 5855(define_insn "neon_vst3<mode>"
6308e208 5856 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
88f77cba 5857 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
ff229375 5858 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5859 UNSPEC_VST3))]
5860 "TARGET_NEON"
5861{
5862 if (<V_sz_elem> == 64)
6308e208 5863 return "vst1.64\t%h1, %A0";
88f77cba 5864 else
6308e208 5865 return "vst3.<V_sz_elem>\t%h1, %A0";
c956e102 5866}
003bb7f3 5867 [(set (attr "type")
c956e102 5868 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
f7379e5e
JG
5869 (const_string "neon_store1_3reg<q>")
5870 (const_string "neon_store3_3reg<q>")))])
88f77cba 5871
3188ed59
RS
;; vec_store_lanes expander for CI mode over the VQ2 modes.  It only forwards
;; its two operands to gen_neon_vst3<mode>.
5872(define_expand "vec_store_lanesci<mode>"
5873 [(match_operand:CI 0 "neon_struct_operand")
5874 (match_operand:CI 1 "s_register_operand")
4b644867 5875 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3188ed59
RS
5876 "TARGET_NEON"
5877{
5878 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5879 DONE;
5880})
5881
;; CI-mode vst3 expander for the VQ2BF modes.  The memory operand is viewed as
;; two consecutive EI-sized pieces: neon_vst3qa<mode> is emitted for the piece
;; at offset 0 and neon_vst3qb<mode> for the piece at GET_MODE_SIZE (EImode).
;; Both insns take the whole CI register operand as their source.
88f77cba 5882(define_expand "neon_vst3<mode>"
6308e208
RS
5883 [(match_operand:CI 0 "neon_struct_operand")
5884 (match_operand:CI 1 "s_register_operand")
ff229375 5885 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5886 "TARGET_NEON"
5887{
6308e208
RS
5888 rtx mem;
5889
5890 mem = adjust_address (operands[0], EImode, 0);
5891 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5892 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5893 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
88f77cba
JB
5894 DONE;
5895})
5896
;; First insn of the CI-mode vst3 pair.  Prints a single vst3.<size> whose
;; register list uses DImode registers numbered REGNO, REGNO + 4 and REGNO + 8
;; of the CI source operand 1, writing to the EI memory operand 0.
5897(define_insn "neon_vst3qa<mode>"
6308e208
RS
5898 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5899 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
ff229375 5900 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5901 UNSPEC_VST3A))]
88f77cba
JB
5902 "TARGET_NEON"
5903{
6308e208 5904 int regno = REGNO (operands[1]);
88f77cba
JB
5905 rtx ops[4];
5906 ops[0] = operands[0];
5907 ops[1] = gen_rtx_REG (DImode, regno);
5908 ops[2] = gen_rtx_REG (DImode, regno + 4);
5909 ops[3] = gen_rtx_REG (DImode, regno + 8);
6308e208 5910 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
88f77cba 5911 return "";
c956e102 5912}
f7379e5e 5913 [(set_attr "type" "neon_store3_3reg<q>")]
c956e102 5914)
88f77cba
JB
5915
;; Second insn of the CI-mode vst3 pair.  Prints a vst3.<size> whose register
;; list uses DImode registers numbered REGNO + 2, REGNO + 6 and REGNO + 10 of
;; the CI source operand 1, writing to the EI memory operand 0.
5916(define_insn "neon_vst3qb<mode>"
6308e208
RS
5917 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5918 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
ff229375 5919 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5920 UNSPEC_VST3B))]
88f77cba
JB
5921 "TARGET_NEON"
5922{
6308e208 5923 int regno = REGNO (operands[1]);
88f77cba
JB
5924 rtx ops[4];
5925 ops[0] = operands[0];
5926 ops[1] = gen_rtx_REG (DImode, regno + 2);
5927 ops[2] = gen_rtx_REG (DImode, regno + 6);
5928 ops[3] = gen_rtx_REG (DImode, regno + 10);
6308e208 5929 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
88f77cba 5930 return "";
c956e102 5931}
f7379e5e 5932 [(set_attr "type" "neon_store3_3reg<q>")]
c956e102 5933)
88f77cba 5934
22f9db64
CB
5935;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5936;; here on big endian targets.
;; Single-lane vst3 from an EI register group, for the VD_LANE modes.
;; Operand 2 is the immediate lane number, passed through NEON_ENDIAN_LANE_N
;; before being printed.  The register list uses DImode registers REGNO,
;; REGNO + 2 and REGNO + 4 of operand 1.
88f77cba 5937(define_insn "neon_vst3_lane<mode>"
6308e208 5938 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
5939 (unspec:<V_three_elem>
5940 [(match_operand:EI 1 "s_register_operand" "w")
5941 (match_operand:SI 2 "immediate_operand" "i")
4b644867 5942 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5943 UNSPEC_VST3_LANE))]
5944 "TARGET_NEON"
5945{
22f9db64 5946 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
5947 int regno = REGNO (operands[1]);
5948 rtx ops[5];
88f77cba
JB
5949 ops[0] = operands[0];
5950 ops[1] = gen_rtx_REG (DImode, regno);
5951 ops[2] = gen_rtx_REG (DImode, regno + 2);
5952 ops[3] = gen_rtx_REG (DImode, regno + 4);
22f9db64 5953 ops[4] = GEN_INT (lane);
9594fe5e 5954 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
88f77cba
JB
5955 ops);
5956 return "";
c956e102 5957}
f7379e5e 5958 [(set_attr "type" "neon_store3_one_lane<q>")]
c956e102 5959)
88f77cba 5960
22f9db64
CB
5961;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5962;; here on big endian targets.
;; Single-lane vst3 from a CI register group, for the VQ_HS modes.  The lane
;; number (after NEON_ENDIAN_LANE_N) is compared with half the element count
;; of the mode: lanes in the upper half are printed relative to that half and
;; use register numbers 2 higher.  The register list uses DImode registers
;; at the resulting base, base + 4 and base + 8.
88f77cba 5963(define_insn "neon_vst3_lane<mode>"
6308e208 5964 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
5965 (unspec:<V_three_elem>
5966 [(match_operand:CI 1 "s_register_operand" "w")
5967 (match_operand:SI 2 "immediate_operand" "i")
4b644867 5968 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5969 UNSPEC_VST3_LANE))]
5970 "TARGET_NEON"
5971{
22f9db64 5972 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
5973 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5974 int regno = REGNO (operands[1]);
5975 rtx ops[5];
e68ffe57 5976 if (lane >= max / 2)
88f77cba
JB
5977 {
5978 lane -= max / 2;
5979 regno += 2;
5980 }
5981 ops[0] = operands[0];
5982 ops[1] = gen_rtx_REG (DImode, regno);
5983 ops[2] = gen_rtx_REG (DImode, regno + 4);
5984 ops[3] = gen_rtx_REG (DImode, regno + 8);
5985 ops[4] = GEN_INT (lane);
9594fe5e 5986 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
88f77cba
JB
5987 ops);
5988 return "";
c956e102 5989}
f7379e5e
JG
5990 [(set_attr "type" "neon_store3_one_lane<q>")]
5991)
88f77cba 5992
3188ed59
RS
;; vec_load_lanes expander for OI mode over the VDX modes.  It has no
;; preparation code: the RTL template (an UNSPEC_VLD4 set) is emitted as is.
5993(define_expand "vec_load_lanesoi<mode>"
5994 [(set (match_operand:OI 0 "s_register_operand")
5995 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5996 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5997 UNSPEC_VLD4))]
5998 "TARGET_NEON")
5999
88f77cba
JB
;; Four-element structure load into an OI register group, for the VDXBF
;; modes.  When the element size is 64 the body prints vld1.64, otherwise
;; vld4.<size>; the "type" attribute makes the same 64 / non-64 split.
6000(define_insn "neon_vld4<mode>"
6001 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 6002 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
eb637e76 6003 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6004 UNSPEC_VLD4))]
6005 "TARGET_NEON"
6006{
6007 if (<V_sz_elem> == 64)
6308e208 6008 return "vld1.64\t%h0, %A1";
88f77cba 6009 else
6308e208 6010 return "vld4.<V_sz_elem>\t%h0, %A1";
c956e102 6011}
003bb7f3 6012 [(set (attr "type")
c956e102 6013 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
f7379e5e
JG
6014 (const_string "neon_load1_4reg<q>")
6015 (const_string "neon_load4_4reg<q>")))]
c956e102 6016)
88f77cba 6017
3188ed59
RS
;; vec_load_lanes expander for XI mode over the VQ2 modes.  It only forwards
;; its two operands to gen_neon_vld4<mode>.
6018(define_expand "vec_load_lanesxi<mode>"
6019 [(match_operand:XI 0 "s_register_operand")
6020 (match_operand:XI 1 "neon_struct_operand")
4b644867 6021 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3188ed59
RS
6022 "TARGET_NEON"
6023{
6024 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6025 DONE;
6026})
6027
;; XI-mode vld4 expander for the VQ2BF modes.  The memory operand is viewed as
;; two consecutive OI-sized pieces: neon_vld4qa<mode> is emitted for the piece
;; at offset 0 and neon_vld4qb<mode> for the piece at GET_MODE_SIZE (OImode).
;; The second insn also receives operands[0] as an input operand.
88f77cba 6028(define_expand "neon_vld4<mode>"
6308e208
RS
6029 [(match_operand:XI 0 "s_register_operand")
6030 (match_operand:XI 1 "neon_struct_operand")
eb637e76 6031 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6032 "TARGET_NEON"
6033{
6308e208
RS
6034 rtx mem;
6035
6036 mem = adjust_address (operands[1], OImode, 0);
6037 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6038 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6039 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
88f77cba
JB
6040 DONE;
6041})
6042
;; First insn of the XI-mode vld4 pair.  Prints a single vld4.<size> whose
;; register list uses DImode registers numbered REGNO, REGNO + 4, REGNO + 8
;; and REGNO + 12 of operand 0, reading from the OI memory operand 1.
6043(define_insn "neon_vld4qa<mode>"
6044 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208 6045 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
eb637e76 6046 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6047 UNSPEC_VLD4A))]
88f77cba
JB
6048 "TARGET_NEON"
6049{
6050 int regno = REGNO (operands[0]);
6051 rtx ops[5];
6052 ops[0] = gen_rtx_REG (DImode, regno);
6053 ops[1] = gen_rtx_REG (DImode, regno + 4);
6054 ops[2] = gen_rtx_REG (DImode, regno + 8);
6055 ops[3] = gen_rtx_REG (DImode, regno + 12);
a6217191 6056 ops[4] = operands[1];
6308e208 6057 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
88f77cba 6058 return "";
c956e102 6059}
f7379e5e 6060 [(set_attr "type" "neon_load4_4reg<q>")]
c956e102 6061)
88f77cba
JB
6062
;; Second insn of the XI-mode vld4 pair.  Operand 2 is constrained to the same
;; register as the destination ("0").  Prints a vld4.<size> whose register
;; list uses DImode registers numbered REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of operand 0, reading from the OI memory operand 1.
6063(define_insn "neon_vld4qb<mode>"
6064 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208
RS
6065 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6066 (match_operand:XI 2 "s_register_operand" "0")
eb637e76 6067 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6068 UNSPEC_VLD4B))]
88f77cba
JB
6069 "TARGET_NEON"
6070{
6071 int regno = REGNO (operands[0]);
6072 rtx ops[5];
6073 ops[0] = gen_rtx_REG (DImode, regno + 2);
6074 ops[1] = gen_rtx_REG (DImode, regno + 6);
6075 ops[2] = gen_rtx_REG (DImode, regno + 10);
6076 ops[3] = gen_rtx_REG (DImode, regno + 14);
6308e208
RS
6077 ops[4] = operands[1];
6078 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
88f77cba 6079 return "";
c956e102 6080}
f7379e5e 6081 [(set_attr "type" "neon_load4_4reg<q>")]
c956e102 6082)
88f77cba 6083
22f9db64
CB
6084;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6085;; here on big endian targets.
88f77cba
JB
;; Single-lane vld4 into an OI register group, for the VD_LANE modes.
;; Operand 2 is tied to the destination and operand 3 is the immediate lane
;; number, passed through NEON_ENDIAN_LANE_N before being printed.  The
;; register list uses DImode registers REGNO, REGNO + 2, REGNO + 4, REGNO + 6.
6086(define_insn "neon_vld4_lane<mode>"
6087 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 6088 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
6089 (match_operand:OI 2 "s_register_operand" "0")
6090 (match_operand:SI 3 "immediate_operand" "i")
4b644867 6091 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6092 UNSPEC_VLD4_LANE))]
6093 "TARGET_NEON"
6094{
22f9db64 6095 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
6096 int regno = REGNO (operands[0]);
6097 rtx ops[6];
88f77cba
JB
6098 ops[0] = gen_rtx_REG (DImode, regno);
6099 ops[1] = gen_rtx_REG (DImode, regno + 2);
6100 ops[2] = gen_rtx_REG (DImode, regno + 4);
6101 ops[3] = gen_rtx_REG (DImode, regno + 6);
6102 ops[4] = operands[1];
22f9db64 6103 ops[5] = GEN_INT (lane);
6308e208 6104 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
88f77cba
JB
6105 ops);
6106 return "";
c956e102 6107}
f7379e5e 6108 [(set_attr "type" "neon_load4_one_lane<q>")]
c956e102 6109)
88f77cba 6110
22f9db64
CB
6111;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6112;; here on big endian targets.
88f77cba
JB
;; Single-lane vld4 into an XI register group, for the VQ_HS modes.  The lane
;; number (after NEON_ENDIAN_LANE_N) is compared with half the element count
;; of the mode: lanes in the upper half are printed relative to that half and
;; use register numbers 2 higher.  The register list uses DImode registers
;; at the resulting base, base + 4, base + 8 and base + 12.
6113(define_insn "neon_vld4_lane<mode>"
6114 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208 6115 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
6116 (match_operand:XI 2 "s_register_operand" "0")
6117 (match_operand:SI 3 "immediate_operand" "i")
4b644867 6118 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6119 UNSPEC_VLD4_LANE))]
6120 "TARGET_NEON"
6121{
22f9db64 6122 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
6123 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6124 int regno = REGNO (operands[0]);
6125 rtx ops[6];
e68ffe57 6126 if (lane >= max / 2)
88f77cba
JB
6127 {
6128 lane -= max / 2;
6129 regno += 2;
6130 }
6131 ops[0] = gen_rtx_REG (DImode, regno);
6132 ops[1] = gen_rtx_REG (DImode, regno + 4);
6133 ops[2] = gen_rtx_REG (DImode, regno + 8);
6134 ops[3] = gen_rtx_REG (DImode, regno + 12);
6135 ops[4] = operands[1];
6136 ops[5] = GEN_INT (lane);
6308e208 6137 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
88f77cba
JB
6138 ops);
6139 return "";
c956e102 6140}
f7379e5e 6141 [(set_attr "type" "neon_load4_one_lane<q>")]
c956e102 6142)
88f77cba
JB
6143
;; All-lanes ("[]") vld4 into an OI register group, for the VDXBF modes.
;; Modes with more than one element print vld4.<size> with an empty lane
;; specifier on DImode registers REGNO, REGNO + 2, REGNO + 4 and REGNO + 6;
;; single-element modes print vld1.<size> instead.  The "type" attribute makes
;; the same split on <V_mode_nunits>.
6144(define_insn "neon_vld4_dup<mode>"
6145 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 6146 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
eb637e76 6147 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6148 UNSPEC_VLD4_DUP))]
6149 "TARGET_NEON"
6150{
6151 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6152 {
6153 int regno = REGNO (operands[0]);
6154 rtx ops[5];
6155 ops[0] = gen_rtx_REG (DImode, regno);
6156 ops[1] = gen_rtx_REG (DImode, regno + 2);
6157 ops[2] = gen_rtx_REG (DImode, regno + 4);
6158 ops[3] = gen_rtx_REG (DImode, regno + 6);
6159 ops[4] = operands[1];
6308e208 6160 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
88f77cba
JB
6161 ops);
6162 return "";
6163 }
6164 else
6308e208 6165 return "vld1.<V_sz_elem>\t%h0, %A1";
c956e102 6166}
003bb7f3 6167 [(set (attr "type")
c956e102 6168 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
f7379e5e
JG
6169 (const_string "neon_load4_all_lanes<q>")
6170 (const_string "neon_load1_1reg<q>")))]
c956e102 6171)
88f77cba 6172
eb637e76
DB
;; All-lanes vld4.16 for V8BF into an XI register group, enabled by
;; TARGET_BF16_SIMD.  The register list uses V4BFmode registers numbered
;; REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of operand 0.  The unspec is
;; tagged UNSPEC_VLD4_DUP so that it agrees with neon_vld4_dup<mode> rather
;; than with the vld2 patterns.
6173(define_insn "neon_vld4_dupv8bf"
6174 [(set (match_operand:XI 0 "s_register_operand" "=w")
6175 (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
6176 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6177 UNSPEC_VLD4_DUP))]
6178 "TARGET_BF16_SIMD"
6179 {
6180 rtx ops[5];
6181 int tabbase = REGNO (operands[0]);
6182
6183 ops[4] = operands[1];
6184 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
6185 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
6186 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
6187 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
6188 output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
6189 return "";
6190 }
6191 [(set_attr "type" "neon_load4_all_lanes_q")]
6192)
6193
3188ed59
RS
;; vec_store_lanes expander for OI mode over the VDX modes.  It has no
;; preparation code: the RTL template (an UNSPEC_VST4 set) is emitted as is.
6194(define_expand "vec_store_lanesoi<mode>"
6195 [(set (match_operand:OI 0 "neon_struct_operand")
6196 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6197 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6198 UNSPEC_VST4))]
6199 "TARGET_NEON")
6200
;; Four-element structure store from an OI register group, for the VDXBF
;; modes.  When the element size is 64 the body prints vst1.64, otherwise
;; vst4.<size>; the "type" attribute makes the same 64 / non-64 split.
88f77cba 6201(define_insn "neon_vst4<mode>"
6308e208 6202 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
88f77cba 6203 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
ff229375 6204 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6205 UNSPEC_VST4))]
6206 "TARGET_NEON"
6207{
6208 if (<V_sz_elem> == 64)
6308e208 6209 return "vst1.64\t%h1, %A0";
88f77cba 6210 else
6308e208 6211 return "vst4.<V_sz_elem>\t%h1, %A0";
c956e102 6212}
003bb7f3 6213 [(set (attr "type")
c956e102 6214 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
f7379e5e
JG
6215 (const_string "neon_store1_4reg<q>")
6216 (const_string "neon_store4_4reg<q>")))]
c956e102 6217)
88f77cba 6218
3188ed59
RS
;; vec_store_lanes expander for XI mode over the VQ2 modes.  It only forwards
;; its two operands to gen_neon_vst4<mode>.
6219(define_expand "vec_store_lanesxi<mode>"
6220 [(match_operand:XI 0 "neon_struct_operand")
6221 (match_operand:XI 1 "s_register_operand")
4b644867 6222 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3188ed59
RS
6223 "TARGET_NEON"
6224{
6225 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6226 DONE;
6227})
6228
;; XI-mode vst4 expander for the VQ2BF modes.  The memory operand is viewed as
;; two consecutive OI-sized pieces: neon_vst4qa<mode> is emitted for the piece
;; at offset 0 and neon_vst4qb<mode> for the piece at GET_MODE_SIZE (OImode).
;; Both insns take the whole XI register operand as their source.
88f77cba 6229(define_expand "neon_vst4<mode>"
6308e208
RS
6230 [(match_operand:XI 0 "neon_struct_operand")
6231 (match_operand:XI 1 "s_register_operand")
ff229375 6232 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6233 "TARGET_NEON"
6234{
6308e208
RS
6235 rtx mem;
6236
6237 mem = adjust_address (operands[0], OImode, 0);
6238 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6239 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6240 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
88f77cba
JB
6241 DONE;
6242})
6243
;; First insn of the XI-mode vst4 pair.  Prints a single vst4.<size> whose
;; register list uses DImode registers numbered REGNO, REGNO + 4, REGNO + 8
;; and REGNO + 12 of the XI source operand 1, writing to the OI memory
;; operand 0.
6244(define_insn "neon_vst4qa<mode>"
6308e208
RS
6245 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6246 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
ff229375 6247 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6248 UNSPEC_VST4A))]
88f77cba
JB
6249 "TARGET_NEON"
6250{
6308e208 6251 int regno = REGNO (operands[1]);
88f77cba
JB
6252 rtx ops[5];
6253 ops[0] = operands[0];
6254 ops[1] = gen_rtx_REG (DImode, regno);
6255 ops[2] = gen_rtx_REG (DImode, regno + 4);
6256 ops[3] = gen_rtx_REG (DImode, regno + 8);
6257 ops[4] = gen_rtx_REG (DImode, regno + 12);
6308e208 6258 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
88f77cba 6259 return "";
c956e102 6260}
f7379e5e 6261 [(set_attr "type" "neon_store4_4reg<q>")]
c956e102 6262)
88f77cba
JB
6263
;; Second insn of the XI-mode vst4 pair.  Prints a vst4.<size> whose register
;; list uses DImode registers numbered REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of the XI source operand 1, writing to the OI memory operand 0.
6264(define_insn "neon_vst4qb<mode>"
6308e208
RS
6265 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6266 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
ff229375 6267 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6268 UNSPEC_VST4B))]
88f77cba
JB
6269 "TARGET_NEON"
6270{
6308e208 6271 int regno = REGNO (operands[1]);
88f77cba
JB
6272 rtx ops[5];
6273 ops[0] = operands[0];
6274 ops[1] = gen_rtx_REG (DImode, regno + 2);
6275 ops[2] = gen_rtx_REG (DImode, regno + 6);
6276 ops[3] = gen_rtx_REG (DImode, regno + 10);
6277 ops[4] = gen_rtx_REG (DImode, regno + 14);
6308e208 6278 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
88f77cba 6279 return "";
c956e102 6280}
f7379e5e 6281 [(set_attr "type" "neon_store4_4reg<q>")]
c956e102 6282)
88f77cba 6283
22f9db64
CB
6284;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6285;; here on big endian targets.
;; Single-lane vst4 from an OI register group, for the VD_LANE modes.
;; Operand 2 is the immediate lane number, passed through NEON_ENDIAN_LANE_N
;; before being printed.  The register list uses DImode registers REGNO,
;; REGNO + 2, REGNO + 4 and REGNO + 6 of operand 1.
88f77cba 6286(define_insn "neon_vst4_lane<mode>"
6308e208 6287 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
6288 (unspec:<V_four_elem>
6289 [(match_operand:OI 1 "s_register_operand" "w")
6290 (match_operand:SI 2 "immediate_operand" "i")
4b644867 6291 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6292 UNSPEC_VST4_LANE))]
6293 "TARGET_NEON"
6294{
22f9db64 6295 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
6296 int regno = REGNO (operands[1]);
6297 rtx ops[6];
88f77cba
JB
6298 ops[0] = operands[0];
6299 ops[1] = gen_rtx_REG (DImode, regno);
6300 ops[2] = gen_rtx_REG (DImode, regno + 2);
6301 ops[3] = gen_rtx_REG (DImode, regno + 4);
6302 ops[4] = gen_rtx_REG (DImode, regno + 6);
22f9db64 6303 ops[5] = GEN_INT (lane);
6308e208 6304 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
88f77cba
JB
6305 ops);
6306 return "";
c956e102 6307}
f7379e5e 6308 [(set_attr "type" "neon_store4_one_lane<q>")]
c956e102 6309)
88f77cba 6310
22f9db64
CB
6311;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6312;; here on big endian targets.
;; Single-lane vst4 from an XI register group, for the VQ_HS modes.  The lane
;; number (after NEON_ENDIAN_LANE_N) is compared with half the element count
;; of the mode: lanes in the upper half are printed relative to that half and
;; use register numbers 2 higher.  The register list uses DImode registers
;; at the resulting base, base + 4, base + 8 and base + 12.  The insn is
;; typed as a one-lane store (neon_store4_one_lane<q>), consistent with the
;; OI variant of this pattern and with neon_vld4_lane<mode>.
88f77cba 6313(define_insn "neon_vst4_lane<mode>"
6308e208 6314 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
6315 (unspec:<V_four_elem>
6316 [(match_operand:XI 1 "s_register_operand" "w")
6317 (match_operand:SI 2 "immediate_operand" "i")
4b644867 6318 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6319 UNSPEC_VST4_LANE))]
6320 "TARGET_NEON"
6321{
22f9db64 6322 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
6323 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6324 int regno = REGNO (operands[1]);
6325 rtx ops[6];
e68ffe57 6326 if (lane >= max / 2)
88f77cba
JB
6327 {
6328 lane -= max / 2;
6329 regno += 2;
6330 }
6331 ops[0] = operands[0];
6332 ops[1] = gen_rtx_REG (DImode, regno);
6333 ops[2] = gen_rtx_REG (DImode, regno + 4);
6334 ops[3] = gen_rtx_REG (DImode, regno + 8);
6335 ops[4] = gen_rtx_REG (DImode, regno + 12);
6336 ops[5] = GEN_INT (lane);
6308e208 6337 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
88f77cba
JB
6338 ops);
6339 return "";
c956e102 6340}
f7379e5e 6341 [(set_attr "type" "neon_store4_one_lane<q>")]
c956e102 6342)
88f77cba 6343
46b57af1
TB
;; Extend (SE) the half of VU operand 1 selected by the vect_par_constant_low
;; parallel in operand 2 into a <V_unpack> result; printed as vmovl on %e1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6344(define_insn "neon_vec_unpack<US>_lo_<mode>"
6345 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6346 (SE:<V_unpack> (vec_select:<V_HALF>
6347 (match_operand:VU 1 "register_operand" "w")
6348 (match_operand:VU 2 "vect_par_constant_low" ""))))]
0094f21b 6349 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6350 "vmovl.<US><V_sz_elem> %q0, %e1"
f7379e5e 6351 [(set_attr "type" "neon_shift_imm_long")]
46b57af1
TB
6352)
6353
;; Extend (SE) the half of VU operand 1 selected by the vect_par_constant_high
;; parallel in operand 2 into a <V_unpack> result; printed as vmovl on %f1.
;; Only enabled when !BYTES_BIG_ENDIAN.
6354(define_insn "neon_vec_unpack<US>_hi_<mode>"
6355 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6356 (SE:<V_unpack> (vec_select:<V_HALF>
6357 (match_operand:VU 1 "register_operand" "w")
6358 (match_operand:VU 2 "vect_par_constant_high" ""))))]
0094f21b 6359 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6360 "vmovl.<US><V_sz_elem> %q0, %f1"
f7379e5e 6361 [(set_attr "type" "neon_shift_imm_long")]
46b57af1
TB
6362)
6363
;; vec_unpack hi expander for the VU modes.  Builds a PARALLEL holding the
;; indices nunits/2 .. nunits-1 and passes it as the selector operand of
;; neon_vec_unpack<US>_hi_<mode>.  Only enabled when !BYTES_BIG_ENDIAN.
6364(define_expand "vec_unpack<US>_hi_<mode>"
cd65e265 6365 [(match_operand:<V_unpack> 0 "register_operand")
46b57af1 6366 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
0094f21b 6367 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6368 {
6369 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6370 rtx t1;
6371 int i;
6372 for (i = 0; i < (<V_mode_nunits>/2); i++)
6373 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6374
6375 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6376 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6377 operands[1],
6378 t1));
6379 DONE;
6380 }
6381)
6382
;; vec_unpack lo expander for the VU modes.  Builds a PARALLEL holding the
;; indices 0 .. nunits/2-1 and passes it as the selector operand of
;; neon_vec_unpack<US>_lo_<mode>.  Only enabled when !BYTES_BIG_ENDIAN.
6383(define_expand "vec_unpack<US>_lo_<mode>"
cd65e265
DZ
6384 [(match_operand:<V_unpack> 0 "register_operand")
6385 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
0094f21b 6386 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6387 {
6388 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6389 rtx t1;
6390 int i;
6391 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6392 RTVEC_ELT (v, i) = GEN_INT (i);
6393 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6394 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6395 operands[1],
6396 t1));
6397 DONE;
6398 }
6399)
6400
;; Widening multiply of the halves of VU operands 1 and 3 selected by the
;; vect_par_constant_low parallel in operand 2 (the same selector is reused
;; for operand 3 through match_dup); printed as vmull on %e1 and %e3.
;; Only enabled when !BYTES_BIG_ENDIAN.
6401(define_insn "neon_vec_<US>mult_lo_<mode>"
6402 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6403 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6404 (match_operand:VU 1 "register_operand" "w")
6405 (match_operand:VU 2 "vect_par_constant_low" "")))
6406 (SE:<V_unpack> (vec_select:<V_HALF>
6407 (match_operand:VU 3 "register_operand" "w")
6408 (match_dup 2)))))]
0094f21b 6409 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6410 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
f7379e5e 6411 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6412)
6413
;; vec_widen mult lo expander for the VU modes.  Builds a PARALLEL holding the
;; indices 0 .. nunits/2-1 and emits neon_vec_<US>mult_lo_<mode> with it as
;; the selector, placed between the two multiplicand operands.
;; Only enabled when !BYTES_BIG_ENDIAN.
6414(define_expand "vec_widen_<US>mult_lo_<mode>"
cd65e265
DZ
6415 [(match_operand:<V_unpack> 0 "register_operand")
6416 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6417 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
0094f21b 6418 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6419 {
6420 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6421 rtx t1;
6422 int i;
6423 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6424 RTVEC_ELT (v, i) = GEN_INT (i);
6425 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6426
6427 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6428 operands[1],
6429 t1,
6430 operands[2]));
6431 DONE;
6432 }
6433)
6434
;; Widening multiply of the halves of VU operands 1 and 3 selected by the
;; vect_par_constant_high parallel in operand 2 (the same selector is reused
;; for operand 3 through match_dup); printed as vmull on %f1 and %f3.
;; Only enabled when !BYTES_BIG_ENDIAN.
6435(define_insn "neon_vec_<US>mult_hi_<mode>"
6436 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6437 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6438 (match_operand:VU 1 "register_operand" "w")
6439 (match_operand:VU 2 "vect_par_constant_high" "")))
6440 (SE:<V_unpack> (vec_select:<V_HALF>
6441 (match_operand:VU 3 "register_operand" "w")
6442 (match_dup 2)))))]
0094f21b 6443 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6444 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
f7379e5e 6445 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6446)
6447
;; vec_widen mult hi expander for the VU modes.  Builds a PARALLEL holding the
;; indices nunits/2 .. nunits-1 and emits neon_vec_<US>mult_hi_<mode> with it
;; as the selector, placed between the two multiplicand operands.
;; Only enabled when !BYTES_BIG_ENDIAN.
6448(define_expand "vec_widen_<US>mult_hi_<mode>"
cd65e265
DZ
6449 [(match_operand:<V_unpack> 0 "register_operand")
6450 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6451 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
0094f21b 6452 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6453 {
6454 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6455 rtx t1;
6456 int i;
6457 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6458 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6459 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6460
6461 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6462 operands[1],
6463 t1,
6464 operands[2]));
6465 DONE;
6466
6467 }
6468)
6469
36ba4aae
IR
;; Widening shift left for the VW modes: operand 1 is shifted by the constant
;; operand 2 and extended (SE) to <V_widen>; printed as vshll.
6470(define_insn "neon_vec_<US>shiftl_<mode>"
6471 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6472 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6473 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6474 "TARGET_NEON"
6475{
6476 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6477}
f7379e5e 6478 [(set_attr "type" "neon_shift_imm_long")]
36ba4aae
IR
6479)
6480
;; vec_widen shiftl lo expander for the VU modes.  Takes the <V_HALF>mode
;; subreg of operand 1 at byte offset 0 and emits neon_vec_<US>shiftl_<V_half>
;; on it with the shift amount in operand 2.  Only enabled when
;; !BYTES_BIG_ENDIAN.
6481(define_expand "vec_widen_<US>shiftl_lo_<mode>"
cd65e265
DZ
6482 [(match_operand:<V_unpack> 0 "register_operand")
6483 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6484 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6485 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6486 {
6487 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6488 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6489 operands[2]));
6490 DONE;
6491 }
6492)
6493
;; vec_widen shiftl hi expander for the VU modes.  Takes the <V_HALF>mode
;; subreg of operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode) and emits
;; neon_vec_<US>shiftl_<V_half> on it with the shift amount in operand 2.
;; Only enabled when !BYTES_BIG_ENDIAN.
6494(define_expand "vec_widen_<US>shiftl_hi_<mode>"
cd65e265
DZ
6495 [(match_operand:<V_unpack> 0 "register_operand")
6496 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6497 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6498 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6499 {
6500 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6501 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6502 GET_MODE_SIZE (<V_HALF>mode)),
6503 operands[2]));
6504 DONE;
6505 }
6506)
6507
46b57af1
TB
6508;; Vectorize for non-neon-quad case
;; Extend (SE) the whole VDI operand 1 into a <V_widen> result; printed as
;; vmovl from %P1 into %q0.
6509(define_insn "neon_unpack<US>_<mode>"
6510 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
7f27ec08 6511 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
46b57af1 6512 "TARGET_NEON"
7f27ec08 6513 "vmovl.<US><V_sz_elem> %q0, %P1"
f7379e5e 6514 [(set_attr "type" "neon_move")]
46b57af1
TB
6515)
6516
;; vec_unpack lo expander for the VDI modes.  Widens all of operand 1 into a
;; fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, then copies that
;; pseudo's low part to operand 0 with neon_vget_low<V_widen_l>.
6517(define_expand "vec_unpack<US>_lo_<mode>"
cd65e265 6518 [(match_operand:<V_double_width> 0 "register_operand")
46b57af1
TB
6519 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6520 "TARGET_NEON"
6521{
6522 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6523 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6524 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6525
6526 DONE;
6527}
6528)
6529
;; vec_unpack hi expander for the VDI modes.  Widens all of operand 1 into a
;; fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, then copies that
;; pseudo's high part to operand 0 with neon_vget_high<V_widen_l>.
6530(define_expand "vec_unpack<US>_hi_<mode>"
cd65e265 6531 [(match_operand:<V_double_width> 0 "register_operand")
46b57af1
TB
6532 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6533 "TARGET_NEON"
6534{
6535 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6536 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6537 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6538
6539 DONE;
6540}
6541)
6542
;; Widening multiply of two VDI registers: both inputs are extended (SE) to
;; <V_widen> and multiplied, producing a <V_widen> result with one vmull.
6543(define_insn "neon_vec_<US>mult_<mode>"
6544 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6545 (mult:<V_widen> (SE:<V_widen>
6546 (match_operand:VDI 1 "register_operand" "w"))
6547 (SE:<V_widen>
6548 (match_operand:VDI 2 "register_operand" "w"))))]
6549 "TARGET_NEON"
7f27ec08 6550 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
f7379e5e 6551 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6552)
6553
;; Widening multiply (high part) for VDI modes.  The full widened product of
;; operands 1 and 2 is computed into a <V_widen> temporary with
;; neon_vec_<US>mult_<mode>; operand 0 receives the high half of that
;; temporary via neon_vget_high<V_widen_l>.
6554(define_expand "vec_widen_<US>mult_hi_<mode>"
cd65e265
DZ
6555 [(match_operand:<V_double_width> 0 "register_operand")
6556 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6557 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
46b57af1
TB
6558 "TARGET_NEON"
6559 {
6560 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6561 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6562 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6563
6564 DONE;
6565
6566 }
6567)
6568
;; Widening multiply (low part) for VDI modes.  The full widened product of
;; operands 1 and 2 is computed into a <V_widen> temporary with
;; neon_vec_<US>mult_<mode>; operand 0 receives the low half of that
;; temporary via neon_vget_low<V_widen_l>.
6569(define_expand "vec_widen_<US>mult_lo_<mode>"
cd65e265
DZ
6570 [(match_operand:<V_double_width> 0 "register_operand")
6571 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6572 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
46b57af1
TB
6573 "TARGET_NEON"
6574 {
6575 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6576 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6577 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6578
6579 DONE;
6580
6581 }
6582)
0f38f229 6583
;; Widening shift-left (high part) for VDI modes.  Unlike the VU expander of
;; the same name, this one has no endianness restriction: it shifts the whole
;; of operand 1 into a <V_widen> temporary with neon_vec_<US>shiftl_<mode>
;; and then copies the high half of the temporary to operand 0 via
;; neon_vget_high<V_widen_l>.
36ba4aae 6584(define_expand "vec_widen_<US>shiftl_hi_<mode>"
cd65e265
DZ
6585 [(match_operand:<V_double_width> 0 "register_operand")
6586 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6587 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6588 "TARGET_NEON"
6589 {
6590 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6591 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6592 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6593
6594 DONE;
6595 }
6596)
6597
;; Widening shift-left (low part) for VDI modes.  The whole of operand 1 is
;; shifted into a <V_widen> temporary with neon_vec_<US>shiftl_<mode>;
;; operand 0 receives the low half of that temporary via
;; neon_vget_low<V_widen_l>.
6598(define_expand "vec_widen_<US>shiftl_lo_<mode>"
cd65e265
DZ
6599 [(match_operand:<V_double_width> 0 "register_operand")
6600 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6601 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6602 "TARGET_NEON"
6603 {
6604 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6605 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6606 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6607
6608 DONE;
6609 }
6610)
6611
0094f21b
JB
6612; FIXME: These instruction patterns can't be used safely in big-endian mode
6613; because the ordering of vector elements in Q registers is different from what
6614; the semantics of the instructions require.
6615
0f38f229
TB
;; Pack two VN registers into one <V_narrow_pack> register by truncating the
;; elements of each input and concatenating the results.  The output is two
;; vmovn instructions (hence type "multiple" and length 8): the first writes
;; %e0 from operand 1, the second writes %f0 from operand 2.  Operand 0 is
;; earlyclobber ("=&w") because part of it is written before operand 2 has
;; been read.  Only enabled when !BYTES_BIG_ENDIAN.
6616(define_insn "vec_pack_trunc_<mode>"
6617 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6618 (vec_concat:<V_narrow_pack>
6619 (truncate:<V_narrow>
6620 (match_operand:VN 1 "register_operand" "w"))
6621 (truncate:<V_narrow>
6622 (match_operand:VN 2 "register_operand" "w"))))]
0094f21b 6623 "TARGET_NEON && !BYTES_BIG_ENDIAN"
30cecf17 6624 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
f7379e5e 6625 [(set_attr "type" "multiple")
30cecf17 6626 (set_attr "length" "8")]
0f38f229
TB
6627)
6628
6629;; For the non-quad case.
;; Truncates the elements of a single VN register into a <V_narrow> register
;; with one vmovn.  Only enabled when !BYTES_BIG_ENDIAN.
6630(define_insn "neon_vec_pack_trunc_<mode>"
6631 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
7f27ec08 6632 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
0094f21b 6633 "TARGET_NEON && !BYTES_BIG_ENDIAN"
7f27ec08 6634 "vmovn.i<V_sz_elem>\t%P0, %q1"
f7379e5e 6635 [(set_attr "type" "neon_move_narrow_q")]
0f38f229
TB
6636)
6637
;; Pack-and-truncate for VSHFT modes.  Operands 1 and 2 are first combined
;; into one <V_DOUBLE>mode temporary (operand 1 through move_lo_quad, operand
;; 2 through move_hi_quad); the temporary is then narrowed into operand 0
;; with neon_vec_pack_trunc_<V_double>.  Only enabled when !BYTES_BIG_ENDIAN.
6638(define_expand "vec_pack_trunc_<mode>"
cd65e265
DZ
6639 [(match_operand:<V_narrow_pack> 0 "register_operand")
6640 (match_operand:VSHFT 1 "register_operand")
0f38f229 6641 (match_operand:VSHFT 2 "register_operand")]
0094f21b 6642 "TARGET_NEON && !BYTES_BIG_ENDIAN"
0f38f229
TB
6643{
6644 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6645
6646 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6647 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6648 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6649 DONE;
6650})
bd1aa4f4
SS
6651
;; Matches abs (operand 1 - operand 2) on VF modes, written with an explicit
;; RTL minus, and emits it as a single vabd.  Only available when
;; flag_unsafe_math_optimizations is set.
6652(define_insn "neon_vabd<mode>_2"
d0b6b5a7
KT
6653 [(set (match_operand:VF 0 "s_register_operand" "=w")
6654 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6655 (match_operand:VF 2 "s_register_operand" "w"))))]
6656 "TARGET_NEON && flag_unsafe_math_optimizations"
bd1aa4f4 6657 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
d0b6b5a7 6658 [(set_attr "type" "neon_fp_abd_s<q>")]
bd1aa4f4
SS
6659)
6660
;; Companion of neon_vabd<mode>_2 for the case where the subtraction is
;; represented as an UNSPEC_VSUB unspec instead of an RTL minus.  Also emits
;; a single vabd and also requires flag_unsafe_math_optimizations.
6661(define_insn "neon_vabd<mode>_3"
d0b6b5a7
KT
6662 [(set (match_operand:VF 0 "s_register_operand" "=w")
6663 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6664 (match_operand:VF 2 "s_register_operand" "w")]
6665 UNSPEC_VSUB)))]
6666 "TARGET_NEON && flag_unsafe_math_optimizations"
bd1aa4f4 6667 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
d0b6b5a7 6668 [(set_attr "type" "neon_fp_abd_s<q>")]
bd1aa4f4 6669)
436016f4
DZ
6670
;; Accumulating 8-bit matrix multiply.  Operand 0 = operand 1 + MATMUL unspec
;; of the V16QI operands 2 and 3; operand 1 is tied to the destination
;; (constraint "0"), so the instruction accumulates in place.  The mnemonic
;; prefix <sup> and the type suffix <mmla_sfx> follow the MATMUL iterator.
;; Requires TARGET_I8MM.
6671(define_insn "neon_<sup>mmlav16qi"
6672 [(set (match_operand:V4SI 0 "register_operand" "=w")
6673 (plus:V4SI
6674 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6675 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6676 (match_operand:V4SI 1 "register_operand" "0")))]
6677 "TARGET_I8MM"
6678 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6679 [(set_attr "type" "neon_mla_s_q")]
6680)
eb7ba6c3
DZ
6681
;; Accumulating bfloat16 dot product (vdot.bf16).  Operand 0 = operand 1 +
;; UNSPEC_DOT_S of the <VSF2BF> operands 2 and 3; operand 1 is tied to the
;; destination (constraint "0").  Requires TARGET_BF16_SIMD.
6682(define_insn "neon_vbfdot<VCVTF:mode>"
6683 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6684 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6685 (unspec:VCVTF [
6686 (match_operand:<VSF2BF> 2 "register_operand" "w")
6687 (match_operand:<VSF2BF> 3 "register_operand" "w")]
6688 UNSPEC_DOT_S)))]
6689 "TARGET_BF16_SIMD"
6690 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6691 [(set_attr "type" "neon_dot<q>")]
6692)
6693
;; By-lane form of the bfloat16 dot product where the indexed vector
;; (operand 3) is V4BF.  Operand 3 uses the "x" register constraint and is
;; printed as %P3 with the immediate lane number from operand 4 (%c4).
;; Operand 1 is the accumulator, tied to the destination.
6694(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6695 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6696 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6697 (unspec:VCVTF [
6698 (match_operand:<VSF2BF> 2 "register_operand" "w")
6699 (match_operand:V4BF 3 "register_operand" "x")
6700 (match_operand:SI 4 "immediate_operand" "i")]
6701 UNSPEC_DOT_S)))]
6702 "TARGET_BF16_SIMD"
6703 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6704 [(set_attr "type" "neon_dot<q>")]
6705)
6706
;; By-lane form of the bfloat16 dot product where the indexed vector
;; (operand 3) is V8BF.  The output code computes
;; half = GET_MODE_NUNITS (mode of operand 3) / 4.  A lane below `half' is
;; printed against %e3 with the lane number unchanged; any other lane is
;; printed against %f3 after operands[4] has been rewritten to lane - half.
;; Operand 1 is the accumulator, tied to the destination.
6707(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6708 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6709 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6710 (unspec:VCVTF [
6711 (match_operand:<VSF2BF> 2 "register_operand" "w")
6712 (match_operand:V8BF 3 "register_operand" "x")
6713 (match_operand:SI 4 "immediate_operand" "i")]
6714 UNSPEC_DOT_S)))]
6715 "TARGET_BF16_SIMD"
6716 {
6717 int lane = INTVAL (operands[4]);
6718 int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6719 if (lane < half)
6720 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6721 else
6722 {
6723 operands[4] = GEN_INT (lane - half);
6724 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6725 }
6726 }
6727 [(set_attr "type" "neon_dot<q>")]
6728)
8e6d0dba
DZ
6729
;; Convert a V4SF register to bfloat16 with vcvt.bf16.f32.  The destination
;; is a VBFCVT-mode register printed through the <V_bf_low> operand modifier
;; (that attribute is defined outside this part of the file).
;; Requires TARGET_BF16_SIMD.
6730(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6731 [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6732 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6733 UNSPEC_BFCVT))]
6734 "TARGET_BF16_SIMD"
6735 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6736 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6737)
6738
;; "High" variant of the V4SF -> bfloat16 conversion.  Operand 1 is an
;; existing V8BF value tied to the destination (constraint "0"); the
;; instruction converts V4SF operand 2 and writes the result to %f0 only.
;; Requires TARGET_BF16_SIMD.
6739(define_insn "neon_vbfcvtv4sf_highv8bf"
6740 [(set (match_operand:V8BF 0 "register_operand" "=w")
6741 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6742 (match_operand:V4SF 2 "register_operand" "w")]
6743 UNSPEC_BFCVT_HIGH))]
6744 "TARGET_BF16_SIMD"
6745 "vcvt.bf16.f32\\t%f0, %q2"
6746 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6747)
6748
;; Scalar SF -> BF conversion using vcvtb.bf16.f32.  Both operands use the
;; "t" register constraint.  Requires TARGET_BF16_FP (not TARGET_BF16_SIMD).
6749(define_insn "neon_vbfcvtsf"
6750 [(set (match_operand:BF 0 "register_operand" "=t")
6751 (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6752 UNSPEC_BFCVT))]
6753 "TARGET_BF16_FP"
6754 "vcvtb.bf16.f32\\t%0, %1"
6755 [(set_attr "type" "f_cvt")]
6756)
6757
;; Convert a VBFCVT-mode bfloat16 vector to V4SF.  This is emitted as a
;; widening shift left by 16 (vshll.u32 ..., #16) rather than a dedicated
;; convert instruction; the source is printed through <V_bf_low>.
;; Requires TARGET_BF16_SIMD.
6758(define_insn "neon_vbfcvt<VBFCVT:mode>"
6759 [(set (match_operand:V4SF 0 "register_operand" "=w")
6760 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6761 UNSPEC_BFCVT))]
6762 "TARGET_BF16_SIMD"
6763 "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6764 [(set_attr "type" "neon_shift_imm_q")]
6765)
6766
;; "High" variant of the bfloat16 -> V4SF conversion: the same widening
;; shift left by 16, but reading %f1 of the V8BF source operand.
;; Requires TARGET_BF16_SIMD.
6767(define_insn "neon_vbfcvt_highv8bf"
6768 [(set (match_operand:V4SF 0 "register_operand" "=w")
6769 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6770 UNSPEC_BFCVT_HIGH))]
6771 "TARGET_BF16_SIMD"
6772 "vshll.u32\\t%q0, %f1, #16"
6773 [(set_attr "type" "neon_shift_imm_q")]
6774)
6775
6776;; Convert a BF scalar operand to SF via VSHL.
6777;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
6778;; would be allocated, therefore the operands must be converted to intermediate
6779;; vectors (i.e. V2SI) so that 64-bit registers can be used.
;; The expansion has three steps: BF -> V2SI (neon_vbfcvtbf_cvtmodev2si),
;; shift left by 16 in V2SI (neon_vshl_nv2si), then V2SI -> SF
;; (neon_vbfcvtbf_cvtmodesf).
6780(define_expand "neon_vbfcvtbf"
6781 [(match_operand:SF 0 "register_operand")
6782 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6783 "TARGET_BF16_FP"
6784{
6785 rtx op0 = gen_reg_rtx (V2SImode);
6786 rtx op1 = gen_reg_rtx (V2SImode);
6787 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
6788 emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
6789 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
6790 DONE;
6791})
6792
6793;; Convert BF mode to V2SI and V2SI to SF.
6794;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6795;; register indexed by a 32-bit sub-register number.
6796;; This will generate reloads, but the compiler can optimize out the moves.
6797;; Use the 'x' constraint to keep the 32-bit sub-registers in an indexable
6798;; range, so as to avoid extra moves.
;; The output template is empty and operand 1 is tied to operand 0
;; (constraint "0"), so the pattern itself emits no instruction; it only
;; changes the mode under which the register is viewed.
6799(define_insn "neon_vbfcvtbf_cvtmode<mode>"
6800 [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6801 (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
6802 UNSPEC_BFCVT))]
6803 "TARGET_BF16_FP"
6804 ""
6805)
2d22ab64
KT
6806
;; Accumulating bfloat16 matrix multiply (vmmla.bf16).  Operand 0 = operand 1
;; + UNSPEC_BFMMLA of the V8BF operands 2 and 3; operand 1 is the V4SF
;; accumulator, tied to the destination.  Requires TARGET_BF16_SIMD.
6807(define_insn "neon_vmmlav8bf"
6808 [(set (match_operand:V4SF 0 "register_operand" "=w")
6809 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6810 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6811 (match_operand:V8BF 3 "register_operand" "w")]
6812 UNSPEC_BFMMLA)))]
6813 "TARGET_BF16_SIMD"
6814 "vmmla.bf16\\t%q0, %q2, %q3"
6815 [(set_attr "type" "neon_fp_mla_s_q")]
6816)
6817
;; bfloat16 fused multiply-accumulate into V4SF (vfma<bt>.bf16), one pattern
;; per value of the BF_MA iterator with <bt> as the mnemonic suffix.
;; Operand 1 is the accumulator, tied to the destination; operands 2 and 3
;; are the V8BF inputs.  Requires TARGET_BF16_SIMD.
6818(define_insn "neon_vfma<bt>v8bf"
6819 [(set (match_operand:V4SF 0 "register_operand" "=w")
6820 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6821 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6822 (match_operand:V8BF 3 "register_operand" "w")]
6823 BF_MA)))]
6824 "TARGET_BF16_SIMD"
6825 "vfma<bt>.bf16\\t%q0, %q2, %q3"
6826 [(set_attr "type" "neon_fp_mla_s_q")]
6827)
6828
;; By-lane form of vfma<bt>.bf16.  The indexed vector (operand 3) is V4BF in
;; an "x" register and is printed as %P3 with the constant lane number from
;; operand 4 (%c4).  Operand 1 is the V4SF accumulator, tied to the
;; destination.  Requires TARGET_BF16_SIMD.
6829(define_insn "neon_vfma<bt>_lanev8bf"
6830 [(set (match_operand:V4SF 0 "register_operand" "=w")
6831 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6832 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6833 (match_operand:V4BF 3 "register_operand" "x")
6834 (match_operand:SI 4 "const_int_operand" "n")]
6835 BF_MA)))]
6836 "TARGET_BF16_SIMD"
6837 "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6838 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6839)
6840
;; By-lane vfma<bt>.bf16 where the indexed vector (operand 3) is a full V8BF
;; register.  The lane number in operand 4 is asserted to be in [0, 7] and
;; the expansion always ends in neon_vfma<bt>_lanev8bf:
;;   - lanes 0-3: operands are forwarded unchanged;
;;   - lanes 4-7: the high half of operand 3 is extracted into a new V4BF
;;     register with neon_vget_highv8bf and the lane is rebased to lane - 4.
;; NOTE(review): on the lane < 4 path operands[3] is still the V8BF register,
;; while neon_vfma<bt>_lanev8bf declares its operand 3 as V4BF -- confirm the
;; emitted insn is recognised, or take the low half explicitly.
6841(define_expand "neon_vfma<bt>_laneqv8bf"
6842 [(set (match_operand:V4SF 0 "register_operand" "=w")
6843 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6844 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6845 (match_operand:V8BF 3 "register_operand" "x")
6846 (match_operand:SI 4 "const_int_operand" "n")]
6847 BF_MA)))]
6848 "TARGET_BF16_SIMD"
6849 {
6850 int lane = INTVAL (operands[4]);
6851 gcc_assert (IN_RANGE(lane, 0, 7));
6852 if (lane < 4)
6853 {
6854 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6855 }
6856 else
6857 {
6858 rtx op_highpart = gen_reg_rtx (V4BFmode);
6859 emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6860 operands[4] = GEN_INT (lane - 4);
6861 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6862 }
6863 DONE;
6864 }
6865 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6866)