]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/arm/neon.md
arm: Auto-vectorization for MVE: vneg
[thirdparty/gcc.git] / gcc / config / arm / neon.md
CommitLineData
88f77cba 1;; ARM NEON coprocessor Machine Description
8d9254fc 2;; Copyright (C) 2006-2020 Free Software Foundation, Inc.
88f77cba
JB
3;; Written by CodeSourcery.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
2f83c7d6 9;; the Free Software Foundation; either version 3, or (at your option)
88f77cba
JB
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
2f83c7d6
NC
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
88f77cba 20
88f77cba 21
c956e102 22;; Attribute used to permit string comparisons against <VQH_mnem> in
003bb7f3 23;; type attribute definitions.
;; Declares the insn attribute "vqh_mnem".  Its permitted values are vadd,
;; vmin and vmax, and the default is the constant string "vadd".
c956e102
MS
24(define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
25
;; Stores a V8QI register (constraint "w") to a memory operand (constraint
;; "Un"), with the source wrapped in UNSPEC_UNALIGNED_STORE.  Enabled under
;; TARGET_NEON; the assembly text is whatever output_move_neon returns.
3eefaaa9
BE
26(define_insn "unaligned_storev8qi"
27 [(set (match_operand:V8QI 0 "memory_operand" "=Un")
28 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")]
29 UNSPEC_UNALIGNED_STORE))]
30 "TARGET_NEON"
31 "*
32 return output_move_neon (operands);
33 "
34 [(set_attr "type" "neon_store1_1reg")])
35
;; Move pattern for the VDXMOV modes, with ten constraint alternatives
;; (numbered 0-9).  Requires TARGET_NEON and that at least one of the two
;; operands is a register.
;;   - Alternatives 2 and 3 (Dm/Dn sources) are handled first:
;;     simd_immediate_valid_for_move is called with &operands[1] and &width,
;;     its result is asserted non-zero, and the insn is printed as vmov.f32
;;     when width is 0 or as vmov.i<width> otherwise.  The vmov.i template is
;;     formatted into a function-local static 40-byte buffer.
;;   - Alternative 0 prints a plain vmov; 1 and 4 defer to output_move_neon;
;;     5 and 6 print three-operand vmov forms; 9 returns "#"; the remaining
;;     alternatives (7 and 8) reach the default case, output_move_double.
;; NOTE(review): "#" presumably means the insn is split later -- confirm.
88f77cba 36(define_insn "*neon_mov<mode>"
2e87b2f4 37 [(set (match_operand:VDXMOV 0 "nonimmediate_operand"
e009dfb3 38 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
2e87b2f4 39 (match_operand:VDXMOV 1 "general_operand"
e009dfb3 40 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
40f73786
DJ
41 "TARGET_NEON
42 && (register_operand (operands[0], <MODE>mode)
43 || register_operand (operands[1], <MODE>mode))"
88f77cba 44{
e009dfb3 45 if (which_alternative == 2 || which_alternative == 3)
88f77cba
JB
46 {
47 int width, is_valid;
48 static char templ[40];
49
63c8f7d6 50 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
88f77cba
JB
51 &operands[1], &width);
52
53 gcc_assert (is_valid != 0);
54
55 if (width == 0)
56 return "vmov.f32\t%P0, %1 @ <mode>";
57 else
00a3a76a 58 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
88f77cba
JB
59
60 return templ;
61 }
62
88f77cba
JB
63 switch (which_alternative)
64 {
65 case 0: return "vmov\t%P0, %P1 @ <mode>";
e009dfb3
MM
66 case 1: case 4: return output_move_neon (operands);
67 case 2: case 3: gcc_unreachable ();
68 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
69 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
70 case 9: return "#";
3598da80 71 default: return output_move_double (operands, true, NULL);
88f77cba
JB
72 }
73}
f7379e5e 74 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
e009dfb3
MM
75 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
76 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
77 multiple")
78 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
79 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
80 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
81 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
88f77cba
JB
82
;; Move pattern for the VQXMOV modes, with ten constraint alternatives
;; (numbered 0-9).  Requires TARGET_NEON and that at least one of the two
;; operands is a register.
;;   - Alternatives 2 and 3 (Dm/DN sources) are handled first via
;;     simd_immediate_valid_for_move (result asserted non-zero) and printed as
;;     vmov.f32 when width is 0 or vmov.i<width> otherwise; the vmov.i
;;     template is formatted into a function-local static 40-byte buffer.
;;   - Alternative 0 prints a plain vmov; 1 and 4 defer to output_move_neon;
;;     5 and 6 each print two vmov instructions joined by "\;"; the remaining
;;     alternatives reach the default case, output_move_quad.
83(define_insn "*neon_mov<mode>"
84 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
e009dfb3 85 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
88f77cba 86 (match_operand:VQXMOV 1 "general_operand"
e009dfb3 87 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
40f73786
DJ
88 "TARGET_NEON
89 && (register_operand (operands[0], <MODE>mode)
90 || register_operand (operands[1], <MODE>mode))"
88f77cba 91{
e009dfb3 92 if (which_alternative == 2 || which_alternative == 3)
88f77cba
JB
93 {
94 int width, is_valid;
95 static char templ[40];
96
63c8f7d6 97 is_valid = simd_immediate_valid_for_move (operands[1], <MODE>mode,
88f77cba
JB
98 &operands[1], &width);
99
100 gcc_assert (is_valid != 0);
101
102 if (width == 0)
103 return "vmov.f32\t%q0, %1 @ <mode>";
104 else
105 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
106
107 return templ;
108 }
109
110 switch (which_alternative)
111 {
112 case 0: return "vmov\t%q0, %q1 @ <mode>";
e009dfb3
MM
113 case 1: case 4: return output_move_neon (operands);
114 case 2: case 3: gcc_unreachable ();
115 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
116 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
88f77cba
JB
117 default: return output_move_quad (operands);
118 }
119}
f7379e5e 120 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
e009dfb3
MM
121 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
122 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
123 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
124 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
125 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
126 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
88f77cba 127
2a9234e8
TC
128/* We define these mov expanders to match the standard mov$a optab to prevent
129 the mid-end from trying to do a subreg for these modes which is the most
130 inefficient way to expand the move. Also big-endian subregs aren't
131 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
132 Without these RTL generation patterns the mid-end would attempt to take a
133 sub-reg and may ICE if it can't. */
134
;; Expander for TImode moves, enabled under TARGET_NEON.  Both operands are
;; checked with gcc_checking_assert against aligned_operand.  While new
;; pseudos may still be created, the source is forced into a register
;; whenever the destination is not a REG.  There is no DONE, so the set in
;; the template is emitted with the (possibly updated) operands.
88f77cba 135(define_expand "movti"
cd65e265
DZ
136 [(set (match_operand:TI 0 "nonimmediate_operand")
137 (match_operand:TI 1 "general_operand"))]
88f77cba
JB
138 "TARGET_NEON"
139{
70cdb21e
BE
140 gcc_checking_assert (aligned_operand (operands[0], TImode));
141 gcc_checking_assert (aligned_operand (operands[1], TImode));
40f73786
DJ
142 if (can_create_pseudo_p ())
143 {
d435a4be 144 if (!REG_P (operands[0]))
40f73786
DJ
145 operands[1] = force_reg (TImode, operands[1]);
146 }
88f77cba
JB
147})
148
;; Expander for moves in the VSTRUCT modes, enabled when either TARGET_NEON
;; or TARGET_HAVE_MVE holds.  Both operands are checked with
;; gcc_checking_assert against aligned_operand.  While new pseudos may still
;; be created, the source is forced into a register whenever the destination
;; is not a REG.
149(define_expand "mov<mode>"
cd65e265
DZ
150 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
151 (match_operand:VSTRUCT 1 "general_operand"))]
14782c81 152 "TARGET_NEON || TARGET_HAVE_MVE"
88f77cba 153{
70cdb21e
BE
154 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
155 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
40f73786
DJ
156 if (can_create_pseudo_p ())
157 {
d435a4be 158 if (!REG_P (operands[0]))
40f73786
DJ
159 operands[1] = force_reg (<MODE>mode, operands[1]);
160 }
88f77cba
JB
161})
162
63c8f7d6
SP
163;; The mov<mode> patterns for the modes v8hf, v4hf, v4bf and v8bf are split into
164;; two groups. The pattern movv8hf is common to MVE and NEON, so it has been moved
165;; into the vec-common.md file. The remaining mov expand patterns for half-float
166;; and bfloat modes are implemented below.
;; Expander for moves in the VHFBF_split modes, enabled under TARGET_NEON.
;; Both operands use the s_register_operand predicate and are checked with
;; gcc_checking_assert against aligned_operand.  While new pseudos may still
;; be created, the source is forced into a register whenever the destination
;; is not a REG.
2a9234e8 167(define_expand "mov<mode>"
63c8f7d6
SP
168 [(set (match_operand:VHFBF_split 0 "s_register_operand")
169 (match_operand:VHFBF_split 1 "s_register_operand"))]
2a9234e8 170 "TARGET_NEON"
92422235 171{
70cdb21e
BE
172 gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
173 gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
92422235
CL
174 if (can_create_pseudo_p ())
175 {
176 if (!REG_P (operands[0]))
2a9234e8 177 operands[1] = force_reg (<MODE>mode, operands[1]);
92422235
CL
178 }
179})
180
;; Move pattern for the VSTRUCT modes with three alternatives: register to
;; register (w <- w), store (Ut <- w) and load (w <- Ut).  Requires
;; TARGET_NEON or TARGET_HAVE_MVE, and that at least one operand is a
;; register.  Alternative 0 returns "#"; the memory alternatives defer to
;; output_move_neon.  The "length" attribute is computed by
;; arm_attr_length_move_neon (insn).
;; NOTE(review): "#" presumably hands the register copy to the define_split
;; patterns for EI/OI/CI/XI -- confirm.
88f77cba
JB
181(define_insn "*neon_mov<mode>"
182 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
183 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
14782c81 184 "(TARGET_NEON || TARGET_HAVE_MVE)
40f73786
DJ
185 && (register_operand (operands[0], <MODE>mode)
186 || register_operand (operands[1], <MODE>mode))"
88f77cba
JB
187{
188 switch (which_alternative)
189 {
190 case 0: return "#";
191 case 1: case 2: return output_move_neon (operands);
192 default: gcc_unreachable ();
193 }
194}
f7379e5e 195 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
7c4f0041 196 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
88f77cba
JB
197
;; After reload, under TARGET_NEON, splits an EImode register-to-register
;; copy into two sets.  The pieces handed to neon_disambiguate_copy are a
;; TImode register at the base register number and a DImode register at
;; base + 4, for both destination and source.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in an
;; order that is safe when the register ranges overlap -- confirm.
198(define_split
199 [(set (match_operand:EI 0 "s_register_operand" "")
200 (match_operand:EI 1 "s_register_operand" ""))]
201 "TARGET_NEON && reload_completed"
202 [(set (match_dup 0) (match_dup 1))
203 (set (match_dup 2) (match_dup 3))]
204{
205 int rdest = REGNO (operands[0]);
206 int rsrc = REGNO (operands[1]);
207 rtx dest[2], src[2];
208
209 dest[0] = gen_rtx_REG (TImode, rdest);
210 src[0] = gen_rtx_REG (TImode, rsrc);
211 dest[1] = gen_rtx_REG (DImode, rdest + 4);
212 src[1] = gen_rtx_REG (DImode, rsrc + 4);
213
214 neon_disambiguate_copy (operands, dest, src, 2);
215})
216
;; After reload, under TARGET_NEON or TARGET_HAVE_MVE, splits an OImode
;; register-to-register copy into two sets.  The pieces handed to
;; neon_disambiguate_copy are TImode registers at the base register number
;; and at base + 4, for both destination and source.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..3] in an
;; order that is safe when the register ranges overlap -- confirm.
217(define_split
218 [(set (match_operand:OI 0 "s_register_operand" "")
219 (match_operand:OI 1 "s_register_operand" ""))]
14782c81 220 "(TARGET_NEON || TARGET_HAVE_MVE)&& reload_completed"
88f77cba
JB
221 [(set (match_dup 0) (match_dup 1))
222 (set (match_dup 2) (match_dup 3))]
223{
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
226 rtx dest[2], src[2];
227
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
232
233 neon_disambiguate_copy (operands, dest, src, 2);
234})
235
;; After reload, under TARGET_NEON, splits a CImode register-to-register copy
;; into three sets.  The pieces handed to neon_disambiguate_copy are TImode
;; registers at the base register number, base + 4 and base + 8, for both
;; destination and source.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..5] in an
;; order that is safe when the register ranges overlap -- confirm.
236(define_split
237 [(set (match_operand:CI 0 "s_register_operand" "")
238 (match_operand:CI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))]
243{
244 int rdest = REGNO (operands[0]);
245 int rsrc = REGNO (operands[1]);
246 rtx dest[3], src[3];
247
248 dest[0] = gen_rtx_REG (TImode, rdest);
249 src[0] = gen_rtx_REG (TImode, rsrc);
250 dest[1] = gen_rtx_REG (TImode, rdest + 4);
251 src[1] = gen_rtx_REG (TImode, rsrc + 4);
252 dest[2] = gen_rtx_REG (TImode, rdest + 8);
253 src[2] = gen_rtx_REG (TImode, rsrc + 8);
254
255 neon_disambiguate_copy (operands, dest, src, 3);
256})
257
;; After reload, under TARGET_NEON or TARGET_HAVE_MVE, splits an XImode
;; register-to-register copy into four sets.  The pieces handed to
;; neon_disambiguate_copy are TImode registers at the base register number,
;; base + 4, base + 8 and base + 12, for both destination and source.
;; NOTE(review): neon_disambiguate_copy presumably fills operands[0..7] in an
;; order that is safe when the register ranges overlap -- confirm.
258(define_split
259 [(set (match_operand:XI 0 "s_register_operand" "")
260 (match_operand:XI 1 "s_register_operand" ""))]
14782c81 261 "(TARGET_NEON || TARGET_HAVE_MVE) && reload_completed"
88f77cba
JB
262 [(set (match_dup 0) (match_dup 1))
263 (set (match_dup 2) (match_dup 3))
264 (set (match_dup 4) (match_dup 5))
265 (set (match_dup 6) (match_dup 7))]
266{
267 int rdest = REGNO (operands[0]);
268 int rsrc = REGNO (operands[1]);
269 rtx dest[4], src[4];
270
271 dest[0] = gen_rtx_REG (TImode, rdest);
272 src[0] = gen_rtx_REG (TImode, rsrc);
273 dest[1] = gen_rtx_REG (TImode, rdest + 4);
274 src[1] = gen_rtx_REG (TImode, rsrc + 4);
275 dest[2] = gen_rtx_REG (TImode, rdest + 8);
276 src[2] = gen_rtx_REG (TImode, rsrc + 8);
277 dest[3] = gen_rtx_REG (TImode, rdest + 12);
278 src[3] = gen_rtx_REG (TImode, rsrc + 12);
279
280 neon_disambiguate_copy (operands, dest, src, 4);
281})
282
;; Expander for misaligned moves in the VDQX modes, wrapping the source in
;; UNSPEC_MISALIGNED_ACCESS.  Enabled only when TARGET_NEON holds, the target
;; is not BYTES_BIG_ENDIAN, and unaligned_access is set.
;; Steps: (1) if neither operand is a register, force the source into one;
;; (2) pick the non-register side as adjust_mem (the source when the
;; destination is a register, otherwise the destination); (3) if
;; neon_vector_mem_operand rejects it, replace its address with a Pmode
;; register.
;; NOTE(review): if both operands are registers, adjust_mem is operands[1],
;; which is then not a MEM -- confirm callers never pass two registers.
c452684d 283(define_expand "movmisalign<mode>"
33255ae3
JB
284 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
285 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
c452684d 286 UNSPEC_MISALIGNED_ACCESS))]
869b9125 287 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
c452684d 288{
3416dd87 289 rtx adjust_mem;
c452684d
JB
290 /* This pattern is not permitted to fail during expansion: if both arguments
291 are non-registers (e.g. memory := constant, which can be created by the
292 auto-vectorizer), force operand 1 into a register. */
293 if (!s_register_operand (operands[0], <MODE>mode)
294 && !s_register_operand (operands[1], <MODE>mode))
295 operands[1] = force_reg (<MODE>mode, operands[1]);
3416dd87
RR
296
297 if (s_register_operand (operands[0], <MODE>mode))
298 adjust_mem = operands[1];
299 else
300 adjust_mem = operands[0];
301
302 /* Legitimize address. */
303 if (!neon_vector_mem_operand (adjust_mem, 2, true))
304 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
305
c452684d
JB
306})
307
;; Misaligned store for the VDX modes: a register source (constraint "w")
;; wrapped in UNSPEC_MISALIGNED_ACCESS is written to a memory operand
;; (constraint "Um") with vst1.<V_sz_elem>, printing the register with %P.
;; Same enabling condition as the movmisalign<mode> expander.
308(define_insn "*movmisalign<mode>_neon_store"
33255ae3 309 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
c452684d
JB
310 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
311 UNSPEC_MISALIGNED_ACCESS))]
869b9125 312 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
c452684d 313 "vst1.<V_sz_elem>\t{%P1}, %A0"
f7379e5e 314 [(set_attr "type" "neon_store1_1reg<q>")])
c452684d
JB
315
;; Misaligned load for the VDX modes: a memory operand (constraint "Um")
;; wrapped in UNSPEC_MISALIGNED_ACCESS is read into a register (constraint
;; "w") with vld1.<V_sz_elem>, printing the register with %P.
;; Same enabling condition as the movmisalign<mode> expander.
316(define_insn "*movmisalign<mode>_neon_load"
33255ae3
JB
317 [(set (match_operand:VDX 0 "s_register_operand" "=w")
318 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
319 " Um")]
c452684d 320 UNSPEC_MISALIGNED_ACCESS))]
869b9125 321 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
c452684d 322 "vld1.<V_sz_elem>\t{%P0}, %A1"
f7379e5e 323 [(set_attr "type" "neon_load1_1reg<q>")])
c452684d
JB
324
;; Misaligned store for the VQX modes: a register source (constraint "w")
;; wrapped in UNSPEC_MISALIGNED_ACCESS is written to a memory operand
;; (constraint "Um") with vst1.<V_sz_elem>, printing the register with %q.
;; Same enabling condition as the movmisalign<mode> expander.
325(define_insn "*movmisalign<mode>_neon_store"
33255ae3 326 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
c452684d
JB
327 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
328 UNSPEC_MISALIGNED_ACCESS))]
869b9125 329 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
c452684d 330 "vst1.<V_sz_elem>\t{%q1}, %A0"
f7379e5e 331 [(set_attr "type" "neon_store1_1reg<q>")])
c452684d
JB
332
;; Misaligned load for the VQX modes: a memory operand (constraint "Um")
;; wrapped in UNSPEC_MISALIGNED_ACCESS is read into a register (constraint
;; "w") with vld1.<V_sz_elem>, printing the register with %q.
;; Same enabling condition as the movmisalign<mode> expander.
333(define_insn "*movmisalign<mode>_neon_load"
33255ae3
JB
334 [(set (match_operand:VQX 0 "s_register_operand" "=w")
335 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
336 " Um")]
c452684d 337 UNSPEC_MISALIGNED_ACCESS))]
869b9125 338 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
c452684d 339 "vld1.<V_sz_elem>\t{%q0}, %A1"
b5a26023 340 [(set_attr "type" "neon_load1_1reg<q>")])
c452684d 341
;; Sets one lane of a VD_LANE vector: a vec_merge of a duplicated scalar
;; (operand 1) into operand 3, which is tied to the output by constraint "0".
;; Operand 2 is the vec_merge selector; the lane index is recovered from it
;; as ffs (value) - 1, mirrored within the vector on BYTES_BIG_ENDIAN, and
;; written back into operands[2] for printing.
;; Alternative 0 (scalar in memory, "Um") prints a vld1 lane load;
;; alternative 1 (scalar in "r") prints a vmov to the lane.
8ba8ebff 342(define_insn "@vec_set<mode>_internal"
92422235
CL
343 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
344 (vec_merge:VD_LANE
345 (vec_duplicate:VD_LANE
058e2674 346 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
92422235 347 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
058e2674 348 (match_operand:SI 2 "immediate_operand" "i,i")))]
88f77cba 349 "TARGET_NEON"
80b8eb11 350{
d19eb620 351 int elt = ffs ((int) INTVAL (operands[2])) - 1;
874d42b9
JM
352 if (BYTES_BIG_ENDIAN)
353 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
354 operands[2] = GEN_INT (elt);
058e2674
UW
355
356 if (which_alternative == 0)
357 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
358 else
359 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
80b8eb11 360}
f7379e5e 361 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
88f77cba 362
;; Sets one lane of a VQ2 vector: a vec_merge of a duplicated scalar
;; (operand 1) into operand 3, which is tied to the output by constraint "0".
;; The element index is ffs (operand 2) - 1.  It is split into a lane within
;; one half (elem % half_elts, mirrored on BYTES_BIG_ENDIAN) and a register
;; offset hi = (elem / half_elts) * 2.  operands[0] is then replaced by a
;; <V_HALF>mode register at regno + hi, so the output names that half.
;; Alternative 0 prints a vld1 lane load from memory; alternative 1 prints a
;; vmov from the "r" operand.
8ba8ebff 363(define_insn "@vec_set<mode>_internal"
4b644867
AL
364 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
365 (vec_merge:VQ2
366 (vec_duplicate:VQ2
058e2674 367 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
4b644867 368 (match_operand:VQ2 3 "s_register_operand" "0,0")
058e2674 369 (match_operand:SI 2 "immediate_operand" "i,i")))]
88f77cba
JB
370 "TARGET_NEON"
371{
466e4b7a 372 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
88f77cba 373 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
80b8eb11
JB
374 int elt = elem % half_elts;
375 int hi = (elem / half_elts) * 2;
88f77cba
JB
376 int regno = REGNO (operands[0]);
377
874d42b9
JM
378 if (BYTES_BIG_ENDIAN)
379 elt = half_elts - 1 - elt;
380
88f77cba
JB
381 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
382 operands[2] = GEN_INT (elt);
383
058e2674
UW
384 if (which_alternative == 0)
385 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
386 else
387 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
88f77cba 388}
f7379e5e 389 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
c956e102 390)
88f77cba 391
;; Sets one DI element of a V2DI_ONLY vector: a vec_merge of a duplicated DI
;; scalar (operand 1) into operand 3, which is tied to the output by
;; constraint "0".  The element index is ffs (operand 2) - 1; operands[0] is
;; replaced by a DImode register at REGNO + 2 * elem.
;; Alternative 0 prints vld1.64 from memory; alternative 1 prints a vmov
;; from the two halves (%Q1, %R1) of the "r" operand.
8ba8ebff
RS
392(define_insn "@vec_set<mode>_internal"
393 [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
394 (vec_merge:V2DI_ONLY
395 (vec_duplicate:V2DI_ONLY
058e2674 396 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
8ba8ebff 397 (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
058e2674 398 (match_operand:SI 2 "immediate_operand" "i,i")))]
88f77cba
JB
399 "TARGET_NEON"
400{
466e4b7a 401 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
80b8eb11 402 int regno = REGNO (operands[0]) + 2 * elem;
88f77cba
JB
403
404 operands[0] = gen_rtx_REG (DImode, regno);
405
058e2674
UW
406 if (which_alternative == 0)
407 return "vld1.64\t%P0, %A1";
408 else
409 return "vmov\t%P0, %Q1, %R1";
88f77cba 410}
f7379e5e 411 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
c956e102 412)
88f77cba 413
;; Extracts one lane of a VD_LANE vector (a vec_select with a one-element
;; parallel).  Operand 2 is the lane index; on BYTES_BIG_ENDIAN it is
;; mirrored within the vector and written back into operands[2].
;; Alternative 0 (destination in memory, "Um") prints a vst1 lane store;
;; alternative 1 (destination in "r") prints vmov.<V_uf_sclr>.
ff03930a 414(define_insn "vec_extract<mode><V_elem_l>"
058e2674 415 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
88f77cba 416 (vec_select:<V_elem>
92422235 417 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
058e2674 418 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
88f77cba 419 "TARGET_NEON"
874d42b9
JM
420{
421 if (BYTES_BIG_ENDIAN)
422 {
423 int elt = INTVAL (operands[2]);
424 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
425 operands[2] = GEN_INT (elt);
426 }
058e2674
UW
427
428 if (which_alternative == 0)
429 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
430 else
431 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
874d42b9 432}
f7379e5e 433 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
c956e102 434)
88f77cba 435
1a5c27b1
SP
436;; This pattern is renamed from "vec_extract<mode><V_elem_l>" to
437;; "neon_vec_extract<mode><V_elem_l>" and this pattern is called
438;; by define_expand in vec-common.md file.
;; Extracts one lane of a VQ2 vector.  The lane index in operand 2 is split
;; into a lane within one half (index % half_elts, mirrored on
;; BYTES_BIG_ENDIAN) and a register offset hi = (index / half_elts) * 2.
;; operands[1] is replaced by a <V_HALF>mode register at regno + hi.
;; Alternative 0 prints a vst1 lane store to memory; alternative 1 prints
;; vmov.<V_uf_sclr> to the "r" destination.
439(define_insn "neon_vec_extract<mode><V_elem_l>"
058e2674 440 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
88f77cba 441 (vec_select:<V_elem>
4b644867 442 (match_operand:VQ2 1 "s_register_operand" "w,w")
058e2674 443 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
88f77cba
JB
444 "TARGET_NEON"
445{
446 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
447 int elt = INTVAL (operands[2]) % half_elts;
448 int hi = (INTVAL (operands[2]) / half_elts) * 2;
449 int regno = REGNO (operands[1]);
450
874d42b9
JM
451 if (BYTES_BIG_ENDIAN)
452 elt = half_elts - 1 - elt;
453
88f77cba
JB
454 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
455 operands[2] = GEN_INT (elt);
456
058e2674
UW
457 if (which_alternative == 0)
458 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
459 else
460 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
88f77cba 461}
f7379e5e 462 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
c956e102 463)
88f77cba 464
1a5c27b1
SP
465;; This pattern is renamed from "vec_extractv2didi" to "neon_vec_extractv2didi"
466;; and this pattern is called by define_expand in vec-common.md file.
;; Extracts one DI element of a V2DI vector.  operands[1] is replaced by a
;; DImode register at REGNO + 2 * (operand 2), i.e. the selected element.
;; Alternative 0 prints vst1.64 to memory; alternative 1 prints a vmov into
;; the two halves (%Q0, %R0) of the "r" destination.
467(define_insn "neon_vec_extractv2didi"
058e2674 468 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
88f77cba 469 (vec_select:DI
058e2674
UW
470 (match_operand:V2DI 1 "s_register_operand" "w,w")
471 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
88f77cba
JB
472 "TARGET_NEON"
473{
8c98c2a6 474 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
88f77cba
JB
475
476 operands[1] = gen_rtx_REG (DImode, regno);
477
058e2674
UW
478 if (which_alternative == 0)
479 return "vst1.64\t{%P1}, %A0 @ v2di";
480 else
481 return "vmov\t%Q0, %R0, %P1 @ v2di";
88f77cba 482}
f7379e5e 483 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
c956e102 484)
88f77cba 485
;; Vector initialisation expander for the VDQ modes, enabled under
;; TARGET_NEON or TARGET_HAVE_MVE.  All work is delegated to
;; neon_expand_vector_init (operands[0], operands[1]); DONE means no RTL
;; from the template itself is emitted.
ff03930a 486(define_expand "vec_init<mode><V_elem_l>"
cd65e265 487 [(match_operand:VDQ 0 "s_register_operand")
88f77cba 488 (match_operand 1 "" "")]
63c8f7d6 489 "TARGET_NEON || TARGET_HAVE_MVE"
88f77cba
JB
490{
491 neon_expand_vector_init (operands[0], operands[1]);
492 DONE;
493})
494
495;; Doubleword and quadword arithmetic.
496
bab53516
SL
497;; NOTE: some other instructions also support 64-bit integer
498;; element size, which we could potentially use for "long long" operations.
88f77cba
JB
499
;; Vector addition for the VDQ modes, printed as vadd.<V_if_elem>.  Enabled
;; by ARM_HAVE_NEON_<MODE>_ARITH.  The "type" attribute is
;; neon_fp_addsub_s<q> when <Is_float_mode> tests true, else neon_add<q>.
500(define_insn "*add<mode>3_neon"
501 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
502 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
503 (match_operand:VDQ 2 "s_register_operand" "w")))]
95e10b8a 504 "ARM_HAVE_NEON_<MODE>_ARITH"
c956e102 505 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 506 [(set (attr "type")
b75b1be2 507 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
508 (const_string "neon_fp_addsub_s<q>")
509 (const_string "neon_add<q>")))]
c956e102 510)
88f77cba
JB
511
;; Vector subtraction (operand 1 minus operand 2) for the VDQ modes, printed
;; as vsub.<V_if_elem>.  Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The "type"
;; attribute is neon_fp_addsub_s<q> when <Is_float_mode> tests true, else
;; neon_sub<q>.
512(define_insn "*sub<mode>3_neon"
513 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
514 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
515 (match_operand:VDQ 2 "s_register_operand" "w")))]
bb78e587 516 "ARM_HAVE_NEON_<MODE>_ARITH"
c956e102 517 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 518 [(set (attr "type")
b75b1be2 519 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
520 (const_string "neon_fp_addsub_s<q>")
521 (const_string "neon_sub<q>")))]
c956e102 522)
88f77cba
JB
523
;; Vector multiplication for the VDQW modes, printed as vmul.<V_if_elem>.
;; Enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The "type" attribute is
;; neon_fp_mul_s<q> when <Is_float_mode> tests true, else
;; neon_mul_<V_elem_ch><q>.
524(define_insn "*mul<mode>3_neon"
f7379e5e
JG
525 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
526 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
527 (match_operand:VDQW 2 "s_register_operand" "w")))]
bb78e587 528 "ARM_HAVE_NEON_<MODE>_ARITH"
c956e102 529 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 530 [(set (attr "type")
b75b1be2 531 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
532 (const_string "neon_fp_mul_s<q>")
533 (const_string "neon_mul_<V_elem_ch><q>")))]
c956e102 534)
88f77cba 535
536ecfc4
PK
536/* Perform division using multiply-by-reciprocal.
537 The reciprocal is calculated using the Newton-Raphson method.
538 Enabled with -funsafe-math-optimizations -freciprocal-math
539 and disabled for -Os since it increases code size. */
540
;; Division expander for the VCVTF modes.  The condition tested here is
;; TARGET_NEON, !optimize_size and flag_reciprocal_math.
;; Sequence emitted: a reciprocal estimate of operand 2 (gen_neon_vrecpe),
;; then two rounds of gen_neon_vrecps followed by a multiply that refine the
;; estimate in "rec", and finally operands[0] = operands[1] * rec.  DONE
;; means the div RTL in the template itself is never emitted.
541(define_expand "div<mode>3"
cd65e265
DZ
542 [(set (match_operand:VCVTF 0 "s_register_operand")
543 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
544 (match_operand:VCVTF 2 "s_register_operand")))]
536ecfc4
PK
545 "TARGET_NEON && !optimize_size
546 && flag_reciprocal_math"
547 {
548 rtx rec = gen_reg_rtx (<MODE>mode);
549 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
550
551 /* Reciprocal estimate. */
552 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
553
554 /* Perform 2 iterations of newton-raphson method. */
555 for (int i = 0; i < 2; i++)
556 {
557 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
558 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
559 }
560
561 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
562 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
563 DONE;
564 }
565)
566
567
;; Multiply-accumulate for the VDQW modes: operand 0 = operand 2 * operand 3
;; + operand 1, where operand 1 is tied to the output by constraint "0".
;; Printed as vmla.<V_if_elem>; enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The
;; "type" attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true,
;; else neon_mla_<V_elem_ch><q>.
bab53516 568(define_insn "mul<mode>3add<mode>_neon"
f7379e5e
JG
569 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
570 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
571 (match_operand:VDQW 3 "s_register_operand" "w"))
572 (match_operand:VDQW 1 "s_register_operand" "0")))]
bb78e587 573 "ARM_HAVE_NEON_<MODE>_ARITH"
1ea9fe56 574 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 575 [(set (attr "type")
b75b1be2 576 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
577 (const_string "neon_fp_mla_s<q>")
578 (const_string "neon_mla_<V_elem_ch><q>")))]
1ea9fe56
MM
579)
580
;; Multiply-accumulate for the VH modes: operand 0 = operand 2 * operand 3 +
;; operand 1, where operand 1 is tied to the output by constraint "0".
;; Printed as vmla.f16; enabled by ARM_HAVE_NEON_<MODE>_ARITH.
55a9b91b
MW
581(define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VH 0 "s_register_operand" "=w")
583 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
584 (match_operand:VH 3 "s_register_operand" "w"))
585 (match_operand:VH 1 "s_register_operand" "0")))]
bb78e587 586 "ARM_HAVE_NEON_<MODE>_ARITH"
55a9b91b
MW
587 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
588 [(set_attr "type" "neon_fp_mla_s<q>")]
589)
590
;; Multiply-subtract for the VDQW modes: operand 0 = operand 1 - operand 2 *
;; operand 3, where operand 1 is tied to the output by constraint "0".
;; Printed as vmls.<V_if_elem>; enabled by ARM_HAVE_NEON_<MODE>_ARITH.  The
;; "type" attribute is neon_fp_mla_s<q> when <Is_float_mode> tests true,
;; else neon_mla_<V_elem_ch><q>.
bab53516 591(define_insn "mul<mode>3neg<mode>add<mode>_neon"
f7379e5e
JG
592 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
593 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
594 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
595 (match_operand:VDQW 3 "s_register_operand" "w"))))]
bb78e587 596 "ARM_HAVE_NEON_<MODE>_ARITH"
1ea9fe56 597 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 598 [(set (attr "type")
b75b1be2 599 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
600 (const_string "neon_fp_mla_s<q>")
601 (const_string "neon_mla_<V_elem_ch><q>")))]
1ea9fe56
MM
602)
603
8b2ab9cb 604;; Fused multiply-accumulate
c4216388
MGD
605;; We define each insn twice here:
606;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
607;; to be able to use when converting to FMA.
608;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes: fma (operand 1, operand 2,
;; operand 3), with operand 3 tied to the output by constraint "0".  Printed
;; as vfma.<V_if_elem>.  Enabled when both ARM_HAVE_NEON_<MODE>_ARITH and
;; TARGET_FMA hold.
8b2ab9cb
RR
609(define_insn "fma<VCVTF:mode>4"
610 [(set (match_operand:VCVTF 0 "register_operand" "=w")
611 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
612 (match_operand:VCVTF 2 "register_operand" "w")
613 (match_operand:VCVTF 3 "register_operand" "0")))]
bb78e587 614 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
e60226ff 615 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 616 [(set_attr "type" "neon_fp_mla_s<q>")]
8b2ab9cb
RR
617)
618
;; Same RTL and output as fma<VCVTF:mode>4, but enabled by TARGET_NEON &&
;; TARGET_FMA instead of ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA.
c4216388
MGD
619(define_insn "fma<VCVTF:mode>4_intrinsic"
620 [(set (match_operand:VCVTF 0 "register_operand" "=w")
621 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
622 (match_operand:VCVTF 2 "register_operand" "w")
623 (match_operand:VCVTF 3 "register_operand" "0")))]
624 "TARGET_NEON && TARGET_FMA"
e60226ff 625 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 626 [(set_attr "type" "neon_fp_mla_s<q>")]
c4216388
MGD
627)
628
;; Fused multiply-add for the VH modes: fma (operand 1, operand 2,
;; operand 3), with operand 3 tied to the output by constraint "0".  Printed
;; as vfma.<V_if_elem>; enabled by ARM_HAVE_NEON_<MODE>_ARITH.
6da37857
MW
629(define_insn "fma<VH:mode>4"
630 [(set (match_operand:VH 0 "register_operand" "=w")
631 (fma:VH
632 (match_operand:VH 1 "register_operand" "w")
633 (match_operand:VH 2 "register_operand" "w")
634 (match_operand:VH 3 "register_operand" "0")))]
bb78e587 635 "ARM_HAVE_NEON_<MODE>_ARITH"
55a9b91b
MW
636 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
637 [(set_attr "type" "neon_fp_mla_s<q>")]
638)
639
;; Fused multiply-subtract for the VCVTF modes: fma (-operand 1, operand 2,
;; operand 3), with operand 3 tied to the output by constraint "0".  Printed
;; as vfms.<V_if_elem>.  Enabled when both ARM_HAVE_NEON_<MODE>_ARITH and
;; TARGET_FMA hold.
8b2ab9cb
RR
640(define_insn "*fmsub<VCVTF:mode>4"
641 [(set (match_operand:VCVTF 0 "register_operand" "=w")
642 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
643 (match_operand:VCVTF 2 "register_operand" "w")
644 (match_operand:VCVTF 3 "register_operand" "0")))]
bb78e587 645 "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
e60226ff 646 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 647 [(set_attr "type" "neon_fp_mla_s<q>")]
8b2ab9cb
RR
648)
649
;; Same RTL and output as *fmsub<VCVTF:mode>4, but enabled by TARGET_NEON &&
;; TARGET_FMA instead of ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA.
c4216388 650(define_insn "fmsub<VCVTF:mode>4_intrinsic"
55a9b91b
MW
651 [(set (match_operand:VCVTF 0 "register_operand" "=w")
652 (fma:VCVTF
653 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
654 (match_operand:VCVTF 2 "register_operand" "w")
655 (match_operand:VCVTF 3 "register_operand" "0")))]
656 "TARGET_NEON && TARGET_FMA"
e60226ff 657 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
55a9b91b
MW
658 [(set_attr "type" "neon_fp_mla_s<q>")]
659)
660
;; Fused multiply-subtract for the VH modes: fma (-operand 1, operand 2,
;; operand 3), with operand 3 tied to the output by constraint "0".  Printed
;; as vfms.<V_if_elem>; enabled by TARGET_NEON_FP16INST.
661(define_insn "fmsub<VH:mode>4_intrinsic"
662 [(set (match_operand:VH 0 "register_operand" "=w")
663 (fma:VH
664 (neg:VH (match_operand:VH 1 "register_operand" "w"))
665 (match_operand:VH 2 "register_operand" "w")
666 (match_operand:VH 3 "register_operand" "0")))]
667 "TARGET_NEON_FP16INST"
668 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
669 [(set_attr "type" "neon_fp_mla_s<q>")]
c4216388
MGD
670)
671
;; One pattern per NEON_VRINT unspec and VCVTF mode: applies the unspec to
;; operand 1 and prints vrint<nvrint_variant>.f32.  Enabled when both
;; TARGET_NEON and TARGET_VFP5 hold.
79739965
KT
672(define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
673 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
674 (unspec:VCVTF [(match_operand:VCVTF 1
675 "s_register_operand" "w")]
676 NEON_VRINT))]
c8d61ab8 677 "TARGET_NEON && TARGET_VFP5"
e60226ff 678 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
f7379e5e 679 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
79739965
KT
680)
681
;; Conversion of a VCVTF vector to <V_cmp_result>: a FIXUORS code applied to
;; a NEON_VCVT unspec of operand 1, printed as
;; vcvt<nvrint_variant>.<su>32.f32.  Enabled when both TARGET_NEON and
;; TARGET_VFP5 hold; the "predicable" attribute is set to "no".
e9e67af1
KT
682(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
683 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
684 (FIXUORS:<V_cmp_result> (unspec:VCVTF
685 [(match_operand:VCVTF 1 "register_operand" "w")]
686 NEON_VCVT)))]
c8d61ab8 687 "TARGET_NEON && TARGET_VFP5"
e9e67af1
KT
688 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
689 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
690 (set_attr "predicable" "no")]
691)
692
;; Bitwise OR for the VDQ modes under TARGET_NEON, with two alternatives.
;; Alternative 0 (register second operand) prints a three-register vorr.
;; Alternative 1 (constraint "Dl", first operand tied to the output) defers
;; to neon_output_logic_immediate with mnemonic "vorr", a fourth argument of
;; 0 and VALID_NEON_QREG_MODE (<MODE>mode) as the fifth.
75de6a28 693(define_insn "ior<mode>3_neon"
88f77cba
JB
694 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
695 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
696 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
697 "TARGET_NEON"
698{
699 switch (which_alternative)
700 {
701 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
702 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
703 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
704 default: gcc_unreachable ();
705 }
c956e102 706}
f7379e5e 707 [(set_attr "type" "neon_logic<q>")]
c956e102 708)
88f77cba 709
88f77cba
JB
710;; The concrete forms of the Neon immediate-logic instructions are vbic and
711;; vorr. We support the pseudo-instruction vand instead, because that
712;; corresponds to the canonical form the middle-end expects to use for
713;; immediate bitwise-ANDs.
714
;; Bitwise AND for the VDQ modes under TARGET_NEON, with two alternatives.
;; Alternative 0 (register second operand) prints a three-register vand.
;; Alternative 1 (constraint "DL", first operand tied to the output) defers
;; to neon_output_logic_immediate with mnemonic "vand", a fourth argument of
;; 1 and VALID_NEON_QREG_MODE (<MODE>mode) as the fifth.
11a0beff 715(define_insn "and<mode>3_neon"
88f77cba
JB
716 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
717 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
718 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
719 "TARGET_NEON"
720{
721 switch (which_alternative)
722 {
723 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
724 case 1: return neon_output_logic_immediate ("vand", &operands[2],
725 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
726 default: gcc_unreachable ();
727 }
c956e102 728}
f7379e5e 729 [(set_attr "type" "neon_logic<q>")]
c956e102 730)
88f77cba 731
;; OR-with-complement for the VDQ modes under TARGET_NEON: operand 0 =
;; (~operand 2) | operand 1, printed as vorn.  Note that the complemented
;; input is operand 2, although it appears first in the RTL.
88f77cba
JB
732(define_insn "orn<mode>3_neon"
733 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
50fed7bf
RR
734 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
735 (match_operand:VDQ 1 "s_register_operand" "w")))]
88f77cba 736 "TARGET_NEON"
c956e102 737 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 738 [(set_attr "type" "neon_logic<q>")]
c956e102 739)
88f77cba 740
;; AND-with-complement for the VDQ modes under TARGET_NEON: operand 0 =
;; (~operand 2) & operand 1, printed as vbic.  Note that the complemented
;; input is operand 2, although it appears first in the RTL.
88f77cba
JB
741(define_insn "bic<mode>3_neon"
742 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
50fed7bf
RR
743 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
744 (match_operand:VDQ 1 "s_register_operand" "w")))]
88f77cba 745 "TARGET_NEON"
c956e102 746 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 747 [(set_attr "type" "neon_logic<q>")]
c956e102 748)
88f77cba 749
;; Bitwise exclusive OR for the VDQ modes under TARGET_NEON, printed as veor
;; with three register operands.
434fb3b6 750(define_insn "xor<mode>3_neon"
88f77cba
JB
751 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
752 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
753 (match_operand:VDQ 2 "s_register_operand" "w")))]
754 "TARGET_NEON"
c956e102 755 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 756 [(set_attr "type" "neon_logic<q>")]
c956e102 757)
88f77cba 758
;; Bitwise complement for the VDQ modes under TARGET_NEON, printed as vmvn.
;; The "type" attribute is neon_move<q>.
fd436034 759(define_insn "one_cmpl<mode>2_neon"
88f77cba
JB
760 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
761 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
762 "TARGET_NEON"
c956e102 763 "vmvn\t%<V_reg>0, %<V_reg>1"
f7379e5e 764 [(set_attr "type" "neon_move<q>")]
c956e102 765)
88f77cba
JB
766
;; Absolute value for the VDQW modes under TARGET_NEON, printed as
;; vabs.<V_s_elem>.  The "type" attribute is neon_fp_abs_s<q> when
;; <Is_float_mode> tests true, else neon_abs<q>.
767(define_insn "abs<mode>2"
768 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
769 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
770 "TARGET_NEON"
c956e102 771 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
003bb7f3 772 [(set (attr "type")
b75b1be2 773 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
774 (const_string "neon_fp_abs_s<q>")
775 (const_string "neon_abs<q>")))]
c956e102 776)
88f77cba 777
;; Negation for the VDQW modes under TARGET_NEON, printed as
;; vneg.<V_s_elem>.  The "type" attribute is neon_fp_neg_s<q> when
;; <Is_float_mode> tests true, else neon_neg<q>.
4cbb7cab 778(define_insn "neon_neg<mode>2"
88f77cba
JB
779 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
780 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
781 "TARGET_NEON"
c956e102 782 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
003bb7f3 783 [(set (attr "type")
b75b1be2 784 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
785 (const_string "neon_fp_neg_s<q>")
786 (const_string "neon_neg<q>")))]
c956e102 787)
88f77cba 788
;; One pattern per ABSNEG code and VH mode, printed as
;; v<absneg_str>.<V_s_elem>.  Enabled by TARGET_NEON_FP16INST.  The "type"
;; attribute is neon_abs<q> for every ABSNEG code.
4cbb7cab 789(define_insn "neon_<absneg_str><mode>2"
55a9b91b
MW
790 [(set (match_operand:VH 0 "s_register_operand" "=w")
791 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
792 "TARGET_NEON_FP16INST"
793 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
794 [(set_attr "type" "neon_abs<q>")]
795)
796
797(define_expand "neon_v<absneg_str><mode>"
798 [(set
799 (match_operand:VH 0 "s_register_operand")
800 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
801 "TARGET_NEON_FP16INST"
802{
4cbb7cab 803 emit_insn (gen_neon_<absneg_str><mode>2 (operands[0], operands[1]));
55a9b91b
MW
804 DONE;
805})
806
807(define_insn "neon_v<fp16_rnd_str><mode>"
808 [(set (match_operand:VH 0 "s_register_operand" "=w")
809 (unspec:VH
810 [(match_operand:VH 1 "s_register_operand" "w")]
811 FP16_RND))]
812 "TARGET_NEON_FP16INST"
813 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
814 [(set_attr "type" "neon_fp_round_s<q>")]
815)
816
817(define_insn "neon_vrsqrte<mode>"
818 [(set (match_operand:VH 0 "s_register_operand" "=w")
819 (unspec:VH
820 [(match_operand:VH 1 "s_register_operand" "w")]
821 UNSPEC_VRSQRTE))]
822 "TARGET_NEON_FP16INST"
823 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
824 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
825)
826
88f77cba
JB
827(define_insn "*umin<mode>3_neon"
828 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
829 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
830 (match_operand:VDQIW 2 "s_register_operand" "w")))]
831 "TARGET_NEON"
c956e102 832 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 833 [(set_attr "type" "neon_minmax<q>")]
c956e102 834)
88f77cba
JB
835
836(define_insn "*umax<mode>3_neon"
837 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
838 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
839 (match_operand:VDQIW 2 "s_register_operand" "w")))]
840 "TARGET_NEON"
c956e102 841 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 842 [(set_attr "type" "neon_minmax<q>")]
c956e102 843)
88f77cba
JB
844
845(define_insn "*smin<mode>3_neon"
846 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
847 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
848 (match_operand:VDQW 2 "s_register_operand" "w")))]
849 "TARGET_NEON"
c956e102 850 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 851 [(set (attr "type")
b75b1be2 852 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
853 (const_string "neon_fp_minmax_s<q>")
854 (const_string "neon_minmax<q>")))]
c956e102 855)
88f77cba
JB
856
857(define_insn "*smax<mode>3_neon"
858 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
859 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
860 (match_operand:VDQW 2 "s_register_operand" "w")))]
861 "TARGET_NEON"
c956e102 862 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 863 [(set (attr "type")
b75b1be2 864 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
865 (const_string "neon_fp_minmax_s<q>")
866 (const_string "neon_minmax<q>")))]
c956e102 867)
88f77cba
JB
868
869; TODO: V2DI shifts are currently disabled because there are bugs in the
870; generic vectorizer code. It ends up creating a V2DI constructor with
871; SImode elements.
872
d44463a9 873(define_insn "vashl<mode>3"
31a0c825
DP
874 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
875 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
e009dfb3 876 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
31a0c825
DP
877 "TARGET_NEON"
878 {
879 switch (which_alternative)
880 {
881 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
882 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
883 <MODE>mode,
884 VALID_NEON_QREG_MODE (<MODE>mode),
885 true);
886 default: gcc_unreachable ();
887 }
888 }
f7379e5e 889 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
31a0c825
DP
890)
891
892(define_insn "vashr<mode>3_imm"
88f77cba 893 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
31a0c825 894 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
e009dfb3 895 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
88f77cba 896 "TARGET_NEON"
31a0c825
DP
897 {
898 return neon_output_shift_immediate ("vshr", 's', &operands[2],
899 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
900 false);
901 }
f7379e5e 902 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 903)
88f77cba 904
31a0c825
DP
905(define_insn "vlshr<mode>3_imm"
906 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
907 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
e009dfb3 908 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
31a0c825
DP
909 "TARGET_NEON"
910 {
911 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
912 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
913 false);
914 }
f7379e5e 915 [(set_attr "type" "neon_shift_imm<q>")]
31a0c825
DP
916)
917
88f77cba
JB
918; Used for implementing arithmetic shift-right, which is a left-shift by a negative
919; amount, with signed operands. This is essentially the same as vashl<mode>3
920; above, but using an unspec in case GCC tries anything tricky with negative
921; shift amounts.
922
923(define_insn "ashl<mode>3_signed"
924 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
925 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
926 (match_operand:VDQI 2 "s_register_operand" "w")]
927 UNSPEC_ASHIFT_SIGNED))]
928 "TARGET_NEON"
c956e102 929 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 930 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 931)
88f77cba
JB
932
933; Used for implementing logical shift-right, which is a left-shift by a negative
934; amount, with unsigned operands.
935
936(define_insn "ashl<mode>3_unsigned"
937 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
938 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
939 (match_operand:VDQI 2 "s_register_operand" "w")]
940 UNSPEC_ASHIFT_UNSIGNED))]
941 "TARGET_NEON"
c956e102 942 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 943 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 944)
88f77cba 945
d44463a9 946(define_expand "vashr<mode>3"
cd65e265
DZ
947 [(set (match_operand:VDQIW 0 "s_register_operand")
948 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
949 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
88f77cba
JB
950 "TARGET_NEON"
951{
56b15099 952 if (s_register_operand (operands[2], <MODE>mode))
31a0c825 953 {
56b15099 954 rtx neg = gen_reg_rtx (<MODE>mode);
4cbb7cab 955 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
31a0c825
DP
956 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
957 }
958 else
959 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
88f77cba
JB
960 DONE;
961})
962
d44463a9 963(define_expand "vlshr<mode>3"
cd65e265
DZ
964 [(set (match_operand:VDQIW 0 "s_register_operand")
965 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
966 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
88f77cba
JB
967 "TARGET_NEON"
968{
56b15099 969 if (s_register_operand (operands[2], <MODE>mode))
31a0c825 970 {
56b15099 971 rtx neg = gen_reg_rtx (<MODE>mode);
4cbb7cab 972 emit_insn (gen_neon_neg<mode>2 (neg, operands[2]));
31a0c825
DP
973 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
974 }
975 else
976 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
88f77cba
JB
977 DONE;
978})
979
3f2dc806
AS
980;; 64-bit shifts
981
982;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
983;; leaving the upper half uninitialized. This is OK since the shift
984;; instruction only looks at the low 8 bits anyway. To avoid confusing
985;; data flow analysis however, we pretend the full register is set
986;; using an unspec.
987(define_insn "neon_load_count"
988 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
989 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
990 UNSPEC_LOAD_COUNT))]
991 "TARGET_NEON"
992 "@
993 vld1.32\t{%P0[0]}, %A1
994 vmov.32\t%P0[0], %1"
f7379e5e 995 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
3f2dc806
AS
996)
997
88f77cba
JB
998;; Widening operations
999
93c590ee 1000(define_expand "widen_ssum<mode>3"
cd65e265 1001 [(set (match_operand:<V_double_width> 0 "s_register_operand")
93c590ee
MC
1002 (plus:<V_double_width>
1003 (sign_extend:<V_double_width>
cd65e265
DZ
1004 (match_operand:VQI 1 "s_register_operand"))
1005 (match_operand:<V_double_width> 2 "s_register_operand")))]
93c590ee
MC
1006 "TARGET_NEON"
1007 {
1008 machine_mode mode = GET_MODE (operands[1]);
1009 rtx p1, p2;
1010
1011 p1 = arm_simd_vect_par_cnst_half (mode, false);
1012 p2 = arm_simd_vect_par_cnst_half (mode, true);
1013
1014 if (operands[0] != operands[2])
1015 emit_move_insn (operands[0], operands[2]);
1016
1017 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1018 operands[1],
1019 p1,
1020 operands[0]));
1021 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1022 operands[1],
1023 p2,
1024 operands[0]));
1025 DONE;
1026 }
1027)
1028
b8c36603
KT
1029(define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1030 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1031 (plus:<V_double_width>
1032 (sign_extend:<V_double_width>
1033 (vec_select:<V_HALF>
93c590ee
MC
1034 (match_operand:VQI 1 "s_register_operand" "%w")
1035 (match_operand:VQI 2 "vect_par_constant_low" "")))
b8c36603 1036 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1037 "TARGET_NEON"
1038{
1039 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1040 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1041}
1042 [(set_attr "type" "neon_add_widen")])
1043
b8c36603
KT
1044(define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1045 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1046 (plus:<V_double_width>
1047 (sign_extend:<V_double_width>
1048 (vec_select:<V_HALF>
1049 (match_operand:VQI 1 "s_register_operand" "%w")
93c590ee 1050 (match_operand:VQI 2 "vect_par_constant_high" "")))
b8c36603 1051 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1052 "TARGET_NEON"
1053{
1054 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1055 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1056}
1057 [(set_attr "type" "neon_add_widen")])
1058
88f77cba
JB
1059(define_insn "widen_ssum<mode>3"
1060 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
93c590ee
MC
1061 (plus:<V_widen>
1062 (sign_extend:<V_widen>
1063 (match_operand:VW 1 "s_register_operand" "%w"))
1064 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
88f77cba 1065 "TARGET_NEON"
c956e102 1066 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
f7379e5e 1067 [(set_attr "type" "neon_add_widen")]
c956e102 1068)
88f77cba 1069
93c590ee 1070(define_expand "widen_usum<mode>3"
cd65e265 1071 [(set (match_operand:<V_double_width> 0 "s_register_operand")
93c590ee
MC
1072 (plus:<V_double_width>
1073 (zero_extend:<V_double_width>
cd65e265
DZ
1074 (match_operand:VQI 1 "s_register_operand"))
1075 (match_operand:<V_double_width> 2 "s_register_operand")))]
93c590ee
MC
1076 "TARGET_NEON"
1077 {
1078 machine_mode mode = GET_MODE (operands[1]);
1079 rtx p1, p2;
1080
1081 p1 = arm_simd_vect_par_cnst_half (mode, false);
1082 p2 = arm_simd_vect_par_cnst_half (mode, true);
1083
1084 if (operands[0] != operands[2])
1085 emit_move_insn (operands[0], operands[2]);
1086
1087 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1088 operands[1],
1089 p1,
1090 operands[0]));
1091 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1092 operands[1],
1093 p2,
1094 operands[0]));
1095 DONE;
1096 }
1097)
1098
b8c36603
KT
1099(define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1100 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1101 (plus:<V_double_width>
1102 (zero_extend:<V_double_width>
1103 (vec_select:<V_HALF>
93c590ee
MC
1104 (match_operand:VQI 1 "s_register_operand" "%w")
1105 (match_operand:VQI 2 "vect_par_constant_low" "")))
b8c36603 1106 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1107 "TARGET_NEON"
1108{
1109 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1110 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1111}
1112 [(set_attr "type" "neon_add_widen")])
1113
b8c36603
KT
1114(define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1115 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1116 (plus:<V_double_width>
1117 (zero_extend:<V_double_width>
1118 (vec_select:<V_HALF>
1119 (match_operand:VQI 1 "s_register_operand" "%w")
93c590ee 1120 (match_operand:VQI 2 "vect_par_constant_high" "")))
b8c36603 1121 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
93c590ee
MC
1122 "TARGET_NEON"
1123{
1124 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1125 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1126}
1127 [(set_attr "type" "neon_add_widen")])
1128
88f77cba
JB
1129(define_insn "widen_usum<mode>3"
1130 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1131 (plus:<V_widen> (zero_extend:<V_widen>
1132 (match_operand:VW 1 "s_register_operand" "%w"))
1133 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1134 "TARGET_NEON"
c956e102 1135 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
f7379e5e 1136 [(set_attr "type" "neon_add_widen")]
c956e102 1137)
88f77cba 1138
88f77cba
JB
1139;; Helpers for quad-word reduction operations
1140
1141; Add (or smin, smax...) the low N/2 elements of the N-element vector
1142; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1143; N/2-element vector.
1144
1145(define_insn "quad_halves_<code>v4si"
1146 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
728dc153 1147 (VQH_OPS:V2SI
88f77cba
JB
1148 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1149 (parallel [(const_int 0) (const_int 1)]))
1150 (vec_select:V2SI (match_dup 1)
1151 (parallel [(const_int 2) (const_int 3)]))))]
1152 "TARGET_NEON"
c956e102
MS
1153 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1154 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1155 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1156)
88f77cba
JB
1157
1158(define_insn "quad_halves_<code>v4sf"
1159 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
728dc153 1160 (VQHS_OPS:V2SF
88f77cba
JB
1161 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1162 (parallel [(const_int 0) (const_int 1)]))
1163 (vec_select:V2SF (match_dup 1)
1164 (parallel [(const_int 2) (const_int 3)]))))]
bb78e587 1165 "ARM_HAVE_NEON_V4SF_ARITH"
c956e102
MS
1166 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1167 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1168 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
c956e102 1169)
88f77cba
JB
1170
;; Apply the VQH_OPS operation lane-wise to the two halves of the V8HI
;; operand 1: lanes 0-3 (first vec_select) against lanes 4-7 (second
;; vec_select, via match_dup 1), producing the V4HI operand 0.
;; Operand 0 appears only as the destination of the set and is never read by
;; the pattern, so it uses the write-only "=w" constraint, the same as the
;; v4si and v4sf variants of this pattern.
1171(define_insn "quad_halves_<code>v8hi"
1172 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
728dc153 1173 (VQH_OPS:V4HI
88f77cba
JB
1174 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1175 (parallel [(const_int 0) (const_int 1)
1176 (const_int 2) (const_int 3)]))
1177 (vec_select:V4HI (match_dup 1)
1178 (parallel [(const_int 4) (const_int 5)
1179 (const_int 6) (const_int 7)]))))]
1180 "TARGET_NEON"
c956e102
MS
1181 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1182 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1183 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1184)
88f77cba
JB
1185
;; Apply the VQH_OPS operation lane-wise to the two halves of the V16QI
;; operand 1: lanes 0-7 (first vec_select) against lanes 8-15 (second
;; vec_select, via match_dup 1), producing the V8QI operand 0.
;; Operand 0 appears only as the destination of the set and is never read by
;; the pattern, so it uses the write-only "=w" constraint, the same as the
;; v4si and v4sf variants of this pattern.
1186(define_insn "quad_halves_<code>v16qi"
1187 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
728dc153 1188 (VQH_OPS:V8QI
88f77cba
JB
1189 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1190 (parallel [(const_int 0) (const_int 1)
1191 (const_int 2) (const_int 3)
1192 (const_int 4) (const_int 5)
1193 (const_int 6) (const_int 7)]))
1194 (vec_select:V8QI (match_dup 1)
1195 (parallel [(const_int 8) (const_int 9)
1196 (const_int 10) (const_int 11)
1197 (const_int 12) (const_int 13)
1198 (const_int 14) (const_int 15)]))))]
1199 "TARGET_NEON"
c956e102
MS
1200 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1201 [(set_attr "vqh_mnem" "<VQH_mnem>")
f7379e5e 1202 (set_attr "type" "neon_reduc_<VQH_type>_q")]
c956e102 1203)
88f77cba 1204
0f38f229 1205(define_expand "move_hi_quad_<mode>"
cd65e265
DZ
1206 [(match_operand:ANY128 0 "s_register_operand")
1207 (match_operand:<V_HALF> 1 "s_register_operand")]
0f38f229 1208 "TARGET_NEON"
88f77cba 1209{
d92aed06
RS
1210 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1211 GET_MODE_SIZE (<V_HALF>mode)),
1212 operands[1]);
0f38f229
TB
1213 DONE;
1214})
1215
1216(define_expand "move_lo_quad_<mode>"
cd65e265
DZ
1217 [(match_operand:ANY128 0 "s_register_operand")
1218 (match_operand:<V_HALF> 1 "s_register_operand")]
0f38f229 1219 "TARGET_NEON"
88f77cba 1220{
d92aed06
RS
1221 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1222 <MODE>mode, 0),
1223 operands[1]);
0f38f229
TB
1224 DONE;
1225})
88f77cba
JB
1226
1227;; Reduction operations
1228
89edc986 1229(define_expand "reduc_plus_scal_<mode>"
cd65e265
DZ
1230 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1231 (match_operand:VD 1 "s_register_operand")]
bb78e587 1232 "ARM_HAVE_NEON_<MODE>_ARITH"
88f77cba 1233{
89edc986
AL
1234 rtx vec = gen_reg_rtx (<MODE>mode);
1235 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1236 &gen_neon_vpadd_internal<mode>);
89edc986 1237 /* The same result is actually computed into every element. */
ff03930a 1238 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1239 DONE;
1240})
1241
89edc986 1242(define_expand "reduc_plus_scal_<mode>"
cd65e265
DZ
1243 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1244 (match_operand:VQ 1 "s_register_operand")]
bb78e587 1245 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
88f77cba
JB
1246{
1247 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1248
1249 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
89edc986 1250 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
88f77cba
JB
1251
1252 DONE;
1253})
1254
89edc986 1255(define_expand "reduc_plus_scal_v2di"
cd65e265
DZ
1256 [(match_operand:DI 0 "nonimmediate_operand")
1257 (match_operand:V2DI 1 "s_register_operand")]
89edc986
AL
1258 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1259{
1260 rtx vec = gen_reg_rtx (V2DImode);
1261
1262 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
ff03930a 1263 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
89edc986
AL
1264
1265 DONE;
1266})
1267
1268(define_insn "arm_reduc_plus_internal_v2di"
88f77cba
JB
1269 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1270 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1271 UNSPEC_VPADD))]
0094f21b 1272 "TARGET_NEON && !BYTES_BIG_ENDIAN"
c956e102 1273 "vadd.i64\t%e0, %e1, %f1"
f7379e5e 1274 [(set_attr "type" "neon_add_q")]
c956e102 1275)
88f77cba 1276
f5dcbee1 1277(define_expand "reduc_smin_scal_<mode>"
cd65e265
DZ
1278 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1279 (match_operand:VD 1 "s_register_operand")]
bb78e587 1280 "ARM_HAVE_NEON_<MODE>_ARITH"
88f77cba 1281{
f5dcbee1
AL
1282 rtx vec = gen_reg_rtx (<MODE>mode);
1283
1284 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1285 &gen_neon_vpsmin<mode>);
f5dcbee1 1286 /* The result is computed into every element of the vector. */
ff03930a 1287 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1288 DONE;
1289})
1290
f5dcbee1 1291(define_expand "reduc_smin_scal_<mode>"
cd65e265
DZ
1292 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1293 (match_operand:VQ 1 "s_register_operand")]
bb78e587 1294 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
88f77cba
JB
1295{
1296 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1297
1298 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
f5dcbee1 1299 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
88f77cba
JB
1300
1301 DONE;
1302})
1303
f5dcbee1 1304(define_expand "reduc_smax_scal_<mode>"
cd65e265
DZ
1305 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1306 (match_operand:VD 1 "s_register_operand")]
bb78e587 1307 "ARM_HAVE_NEON_<MODE>_ARITH"
88f77cba 1308{
f5dcbee1
AL
1309 rtx vec = gen_reg_rtx (<MODE>mode);
1310 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1311 &gen_neon_vpsmax<mode>);
f5dcbee1 1312 /* The result is computed into every element of the vector. */
ff03930a 1313 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1314 DONE;
1315})
1316
f5dcbee1 1317(define_expand "reduc_smax_scal_<mode>"
cd65e265
DZ
1318 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1319 (match_operand:VQ 1 "s_register_operand")]
bb78e587 1320 "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
88f77cba
JB
1321{
1322 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1323
1324 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
f5dcbee1 1325 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
88f77cba
JB
1326
1327 DONE;
1328})
1329
f5dcbee1 1330(define_expand "reduc_umin_scal_<mode>"
cd65e265
DZ
1331 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1332 (match_operand:VDI 1 "s_register_operand")]
88f77cba
JB
1333 "TARGET_NEON"
1334{
f5dcbee1
AL
1335 rtx vec = gen_reg_rtx (<MODE>mode);
1336 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1337 &gen_neon_vpumin<mode>);
f5dcbee1 1338 /* The result is computed into every element of the vector. */
ff03930a 1339 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1340 DONE;
1341})
1342
f5dcbee1 1343(define_expand "reduc_umin_scal_<mode>"
cd65e265
DZ
1344 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1345 (match_operand:VQI 1 "s_register_operand")]
0094f21b 1346 "TARGET_NEON && !BYTES_BIG_ENDIAN"
88f77cba
JB
1347{
1348 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1349
1350 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
f5dcbee1 1351 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
88f77cba
JB
1352
1353 DONE;
1354})
1355
f5dcbee1 1356(define_expand "reduc_umax_scal_<mode>"
cd65e265
DZ
1357 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1358 (match_operand:VDI 1 "s_register_operand")]
88f77cba
JB
1359 "TARGET_NEON"
1360{
f5dcbee1
AL
1361 rtx vec = gen_reg_rtx (<MODE>mode);
1362 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
88f77cba 1363 &gen_neon_vpumax<mode>);
f5dcbee1 1364 /* The result is computed into every element of the vector. */
ff03930a 1365 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
88f77cba
JB
1366 DONE;
1367})
1368
f5dcbee1 1369(define_expand "reduc_umax_scal_<mode>"
cd65e265
DZ
1370 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371 (match_operand:VQI 1 "s_register_operand")]
0094f21b 1372 "TARGET_NEON && !BYTES_BIG_ENDIAN"
88f77cba
JB
1373{
1374 rtx step1 = gen_reg_rtx (<V_HALF>mode);
88f77cba
JB
1375
1376 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
f5dcbee1 1377 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
88f77cba
JB
1378
1379 DONE;
1380})
1381
1382(define_insn "neon_vpadd_internal<mode>"
1383 [(set (match_operand:VD 0 "s_register_operand" "=w")
1384 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1385 (match_operand:VD 2 "s_register_operand" "w")]
1386 UNSPEC_VPADD))]
1387 "TARGET_NEON"
c956e102
MS
1388 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1389 ;; Assume this schedules like vadd.
003bb7f3 1390 [(set (attr "type")
b75b1be2 1391 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1392 (const_string "neon_fp_reduc_add_s<q>")
1393 (const_string "neon_reduc_add<q>")))]
c956e102 1394)
88f77cba 1395
55a9b91b
MW
1396(define_insn "neon_vpaddv4hf"
1397 [(set
1398 (match_operand:V4HF 0 "s_register_operand" "=w")
1399 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1400 (match_operand:V4HF 2 "s_register_operand" "w")]
1401 UNSPEC_VPADD))]
1402 "TARGET_NEON_FP16INST"
1403 "vpadd.f16\t%P0, %P1, %P2"
1404 [(set_attr "type" "neon_reduc_add")]
1405)
1406
88f77cba
JB
1407(define_insn "neon_vpsmin<mode>"
1408 [(set (match_operand:VD 0 "s_register_operand" "=w")
1409 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1410 (match_operand:VD 2 "s_register_operand" "w")]
1411 UNSPEC_VPSMIN))]
1412 "TARGET_NEON"
c956e102 1413 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
003bb7f3 1414 [(set (attr "type")
b75b1be2 1415 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1416 (const_string "neon_fp_reduc_minmax_s<q>")
1417 (const_string "neon_reduc_minmax<q>")))]
c956e102 1418)
88f77cba
JB
1419
1420(define_insn "neon_vpsmax<mode>"
1421 [(set (match_operand:VD 0 "s_register_operand" "=w")
1422 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1423 (match_operand:VD 2 "s_register_operand" "w")]
1424 UNSPEC_VPSMAX))]
1425 "TARGET_NEON"
c956e102 1426 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
003bb7f3 1427 [(set (attr "type")
b75b1be2 1428 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1429 (const_string "neon_fp_reduc_minmax_s<q>")
1430 (const_string "neon_reduc_minmax<q>")))]
c956e102 1431)
88f77cba
JB
1432
1433(define_insn "neon_vpumin<mode>"
1434 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1435 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1436 (match_operand:VDI 2 "s_register_operand" "w")]
1437 UNSPEC_VPUMIN))]
1438 "TARGET_NEON"
c956e102 1439 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1440 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 1441)
88f77cba
JB
1442
1443(define_insn "neon_vpumax<mode>"
1444 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1445 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1446 (match_operand:VDI 2 "s_register_operand" "w")]
1447 UNSPEC_VPUMAX))]
1448 "TARGET_NEON"
c956e102 1449 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1450 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 1451)
88f77cba
JB
1452
1453;; Saturating arithmetic
1454
1455; NOTE: Neon supports many more saturating variants of instructions than the
1456; following, but these are all GCC currently understands.
1457; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1458; yet either, although these patterns may be used by intrinsics when they're
1459; added.
1460
1461(define_insn "*ss_add<mode>_neon"
1462 [(set (match_operand:VD 0 "s_register_operand" "=w")
1463 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1464 (match_operand:VD 2 "s_register_operand" "w")))]
1465 "TARGET_NEON"
c956e102 1466 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
f7379e5e 1467 [(set_attr "type" "neon_qadd<q>")]
c956e102 1468)
88f77cba
JB
1469
1470(define_insn "*us_add<mode>_neon"
1471 [(set (match_operand:VD 0 "s_register_operand" "=w")
1472 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1473 (match_operand:VD 2 "s_register_operand" "w")))]
1474 "TARGET_NEON"
c956e102 1475 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1476 [(set_attr "type" "neon_qadd<q>")]
c956e102 1477)
88f77cba
JB
1478
1479(define_insn "*ss_sub<mode>_neon"
1480 [(set (match_operand:VD 0 "s_register_operand" "=w")
1481 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1482 (match_operand:VD 2 "s_register_operand" "w")))]
1483 "TARGET_NEON"
c956e102 1484 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
f7379e5e 1485 [(set_attr "type" "neon_qsub<q>")]
c956e102 1486)
88f77cba
JB
1487
1488(define_insn "*us_sub<mode>_neon"
1489 [(set (match_operand:VD 0 "s_register_operand" "=w")
1490 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1491 (match_operand:VD 2 "s_register_operand" "w")))]
1492 "TARGET_NEON"
c956e102 1493 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
f7379e5e 1494 [(set_attr "type" "neon_qsub<q>")]
c956e102 1495)
88f77cba 1496
c2978b34
RS
1497(define_expand "vec_cmp<mode><v_cmp_result>"
1498 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
1499 (match_operator:<V_cmp_result> 1 "comparison_operator"
1500 [(match_operand:VDQW 2 "s_register_operand")
1501 (match_operand:VDQW 3 "reg_or_zero_operand")]))]
1502 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1503{
1504 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1505 operands[2], operands[3], false);
1506 DONE;
1507})
1508
1509(define_expand "vec_cmpu<mode><mode>"
1510 [(set (match_operand:VDQIW 0 "s_register_operand")
1511 (match_operator:VDQIW 1 "comparison_operator"
1512 [(match_operand:VDQIW 2 "s_register_operand")
1513 (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
1514 "TARGET_NEON"
1515{
1516 arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
1517 operands[2], operands[3], false);
1518 DONE;
1519})
1520
5bfc5baa
JB
1521;; Conditional instructions. These are comparisons with conditional moves for
1522;; vectors. They perform the assignment:
1523;;
1524;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1525;;
1526;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1527;; element-wise.
1528
e9e1d143 1529(define_expand "vcond<mode><mode>"
cd65e265 1530 [(set (match_operand:VDQW 0 "s_register_operand")
5bfc5baa 1531 (if_then_else:VDQW
f35c297f 1532 (match_operator 3 "comparison_operator"
cd65e265 1533 [(match_operand:VDQW 4 "s_register_operand")
c2978b34 1534 (match_operand:VDQW 5 "reg_or_zero_operand")])
cd65e265
DZ
1535 (match_operand:VDQW 1 "s_register_operand")
1536 (match_operand:VDQW 2 "s_register_operand")))]
5bfc5baa
JB
1537 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1538{
c2978b34
RS
1539 arm_expand_vcond (operands, <V_cmp_result>mode);
1540 DONE;
1541})
f35c297f 1542
c2978b34
RS
1543(define_expand "vcond<V_cvtto><mode>"
1544 [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
1545 (if_then_else:<V_CVTTO>
1546 (match_operator 3 "comparison_operator"
1547 [(match_operand:V32 4 "s_register_operand")
1548 (match_operand:V32 5 "reg_or_zero_operand")])
1549 (match_operand:<V_CVTTO> 1 "s_register_operand")
1550 (match_operand:<V_CVTTO> 2 "s_register_operand")))]
1551 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1552{
1553 arm_expand_vcond (operands, <V_cmp_result>mode);
5bfc5baa
JB
1554 DONE;
1555})
1556
c2978b34
RS
1557(define_expand "vcondu<mode><v_cmp_result>"
1558 [(set (match_operand:VDQW 0 "s_register_operand")
1559 (if_then_else:VDQW
5bfc5baa 1560 (match_operator 3 "arm_comparison_operator"
c2978b34
RS
1561 [(match_operand:<V_cmp_result> 4 "s_register_operand")
1562 (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
1563 (match_operand:VDQW 1 "s_register_operand")
1564 (match_operand:VDQW 2 "s_register_operand")))]
1565 "TARGET_NEON"
1566{
1567 arm_expand_vcond (operands, <V_cmp_result>mode);
1568 DONE;
1569})
5bfc5baa 1570
c2978b34
RS
1571(define_expand "vcond_mask_<mode><v_cmp_result>"
1572 [(set (match_operand:VDQW 0 "s_register_operand")
1573 (if_then_else:VDQW
1574 (match_operand:<V_cmp_result> 3 "s_register_operand")
1575 (match_operand:VDQW 1 "s_register_operand")
1576 (match_operand:VDQW 2 "s_register_operand")))]
1577 "TARGET_NEON"
1578{
1579 emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
1580 operands[2]));
5bfc5baa
JB
1581 DONE;
1582})
1583
88f77cba
JB
1584;; Patterns for builtins.
1585
1586; good for plain vadd, vaddq.
1587
bab53516 1588(define_expand "neon_vadd<mode>"
cd65e265
DZ
1589 [(match_operand:VCVTF 0 "s_register_operand")
1590 (match_operand:VCVTF 1 "s_register_operand")
1591 (match_operand:VCVTF 2 "s_register_operand")]
bab53516
SL
1592 "TARGET_NEON"
1593{
bb78e587 1594 if (ARM_HAVE_NEON_<MODE>_ARITH)
bab53516
SL
1595 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1596 else
1597 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1598 operands[2]));
1599 DONE;
1600})
1601
55a9b91b
MW
1602(define_expand "neon_vadd<mode>"
1603 [(match_operand:VH 0 "s_register_operand")
1604 (match_operand:VH 1 "s_register_operand")
1605 (match_operand:VH 2 "s_register_operand")]
1606 "TARGET_NEON_FP16INST"
1607{
95e10b8a 1608 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
55a9b91b
MW
1609 DONE;
1610})
1611
;; Builtin expander for vsub on the VH modes, enabled by
;; TARGET_NEON_FP16INST.  It always forwards to sub<mode>3 with the operands
;; in their original order.
1612(define_expand "neon_vsub<mode>"
1613 [(match_operand:VH 0 "s_register_operand")
1614 (match_operand:VH 1 "s_register_operand")
1615 (match_operand:VH 2 "s_register_operand")]
1616 "TARGET_NEON_FP16INST"
1617{
98161c24 1618 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
55a9b91b
MW
1619 DONE;
1620})
1621
bab53516
SL
1622; Note that NEON operations don't support the full IEEE 754 standard: in
1623; particular, denormal values are flushed to zero. This means that GCC cannot
1624; use those instructions for autovectorization, etc. unless
1625; -funsafe-math-optimizations is in effect (in which case flush-to-zero
9c582551 1626; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
bab53516
SL
1627; header) must work in either case: if -funsafe-math-optimizations is given,
1628; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1629; expand to unspecs (which may potentially limit the extent to which they might
1630; be optimized by generic code).
1631
1632; Used for intrinsics when flag_unsafe_math_optimizations is false.
1633
;; UNSPEC_VADD form of vadd for the VCVTF modes, printed as
;; vadd.<V_if_elem>.  The type attribute is neon_fp_addsub_s<q> when the
;; <Is_float_mode> test holds and neon_add<q> otherwise.
1634(define_insn "neon_vadd<mode>_unspec"
0d0b79a6
RR
1635 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1636 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1637 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
1638 UNSPEC_VADD))]
1639 "TARGET_NEON"
c956e102 1640 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 1641 [(set (attr "type")
b75b1be2 1642 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1643 (const_string "neon_fp_addsub_s<q>")
1644 (const_string "neon_add<q>")))]
c956e102 1645)
88f77cba 1646
;; vaddl: takes two VDI inputs and produces a result in the <V_widen> mode.
;; The unspec code iterates over VADDL; <sup> is substituted into both the
;; pattern name and the mnemonic's element-type suffix.
94f0f2cc 1647(define_insn "neon_vaddl<sup><mode>"
88f77cba
JB
1648 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1649 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
94f0f2cc
JG
1650 (match_operand:VDI 2 "s_register_operand" "w")]
1651 VADDL))]
88f77cba 1652 "TARGET_NEON"
94f0f2cc 1653 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 1654 [(set_attr "type" "neon_add_long")]
c956e102 1655)
88f77cba 1656
;; vaddw: operand 1 is already in the <V_widen> mode, operand 2 is a VDI
;; vector, and the result is in <V_widen>.  The unspec code iterates over
;; VADDW.
94f0f2cc 1657(define_insn "neon_vaddw<sup><mode>"
88f77cba
JB
1658 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1659 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
94f0f2cc
JG
1660 (match_operand:VDI 2 "s_register_operand" "w")]
1661 VADDW))]
88f77cba 1662 "TARGET_NEON"
94f0f2cc 1663 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
f7379e5e 1664 [(set_attr "type" "neon_add_widen")]
c956e102 1665)
88f77cba
JB
1666
1667; vhadd and vrhadd.
1668
;; vhadd / vrhadd over the VDQIW modes; the unspec code iterates over VHADD
;; and <r> / <sup> are substituted into the name and mnemonic.  The type
;; attribute uses the <q> substitution, as neon_vhsub and neon_vqadd do,
;; rather than hard-coding the _q variant for every mode.
94f0f2cc 1669(define_insn "neon_v<r>hadd<sup><mode>"
88f77cba
JB
1670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc
JG
1672 (match_operand:VDQIW 2 "s_register_operand" "w")]
1673 VHADD))]
88f77cba 1674 "TARGET_NEON"
94f0f2cc 1675 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1676 [(set_attr "type" "neon_add_halve<q>")]
c956e102 1677)
88f77cba 1678
;; vqadd over the VDQIX modes; the unspec code iterates over VQADD and the
;; type attribute is neon_qadd<q>.
94f0f2cc 1679(define_insn "neon_vqadd<sup><mode>"
88f77cba
JB
1680 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1681 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
1682 (match_operand:VDQIX 2 "s_register_operand" "w")]
1683 VQADD))]
88f77cba 1684 "TARGET_NEON"
94f0f2cc 1685 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 1686 [(set_attr "type" "neon_qadd<q>")]
c956e102 1687)
88f77cba 1688
;; vaddhn / vraddhn: takes two VN inputs and produces a result in the
;; <V_narrow> mode.  The unspec code iterates over VADDHN.
94f0f2cc 1689(define_insn "neon_v<r>addhn<mode>"
88f77cba
JB
1690 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1691 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
1692 (match_operand:VN 2 "s_register_operand" "w")]
1693 VADDHN))]
88f77cba 1694 "TARGET_NEON"
94f0f2cc 1695 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
f7379e5e 1696 [(set_attr "type" "neon_add_halve_narrow_q")]
c956e102 1697)
88f77cba 1698
94f0f2cc
JG
1699;; Polynomial and Float multiplication.
;; UNSPEC_VMUL multiply over the VPF modes, printed as
;; vmul.<pf><element size>.  The type attribute is neon_fp_mul_s<q> when the
;; <Is_float_mode> test holds and neon_mul_<V_elem_ch><q> otherwise.
1700(define_insn "neon_vmul<pf><mode>"
1701 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1702 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1703 (match_operand:VPF 2 "s_register_operand" "w")]
88f77cba
JB
1704 UNSPEC_VMUL))]
1705 "TARGET_NEON"
94f0f2cc 1706 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 1707 [(set (attr "type")
b75b1be2 1708 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
1709 (const_string "neon_fp_mul_s<q>")
1710 (const_string "neon_mul_<V_elem_ch><q>")))]
c956e102 1711)
88f77cba 1712
55a9b91b
MW
;; Multiply for the VH modes, written as a plain mult RTL expression rather
;; than an unspec and guarded by TARGET_NEON_FP16INST.  It always prints
;; vmul.f16.
1713(define_insn "neon_vmulf<mode>"
1714 [(set
1715 (match_operand:VH 0 "s_register_operand" "=w")
1716 (mult:VH
1717 (match_operand:VH 1 "s_register_operand" "w")
1718 (match_operand:VH 2 "s_register_operand" "w")))]
1719 "TARGET_NEON_FP16INST"
1720 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1721 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
1722)
1723
;; Builtin expander for vmla on the VDQW modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH is true it emits mul<mode>3add<mode>_neon;
;; otherwise it emits neon_vmla<mode>_unspec.  Both receive operands 0-3 in
;; the same order.
bab53516 1724(define_expand "neon_vmla<mode>"
cd65e265
DZ
1725 [(match_operand:VDQW 0 "s_register_operand")
1726 (match_operand:VDQW 1 "s_register_operand")
1727 (match_operand:VDQW 2 "s_register_operand")
1728 (match_operand:VDQW 3 "s_register_operand")]
bab53516
SL
1729 "TARGET_NEON"
1730{
bb78e587 1731 if (ARM_HAVE_NEON_<MODE>_ARITH)
bab53516
SL
1732 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
1733 operands[2], operands[3]));
1734 else
1735 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
1736 operands[2], operands[3]));
1737 DONE;
1738})
1739
c4216388
MGD
;; Builtin expander for vfma on the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  It emits fma<mode>4_intrinsic with the inputs
;; reordered: operands 2 and 3 come first and operand 1 is passed last
;; (presumably as the addend; confirm against fma<mode>4_intrinsic).
1740(define_expand "neon_vfma<VCVTF:mode>"
1741 [(match_operand:VCVTF 0 "s_register_operand")
1742 (match_operand:VCVTF 1 "s_register_operand")
1743 (match_operand:VCVTF 2 "s_register_operand")
94f0f2cc 1744 (match_operand:VCVTF 3 "s_register_operand")]
c4216388
MGD
1745 "TARGET_NEON && TARGET_FMA"
1746{
1747 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
1748 operands[1]));
1749 DONE;
1750})
1751
55a9b91b
MW
;; Builtin expander for vfma on the VH modes, enabled by
;; TARGET_NEON_FP16INST.  It emits fma<mode>4 (not the _intrinsic variant
;; used for VCVTF) with operands 2 and 3 first and operand 1 last.
1752(define_expand "neon_vfma<VH:mode>"
1753 [(match_operand:VH 0 "s_register_operand")
1754 (match_operand:VH 1 "s_register_operand")
1755 (match_operand:VH 2 "s_register_operand")
1756 (match_operand:VH 3 "s_register_operand")]
1757 "TARGET_NEON_FP16INST"
1758{
bb78e587
RS
1759 emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
1760 operands[1]));
55a9b91b
MW
1761 DONE;
1762})
1763
c4216388
MGD
;; Builtin expander for vfms on the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  It emits fmsub<mode>4_intrinsic with operands
;; 2 and 3 first and operand 1 last.
1764(define_expand "neon_vfms<VCVTF:mode>"
1765 [(match_operand:VCVTF 0 "s_register_operand")
1766 (match_operand:VCVTF 1 "s_register_operand")
1767 (match_operand:VCVTF 2 "s_register_operand")
94f0f2cc 1768 (match_operand:VCVTF 3 "s_register_operand")]
c4216388
MGD
1769 "TARGET_NEON && TARGET_FMA"
1770{
1771 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1772 operands[1]));
1773 DONE;
1774})
1775
55a9b91b
MW
;; Builtin expander for vfms on the VH modes, enabled by
;; TARGET_NEON_FP16INST.  It emits fmsub<mode>4_intrinsic with operands 2 and
;; 3 first and operand 1 last.
1776(define_expand "neon_vfms<VH:mode>"
1777 [(match_operand:VH 0 "s_register_operand")
1778 (match_operand:VH 1 "s_register_operand")
1779 (match_operand:VH 2 "s_register_operand")
1780 (match_operand:VH 3 "s_register_operand")]
1781 "TARGET_NEON_FP16INST"
1782{
1783 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
1784 operands[1]));
1785 DONE;
1786})
1787
06e95715
KT
1788;; The expand RTL structure here is not important.
1789;; We use the gen_* functions anyway.
1790;; We just need something to wrap the iterators around.
1791
;; Expander shared by the vfmal/vfmsl low/high builtins, enabled by
;; TARGET_FP16FML.  It builds the half selector for <VFML>mode with
;; arm_simd_vect_par_cnst_half and passes it twice, as the last two
;; arguments of the matching vfm<vfml_op>l_<vfml_half><mode>_intrinsic insn.
1792(define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
1793 [(set (match_operand:VCVTF 0 "s_register_operand")
1794 (unspec:VCVTF
1795 [(match_operand:VCVTF 1 "s_register_operand")
1796 (PLUSMINUS:<VFML>
1797 (match_operand:<VFML> 2 "s_register_operand")
1798 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
1799 "TARGET_FP16FML"
1800{
1801 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1802 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
1803 operands[1],
1804 operands[2],
1805 operands[3],
1806 half, half));
1807 DONE;
1808})
1809
;; vfmal, low halves: an fma whose two multiplicands are the float_extend of
;; the halves of operands 2 and 3 selected by the vect_par_constant_low
;; parallels in operands 4 and 5.  The addend is operand 1, tied to the
;; output by the 0 matching constraint.
1810(define_insn "vfmal_low<mode>_intrinsic"
1811 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1812 (fma:VCVTF
1813 (float_extend:VCVTF
1814 (vec_select:<VFMLSEL>
1815 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1816 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1817 (float_extend:VCVTF
1818 (vec_select:<VFMLSEL>
1819 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1820 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1821 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1822 "TARGET_FP16FML"
1823 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1824 [(set_attr "type" "neon_fp_mla_s<q>")]
1825)
1826
;; vfmsl, high halves: like the vfmal high pattern, but the selected half of
;; operand 2 is wrapped in neg before the float_extend.  The halves are
;; chosen by the vect_par_constant_high parallels in operands 4 and 5, and
;; the addend (operand 1) is tied to the output.
1827(define_insn "vfmsl_high<mode>_intrinsic"
1828 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1829 (fma:VCVTF
1830 (float_extend:VCVTF
1831 (neg:<VFMLSEL>
1832 (vec_select:<VFMLSEL>
1833 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1834 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
1835 (float_extend:VCVTF
1836 (vec_select:<VFMLSEL>
1837 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1838 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1839 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1840 "TARGET_FP16FML"
1841 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1842 [(set_attr "type" "neon_fp_mla_s<q>")]
1843)
1844
;; vfmal, high halves: an fma whose two multiplicands are the float_extend of
;; the halves of operands 2 and 3 selected by the vect_par_constant_high
;; parallels in operands 4 and 5.  The addend is operand 1, tied to the
;; output by the 0 matching constraint.
1845(define_insn "vfmal_high<mode>_intrinsic"
1846 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1847 (fma:VCVTF
1848 (float_extend:VCVTF
1849 (vec_select:<VFMLSEL>
1850 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1851 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1852 (float_extend:VCVTF
1853 (vec_select:<VFMLSEL>
1854 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1855 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
1856 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1857 "TARGET_FP16FML"
1858 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
1859 [(set_attr "type" "neon_fp_mla_s<q>")]
1860)
1861
;; vfmsl, low halves: like the vfmal low pattern, but the selected half of
;; operand 2 is wrapped in neg before the float_extend.  The halves are
;; chosen by the vect_par_constant_low parallels in operands 4 and 5, and the
;; addend (operand 1) is tied to the output.
1862(define_insn "vfmsl_low<mode>_intrinsic"
1863 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1864 (fma:VCVTF
1865 (float_extend:VCVTF
1866 (neg:<VFMLSEL>
1867 (vec_select:<VFMLSEL>
1868 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1869 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
1870 (float_extend:VCVTF
1871 (vec_select:<VFMLSEL>
1872 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
1873 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
1874 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1875 "TARGET_FP16FML"
1876 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
1877 [(set_attr "type" "neon_fp_mla_s<q>")]
1878)
1879
eccf4d70
KT
;; Expander for the by-lane vfmal/vfmsl builtins where operands 2 and 3
;; share the <VFML> mode.  It converts the lane number in operand 4 with
;; NEON_ENDIAN_LANE_N for <VFML>mode, builds the half selector with
;; arm_simd_vect_par_cnst_half, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic insn with the selector and
;; the converted lane as its last two arguments.
1880(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
1881 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1882 (unspec:VCVTF
1883 [(match_operand:VCVTF 1 "s_register_operand")
1884 (PLUSMINUS:<VFML>
1885 (match_operand:<VFML> 2 "s_register_operand")
1886 (match_operand:<VFML> 3 "s_register_operand"))
1887 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1888 "TARGET_FP16FML"
1889{
1890 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
1891 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1892 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
1893 (operands[0], operands[1],
1894 operands[2], operands[3],
1895 half, lane));
1896 DONE;
1897})
1898
;; vfmal by lane, low half of operand 2: the second multiplicand is one HF
;; element of operand 3 (index in operand 5) duplicated across <VFMLSEL>.
;; The output code runs operand 5 through NEON_ENDIAN_LANE_N; if the result
;; lies past the last element of <VFMLSEL>mode, the lane is rebased and the
;; %<V_hi>3 register form is printed, otherwise %<V_lo>3 is used.
1899(define_insn "vfmal_lane_low<mode>_intrinsic"
1900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1901 (fma:VCVTF
1902 (float_extend:VCVTF
1903 (vec_select:<VFMLSEL>
1904 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1905 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1906 (float_extend:VCVTF
1907 (vec_duplicate:<VFMLSEL>
1908 (vec_select:HF
1909 (match_operand:<VFML> 3 "s_register_operand" "x")
1910 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1911 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1912 "TARGET_FP16FML"
1913 {
1914 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
1915 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
1916 {
1917 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
1918 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
1919 }
1920 else
1921 {
1922 operands[5] = GEN_INT (lane);
1923 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
1924 }
1925 }
1926 [(set_attr "type" "neon_fp_mla_s<q>")]
1927)
1928
;; Expander for the by-lane vfmal/vfmsl builtins where operand 3 has the
;; <VFMLSEL2> mode rather than <VFML>.  The lane number in operand 4 is
;; converted with NEON_ENDIAN_LANE_N for <VFMLSEL2>mode, while the half
;; selector is still built for <VFML>mode.  Both are passed as the last two
;; arguments of the matching _intrinsic insn.
1929(define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
1930 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
1931 (unspec:VCVTF
1932 [(match_operand:VCVTF 1 "s_register_operand")
1933 (PLUSMINUS:<VFML>
1934 (match_operand:<VFML> 2 "s_register_operand")
1935 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
1936 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
1937 "TARGET_FP16FML"
1938{
1939 rtx lane
1940 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
1941 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
1942 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
1943 (operands[0], operands[1], operands[2], operands[3],
1944 half, lane));
1945 DONE;
1946})
1947
1948;; Used to implement the intrinsics:
99cf78cf
TC
1949;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1950;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
1951;; Needs a bit of care to get the modes of the different sub-expressions right
1952;; due to 'a' and 'b' having different sizes and make sure we use the right
1953;; S or D subregister to select the appropriate lane from.
1954
;; vfmal by lane, low half of operand 2, with operand 3 in <VFMLSEL2> mode.
;; The output code runs operand 5 through NEON_ENDIAN_LANE_N for
;; <VFMLSEL2>mode and splits it by the element count of <VFMLSEL>mode: the
;; remainder becomes the printed lane and the quotient is added to the
;; register number of operand 3.  Operands 2 and 3 are then rebuilt as
;; <VFMLSEL>mode REGs before printing with %<V_lane_reg>.
1955(define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
1956 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1957 (fma:VCVTF
1958 (float_extend:VCVTF
1959 (vec_select:<VFMLSEL>
1960 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1961 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
1962 (float_extend:VCVTF
1963 (vec_duplicate:<VFMLSEL>
1964 (vec_select:HF
1965 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
1966 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
1967 (match_operand:VCVTF 1 "s_register_operand" "0")))]
1968 "TARGET_FP16FML"
1969 {
1970 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
1971 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
1972 int new_lane = lane % elts_per_reg;
1973 int regdiff = lane / elts_per_reg;
1974 operands[5] = GEN_INT (new_lane);
1975 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
1976 because we want the print_operand code to print the appropriate
1977 S or D register prefix. */
1978 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
1979 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
1980 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
1981 }
1982 [(set_attr "type" "neon_fp_mla_s<q>")]
1983)
1984
1985;; Used to implement the intrinsics:
99cf78cf
TC
1986;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
1987;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
1988;; Needs a bit of care to get the modes of the different sub-expressions right
1989;; due to 'a' and 'b' having different sizes and make sure we use the right
1990;; S or D subregister to select the appropriate lane from.
1991
;; vfmal by lane, high half of operand 2, with operand 3 in <VFMLSEL2> mode.
;; The lane is converted and split as in the low variant, but only operand 3
;; is rebuilt as a <VFMLSEL>mode REG.  Operand 2 keeps its mode and is
;; printed with %<V_hi>.
1992(define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
1993 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1994 (fma:VCVTF
1995 (float_extend:VCVTF
1996 (vec_select:<VFMLSEL>
1997 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
1998 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
1999 (float_extend:VCVTF
2000 (vec_duplicate:<VFMLSEL>
2001 (vec_select:HF
2002 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2003 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2004 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2005 "TARGET_FP16FML"
2006 {
2007 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2008 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2009 int new_lane = lane % elts_per_reg;
2010 int regdiff = lane / elts_per_reg;
2011 operands[5] = GEN_INT (new_lane);
2012 /* We re-create operands[3] in the halved VFMLSEL mode
2013 because we've calculated the correct half-width subreg to extract
2014 the lane from and we want to print *that* subreg instead. */
2015 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2016 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2017 }
2018 [(set_attr "type" "neon_fp_mla_s<q>")]
2019)
2020
;; vfmal by lane, high half of operand 2, with operands 2 and 3 both in
;; <VFML> mode.  The output code runs operand 5 through NEON_ENDIAN_LANE_N;
;; if the result lies past the last element of <VFMLSEL>mode, the lane is
;; rebased and %<V_hi>3 is printed, otherwise %<V_lo>3 is used.  Operand 2 is
;; always printed with %<V_hi>.
2021(define_insn "vfmal_lane_high<mode>_intrinsic"
2022 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2023 (fma:VCVTF
2024 (float_extend:VCVTF
2025 (vec_select:<VFMLSEL>
2026 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2027 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2028 (float_extend:VCVTF
2029 (vec_duplicate:<VFMLSEL>
2030 (vec_select:HF
2031 (match_operand:<VFML> 3 "s_register_operand" "x")
2032 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2033 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2034 "TARGET_FP16FML"
2035 {
2036 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2037 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2038 {
2039 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2040 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2041 }
2042 else
2043 {
2044 operands[5] = GEN_INT (lane);
2045 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2046 }
2047 }
2048 [(set_attr "type" "neon_fp_mla_s<q>")]
2049)
2050
;; vfmsl by lane, low half of operand 2 (wrapped in neg before the
;; float_extend), with operands 2 and 3 both in <VFML> mode.  The output code
;; runs operand 5 through NEON_ENDIAN_LANE_N; if the result lies past the
;; last element of <VFMLSEL>mode, the lane is rebased and %<V_hi>3 is
;; printed, otherwise %<V_lo>3 is used.
2051(define_insn "vfmsl_lane_low<mode>_intrinsic"
2052 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2053 (fma:VCVTF
2054 (float_extend:VCVTF
2055 (neg:<VFMLSEL>
2056 (vec_select:<VFMLSEL>
2057 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2058 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2059 (float_extend:VCVTF
2060 (vec_duplicate:<VFMLSEL>
2061 (vec_select:HF
2062 (match_operand:<VFML> 3 "s_register_operand" "x")
2063 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2064 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2065 "TARGET_FP16FML"
2066 {
2067 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2068 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2069 {
2070 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2071 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2072 }
2073 else
2074 {
2075 operands[5] = GEN_INT (lane);
2076 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2077 }
2078 }
2079 [(set_attr "type" "neon_fp_mla_s<q>")]
2080)
2081
2082;; Used to implement the intrinsics:
99cf78cf
TC
2083;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2084;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2085;; Needs a bit of care to get the modes of the different sub-expressions right
2086;; due to 'a' and 'b' having different sizes and make sure we use the right
2087;; S or D subregister to select the appropriate lane from.
2088
;; vfmsl by lane, low half of operand 2 (wrapped in neg before the
;; float_extend), with operand 3 in <VFMLSEL2> mode.  The output code runs
;; operand 5 through NEON_ENDIAN_LANE_N for <VFMLSEL2>mode and splits it by
;; the element count of <VFMLSEL>mode: the remainder becomes the printed lane
;; and the quotient is added to the register number of operand 3.  Operands 2
;; and 3 are rebuilt as <VFMLSEL>mode REGs before printing.
2089(define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2090 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2091 (fma:VCVTF
2092 (float_extend:VCVTF
2093 (neg:<VFMLSEL>
2094 (vec_select:<VFMLSEL>
2095 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2096 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2097 (float_extend:VCVTF
2098 (vec_duplicate:<VFMLSEL>
2099 (vec_select:HF
2100 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2101 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2102 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2103 "TARGET_FP16FML"
2104 {
2105 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2106 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2107 int new_lane = lane % elts_per_reg;
2108 int regdiff = lane / elts_per_reg;
2109 operands[5] = GEN_INT (new_lane);
2110 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2111 because we want the print_operand code to print the appropriate
2112 S or D register prefix. */
2113 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2114 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2115 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2116 }
2117 [(set_attr "type" "neon_fp_mla_s<q>")]
2118)
2119
2120;; Used to implement the intrinsics:
99cf78cf
TC
2121;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2122;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
eccf4d70
KT
2123;; Needs a bit of care to get the modes of the different sub-expressions right
2124;; due to 'a' and 'b' having different sizes and make sure we use the right
2125;; S or D subregister to select the appropriate lane from.
2126
;; vfmsl by lane, high half of operand 2 (wrapped in neg before the
;; float_extend), with operand 3 in <VFMLSEL2> mode.  The lane is converted
;; and split by the element count of <VFMLSEL>mode.  Only operand 3 is
;; rebuilt as a <VFMLSEL>mode REG; operand 2 is printed with %<V_hi>.
2127(define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2128 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2129 (fma:VCVTF
2130 (float_extend:VCVTF
2131 (neg:<VFMLSEL>
2132 (vec_select:<VFMLSEL>
2133 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2134 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2135 (float_extend:VCVTF
2136 (vec_duplicate:<VFMLSEL>
2137 (vec_select:HF
2138 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2139 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2140 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2141 "TARGET_FP16FML"
2142 {
2143 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2144 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2145 int new_lane = lane % elts_per_reg;
2146 int regdiff = lane / elts_per_reg;
2147 operands[5] = GEN_INT (new_lane);
2148 /* We re-create operands[3] in the halved VFMLSEL mode
2149 because we've calculated the correct half-width subreg to extract
2150 the lane from and we want to print *that* subreg instead. */
2151 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2152 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2153 }
2154 [(set_attr "type" "neon_fp_mla_s<q>")]
2155)
2156
;; vfmsl by lane, high half of operand 2 (wrapped in neg before the
;; float_extend), with operands 2 and 3 both in <VFML> mode.  The output code
;; runs operand 5 through NEON_ENDIAN_LANE_N; if the result lies past the
;; last element of <VFMLSEL>mode, the lane is rebased and %<V_hi>3 is
;; printed, otherwise %<V_lo>3 is used.
2157(define_insn "vfmsl_lane_high<mode>_intrinsic"
2158 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2159 (fma:VCVTF
2160 (float_extend:VCVTF
2161 (neg:<VFMLSEL>
2162 (vec_select:<VFMLSEL>
2163 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2164 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2165 (float_extend:VCVTF
2166 (vec_duplicate:<VFMLSEL>
2167 (vec_select:HF
2168 (match_operand:<VFML> 3 "s_register_operand" "x")
2169 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2170 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2171 "TARGET_FP16FML"
2172 {
2173 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2174 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2175 {
2176 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2177 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2178 }
2179 else
2180 {
2181 operands[5] = GEN_INT (lane);
2182 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2183 }
2184 }
2185 [(set_attr "type" "neon_fp_mla_s<q>")]
2186)
2187
bab53516
SL
2188; Used for intrinsics when flag_unsafe_math_optimizations is false.
2189
;; UNSPEC_VMLA form of vmla for the VDQW modes.  Operand 1 is tied to the
;; output by the 0 matching constraint, so only operands 2 and 3 appear as
;; sources in the printed instruction.  The type attribute is
;; neon_fp_mla_s<q> when the <Is_float_mode> test holds and
;; neon_mla_<V_elem_ch><q> otherwise.
2190(define_insn "neon_vmla<mode>_unspec"
f7379e5e
JG
2191 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2192 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2193 (match_operand:VDQW 2 "s_register_operand" "w")
2194 (match_operand:VDQW 3 "s_register_operand" "w")]
bab53516 2195 UNSPEC_VMLA))]
88f77cba 2196 "TARGET_NEON"
c956e102 2197 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 2198 [(set (attr "type")
b75b1be2 2199 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2200 (const_string "neon_fp_mla_s<q>")
2201 (const_string "neon_mla_<V_elem_ch><q>")))]
c956e102 2202)
88f77cba 2203
;; vmlal: operand 1 is in the <V_widen> mode and tied to the output by the 0
;; matching constraint; operands 2 and 3 are VW vectors.  The unspec code
;; iterates over VMLAL.
94f0f2cc 2204(define_insn "neon_vmlal<sup><mode>"
88f77cba
JB
2205 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2206 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2207 (match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2208 (match_operand:VW 3 "s_register_operand" "w")]
2209 VMLAL))]
88f77cba 2210 "TARGET_NEON"
94f0f2cc 2211 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2212 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
c956e102 2213)
88f77cba 2214
;; Builtin expander for vmls on the VDQW modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH is true it emits
;; mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
;; neon_vmls<mode>_unspec.  Both receive operands 0-3 in the same order.
bab53516 2215(define_expand "neon_vmls<mode>"
cd65e265
DZ
2216 [(match_operand:VDQW 0 "s_register_operand")
2217 (match_operand:VDQW 1 "s_register_operand")
2218 (match_operand:VDQW 2 "s_register_operand")
2219 (match_operand:VDQW 3 "s_register_operand")]
bab53516
SL
2220 "TARGET_NEON"
2221{
bb78e587 2222 if (ARM_HAVE_NEON_<MODE>_ARITH)
bab53516
SL
2223 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2224 operands[1], operands[2], operands[3]));
2225 else
2226 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2227 operands[2], operands[3]));
2228 DONE;
2229})
2230
2231; Used for intrinsics when flag_unsafe_math_optimizations is false.
2232
;; UNSPEC_VMLS form of vmls for the VDQW modes.  Operand 1 is tied to the
;; output by the 0 matching constraint, so only operands 2 and 3 appear as
;; sources in the printed instruction.  The type attribute is
;; neon_fp_mla_s<q> when the <Is_float_mode> test holds and
;; neon_mla_<V_elem_ch><q> otherwise.
2233(define_insn "neon_vmls<mode>_unspec"
f7379e5e
JG
2234 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2235 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2236 (match_operand:VDQW 2 "s_register_operand" "w")
2237 (match_operand:VDQW 3 "s_register_operand" "w")]
bab53516 2238 UNSPEC_VMLS))]
88f77cba 2239 "TARGET_NEON"
c956e102 2240 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
003bb7f3 2241 [(set (attr "type")
b75b1be2 2242 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2243 (const_string "neon_fp_mla_s<q>")
2244 (const_string "neon_mla_<V_elem_ch><q>")))]
c956e102 2245)
88f77cba 2246
;; vmlsl: operand 1 is in the <V_widen> mode and tied to the output by the 0
;; matching constraint; operands 2 and 3 are VW vectors.  The unspec code
;; iterates over VMLSL.
94f0f2cc 2247(define_insn "neon_vmlsl<sup><mode>"
88f77cba
JB
2248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2249 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2250 (match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2251 (match_operand:VW 3 "s_register_operand" "w")]
2252 VMLSL))]
88f77cba 2253 "TARGET_NEON"
94f0f2cc 2254 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2255 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
c956e102 2256)
88f77cba 2257
94f0f2cc
JG
2258;; vqdmulh, vqrdmulh
;; vqdmulh / vqrdmulh over the VMDQI modes; the unspec code iterates over
;; VQDMULH and <r> is substituted into the name and mnemonic.
2259(define_insn "neon_vq<r>dmulh<mode>"
88f77cba
JB
2260 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2261 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
94f0f2cc
JG
2262 (match_operand:VMDQI 2 "s_register_operand" "w")]
2263 VQDMULH))]
88f77cba 2264 "TARGET_NEON"
94f0f2cc 2265 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2266 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
c956e102 2267)
88f77cba 2268
5f2ca3b2
MW
2269;; vqrdmlah, vqrdmlsh
;; vqrdmlah / vqrdmlsh over the VMDQI modes, enabled by TARGET_NEON_RDMA.
;; The unspec code iterates over VQRDMLH_AS, whose neon_rdma_as attribute
;; fills in the name and mnemonic.  Operand 1 is tied to the output by the 0
;; matching constraint.
2270(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2271 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2272 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2273 (match_operand:VMDQI 2 "s_register_operand" "w")
2274 (match_operand:VMDQI 3 "s_register_operand" "w")]
2275 VQRDMLH_AS))]
2276 "TARGET_NEON_RDMA"
2277 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2278 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2279)
2280
88f77cba
JB
;; vqdmlal (UNSPEC_VQDMLAL): operand 1 is in the <V_widen> mode and tied to
;; the output by the 0 matching constraint; operands 2 and 3 are VMDI
;; vectors.
2281(define_insn "neon_vqdmlal<mode>"
2282 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2283 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2284 (match_operand:VMDI 2 "s_register_operand" "w")
94f0f2cc 2285 (match_operand:VMDI 3 "s_register_operand" "w")]
88f77cba
JB
2286 UNSPEC_VQDMLAL))]
2287 "TARGET_NEON"
c956e102 2288 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
f7379e5e 2289 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
c956e102 2290)
88f77cba
JB
2291
;; vqdmlsl (UNSPEC_VQDMLSL): operand 1 is in the <V_widen> mode and tied to
;; the output by the 0 matching constraint; operands 2 and 3 are VMDI
;; vectors.
2292(define_insn "neon_vqdmlsl<mode>"
2293 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2294 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2295 (match_operand:VMDI 2 "s_register_operand" "w")
94f0f2cc 2296 (match_operand:VMDI 3 "s_register_operand" "w")]
88f77cba
JB
2297 UNSPEC_VQDMLSL))]
2298 "TARGET_NEON"
c956e102 2299 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
f7379e5e 2300 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
c956e102 2301)
88f77cba 2302
;; vmull: takes two VW inputs and produces a result in the <V_widen> mode.
;; The unspec code iterates over VMULL.
94f0f2cc 2303(define_insn "neon_vmull<sup><mode>"
88f77cba
JB
2304 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2305 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
2306 (match_operand:VW 2 "s_register_operand" "w")]
2307 VMULL))]
88f77cba 2308 "TARGET_NEON"
94f0f2cc 2309 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2310 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
c956e102 2311)
88f77cba
JB
2312
;; vqdmull (UNSPEC_VQDMULL): takes two VMDI inputs and produces a result in
;; the <V_widen> mode.
2313(define_insn "neon_vqdmull<mode>"
2314 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2315 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
94f0f2cc 2316 (match_operand:VMDI 2 "s_register_operand" "w")]
88f77cba
JB
2317 UNSPEC_VQDMULL))]
2318 "TARGET_NEON"
c956e102 2319 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
f7379e5e 2320 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
c956e102 2321)
88f77cba 2322
;; Builtin expander for vsub on the VCVTF modes.  When
;; ARM_HAVE_NEON_<MODE>_ARITH is true it emits the generic sub<mode>3
;; pattern; otherwise it emits neon_vsub<mode>_unspec.
bab53516 2323(define_expand "neon_vsub<mode>"
cd65e265
DZ
2324 [(match_operand:VCVTF 0 "s_register_operand")
2325 (match_operand:VCVTF 1 "s_register_operand")
2326 (match_operand:VCVTF 2 "s_register_operand")]
bab53516
SL
2327 "TARGET_NEON"
2328{
bb78e587 2329 if (ARM_HAVE_NEON_<MODE>_ARITH)
bab53516
SL
2330 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2331 else
2332 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2333 operands[2]));
2334 DONE;
2335})
2336
2337; Used for intrinsics when flag_unsafe_math_optimizations is false.
2338
;; UNSPEC_VSUB form of vsub for the VCVTF modes, printed as
;; vsub.<V_if_elem>.  The type attribute is neon_fp_addsub_s<q> when the
;; <Is_float_mode> test holds and neon_sub<q> otherwise.
2339(define_insn "neon_vsub<mode>_unspec"
0d0b79a6
RR
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2341 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2342 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
2343 UNSPEC_VSUB))]
2344 "TARGET_NEON"
c956e102 2345 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
003bb7f3 2346 [(set (attr "type")
b75b1be2 2347 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
2348 (const_string "neon_fp_addsub_s<q>")
2349 (const_string "neon_sub<q>")))]
c956e102 2350)
88f77cba 2351
;; vsubl: takes two VDI inputs and produces a result in the <V_widen> mode.
;; The unspec code iterates over VSUBL.
94f0f2cc 2352(define_insn "neon_vsubl<sup><mode>"
88f77cba
JB
2353 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2354 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
94f0f2cc
JG
2355 (match_operand:VDI 2 "s_register_operand" "w")]
2356 VSUBL))]
88f77cba 2357 "TARGET_NEON"
94f0f2cc 2358 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2359 [(set_attr "type" "neon_sub_long")]
c956e102 2360)
88f77cba 2361
;; vsubw: operand 1 is already in the <V_widen> mode, operand 2 is a VDI
;; vector, and the result is in <V_widen>.  The unspec code iterates over
;; VSUBW.
94f0f2cc 2362(define_insn "neon_vsubw<sup><mode>"
88f77cba
JB
2363 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2364 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
94f0f2cc
JG
2365 (match_operand:VDI 2 "s_register_operand" "w")]
2366 VSUBW))]
88f77cba 2367 "TARGET_NEON"
94f0f2cc 2368 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
f7379e5e 2369 [(set_attr "type" "neon_sub_widen")]
c956e102 2370)
88f77cba 2371
;; vqsub over the VDQIX modes; the unspec code iterates over VQSUB and the
;; type attribute is neon_qsub<q>.
94f0f2cc 2372(define_insn "neon_vqsub<sup><mode>"
88f77cba
JB
2373 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2374 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
2375 (match_operand:VDQIX 2 "s_register_operand" "w")]
2376 VQSUB))]
88f77cba 2377 "TARGET_NEON"
94f0f2cc 2378 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2379 [(set_attr "type" "neon_qsub<q>")]
c956e102 2380)
88f77cba 2381
;; vhsub over the VDQIW modes; the unspec code iterates over VHSUB and the
;; type attribute is neon_sub_halve<q>.
94f0f2cc 2382(define_insn "neon_vhsub<sup><mode>"
88f77cba
JB
2383 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2384 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc
JG
2385 (match_operand:VDQIW 2 "s_register_operand" "w")]
2386 VHSUB))]
88f77cba 2387 "TARGET_NEON"
94f0f2cc 2388 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2389 [(set_attr "type" "neon_sub_halve<q>")]
c956e102 2390)
88f77cba 2391
;; v<r>subhn: both inputs are VN-mode registers (%q), the result is in the
;; <V_narrow> mode (%P).  <r> comes from the VSUBHN unspec iterator.
94f0f2cc 2392(define_insn "neon_v<r>subhn<mode>"
88f77cba
JB
2393 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2394 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
2395 (match_operand:VN 2 "s_register_operand" "w")]
2396 VSUBHN))]
88f77cba 2397 "TARGET_NEON"
94f0f2cc 2398 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
f7379e5e 2399 [(set_attr "type" "neon_sub_halve_narrow_q")]
c956e102 2400)
88f77cba 2401
381811fa
KT
2402;; These may expand to an UNSPEC pattern when a floating point mode is used
2403;; without unsafe math optimizations.
;; Comparison expander over the VDQW modes.  For a vector-float mode with
;; flag_unsafe_math_optimizations off it emits the V2SF/V4SF _insn_unspec
;; pattern; in every other case it emits the plain _insn pattern.  It always
;; finishes with DONE, so the RTL template here is never emitted itself.
c2978b34 2404(define_expand "@neon_vc<cmp_op><mode>"
cd65e265 2405 [(match_operand:<V_cmp_result> 0 "s_register_operand")
381811fa 2406 (neg:<V_cmp_result>
cd65e265
DZ
2407 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2408 (match_operand:VDQW 2 "reg_or_zero_operand")))]
88f77cba 2409 "TARGET_NEON"
381811fa
KT
2410 {
2411 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2412 are enabled. */
2413 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2414 && !flag_unsafe_math_optimizations)
2415 {
2416 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2417 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2418 whereas this expander iterates over the integer modes as well,
2419 but we will never expand to UNSPECs for the integer comparisons. */
2420 switch (<MODE>mode)
2421 {
4e10a5a7 2422 case E_V2SFmode:
381811fa
KT
2423 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2424 operands[1],
2425 operands[2]));
2426 break;
4e10a5a7 2427 case E_V4SFmode:
381811fa
KT
2428 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2429 operands[1],
2430 operands[2]));
2431 break;
2432 default:
2433 gcc_unreachable ();
2434 }
2435 }
2436 else
2437 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2438 operands[1],
2439 operands[2]));
2440 DONE;
2441 }
c956e102 2442)
88f77cba 2443
;; RTL-visible comparison: the negated COMPARISONS result of two VDQW
;; operands.  The condition rejects vector-float modes unless
;; flag_unsafe_math_optimizations is set.  The assembler template is built at
;; output time: the element-type letter is "f" for vector-float modes and
;; <cmp_type> otherwise; alternative 0 prints register operand 2 and
;; alternative 1 (constraint Dz) prints "#0".
c2978b34 2444(define_insn "@neon_vc<cmp_op><mode>_insn"
5bfc5baa 2445 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
381811fa
KT
2446 (neg:<V_cmp_result>
2447 (COMPARISONS:<V_cmp_result>
2448 (match_operand:VDQW 1 "s_register_operand" "w,w")
2449 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2450 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2451 && !flag_unsafe_math_optimizations)"
2452 {
2453 char pattern[100];
2454 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2455 " %%<V_reg>1, %s",
2456 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2457 ? "f" : "<cmp_type>",
2458 which_alternative == 0
2459 ? "%<V_reg>2" : "#0");
2460 output_asm_insn (pattern, operands);
2461 return "";
2462 }
003bb7f3 2463 [(set (attr "type")
381811fa 2464 (if_then_else (match_operand 2 "zero_operand")
f7379e5e 2465 (const_string "neon_compare_zero<q>")
381811fa 2466 (const_string "neon_compare<q>")))]
c956e102 2467)
88f77cba 2468
;; Unspec (NEON_VCMP) form of the comparison for VCVTF operands.  Always
;; prints the ".f" element type; alternative 0 prints register operand 2 and
;; alternative 1 (constraint Dz) prints "#0".
381811fa 2469(define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
5bfc5baa
JB
2470 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2471 (unspec:<V_cmp_result>
381811fa
KT
2472 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2473 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2474 NEON_VCMP))]
88f77cba 2475 "TARGET_NEON"
381811fa
KT
2476 {
2477 char pattern[100];
2478 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2479 " %%<V_reg>1, %s",
2480 which_alternative == 0
2481 ? "%<V_reg>2" : "#0");
2482 output_asm_insn (pattern, operands);
2483 return "";
2484}
2485 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2486)
88f77cba 2487
;; Comparison expander for the VH (half-precision) modes, enabled under
;; TARGET_NEON_FP16INST.  Emits the _fp16insn_unspec pattern when the mode is
;; vector-float and flag_unsafe_math_optimizations is off, and the _fp16insn
;; pattern otherwise.
c2978b34 2488(define_expand "@neon_vc<cmp_op><mode>"
55a9b91b
MW
2489 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2490 (neg:<V_cmp_result>
2491 (COMPARISONS:VH
2492 (match_operand:VH 1 "s_register_operand")
2493 (match_operand:VH 2 "reg_or_zero_operand")))]
2494 "TARGET_NEON_FP16INST"
2495{
2496 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2497 are enabled. */
2498 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2499 && !flag_unsafe_math_optimizations)
2500 emit_insn
2501 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2502 (operands[0], operands[1], operands[2]));
2503 else
2504 emit_insn
2505 (gen_neon_vc<cmp_op><mode>_fp16insn
2506 (operands[0], operands[1], operands[2]));
2507 DONE;
2508})
2509
;; RTL-visible comparison for VH operands.  The condition requires
;; TARGET_NEON_FP16INST and rejects vector-float modes unless
;; flag_unsafe_math_optimizations is set.  The template is built at output
;; time: alternative 0 prints register operand 2, alternative 1 prints "#0".
2510(define_insn "neon_vc<cmp_op><mode>_fp16insn"
2511 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2512 (neg:<V_cmp_result>
2513 (COMPARISONS:<V_cmp_result>
2514 (match_operand:VH 1 "s_register_operand" "w,w")
2515 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2516 "TARGET_NEON_FP16INST
2517 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2518 && !flag_unsafe_math_optimizations)"
2519{
2520 char pattern[100];
2521 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2522 " %%<V_reg>1, %s",
2523 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2524 ? "f" : "<cmp_type>",
2525 which_alternative == 0
2526 ? "%<V_reg>2" : "#0");
2527 output_asm_insn (pattern, operands);
2528 return "";
2529}
2530 [(set (attr "type")
2531 (if_then_else (match_operand 2 "zero_operand")
2532 (const_string "neon_compare_zero<q>")
2533 (const_string "neon_compare<q>")))])
2534
;; Unspec (NEON_VCMP) form of the comparison for VH operands, enabled under
;; TARGET_NEON_FP16INST.  Always prints the ".f" element type; alternative 0
;; prints register operand 2 and alternative 1 prints "#0".
2535(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2536 [(set
2537 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2538 (unspec:<V_cmp_result>
2539 [(match_operand:VH 1 "s_register_operand" "w,w")
2540 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2541 NEON_VCMP))]
2542 "TARGET_NEON_FP16INST"
2543{
2544 char pattern[100];
2545 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2546 " %%<V_reg>1, %s",
2547 which_alternative == 0
2548 ? "%<V_reg>2" : "#0");
2549 output_asm_insn (pattern, operands);
2550 return "";
2551}
2552 [(set_attr "type" "neon_fp_compare_s<q>")])
2553
;; Comparisons from the GTUGEU code iterator on VDQIW-mode registers; the
;; element type is always printed as ".u".  Only register operands are
;; accepted (no compare-with-zero alternative).
c2978b34 2554(define_insn "@neon_vc<code><mode>"
fd92bb80 2555 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
381811fa
KT
2556 (neg:<V_cmp_result>
2557 (GTUGEU:<V_cmp_result>
2558 (match_operand:VDQIW 1 "s_register_operand" "w")
2559 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
fd92bb80 2560 "TARGET_NEON"
381811fa 2561 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2562 [(set_attr "type" "neon_compare<q>")]
fd92bb80
MGD
2563)
2564
381811fa
KT
;; Expander for comparisons of the absolute values of two VCVTF operands
;; (GTGE iterator).  Emits the RTL-visible _insn pattern when
;; flag_unsafe_math_optimizations is set and the _insn_unspec pattern
;; otherwise.
2565(define_expand "neon_vca<cmp_op><mode>"
2566 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2567 (neg:<V_cmp_result>
2568 (GTGE:<V_cmp_result>
2569 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2570 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
5bfc5baa 2571 "TARGET_NEON"
381811fa
KT
2572 {
2573 if (flag_unsafe_math_optimizations)
2574 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2575 operands[2]));
2576 else
2577 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2578 operands[1],
2579 operands[2]));
2580 DONE;
2581 }
5bfc5baa
JB
2582)
2583
;; RTL-visible absolute compare (vac<cmp_op>) on VCVTF operands; only
;; available when flag_unsafe_math_optimizations is set.
381811fa 2584(define_insn "neon_vca<cmp_op><mode>_insn"
88f77cba 2585 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
381811fa
KT
2586 (neg:<V_cmp_result>
2587 (GTGE:<V_cmp_result>
2588 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2589 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2590 "TARGET_NEON && flag_unsafe_math_optimizations"
2591 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2592 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2593)
88f77cba 2594
;; Unspec (NEON_VACMP) form of the absolute compare on VCVTF operands;
;; prints vac<cmp_op_unsp>.
381811fa 2595(define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
88f77cba
JB
2596 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2597 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 2598 (match_operand:VCVTF 2 "s_register_operand" "w")]
381811fa 2599 NEON_VACMP))]
88f77cba 2600 "TARGET_NEON"
381811fa 2601 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2602 [(set_attr "type" "neon_fp_compare_s<q>")]
c956e102 2603)
88f77cba 2604
55a9b91b
MW
;; Expander for absolute-value comparisons of VH operands (GLTE iterator),
;; enabled under TARGET_NEON_FP16INST.  Emits the _fp16insn pattern when
;; flag_unsafe_math_optimizations is set and _fp16insn_unspec otherwise.
2605(define_expand "neon_vca<cmp_op><mode>"
2606 [(set
2607 (match_operand:<V_cmp_result> 0 "s_register_operand")
2608 (neg:<V_cmp_result>
2609 (GLTE:<V_cmp_result>
2610 (abs:VH (match_operand:VH 1 "s_register_operand"))
2611 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2612 "TARGET_NEON_FP16INST"
2613{
2614 if (flag_unsafe_math_optimizations)
2615 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2616 (operands[0], operands[1], operands[2]));
2617 else
2618 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2619 (operands[0], operands[1], operands[2]));
2620 DONE;
2621})
2622
;; RTL-visible absolute compare (vac<cmp_op>) on VH operands; requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
2623(define_insn "neon_vca<cmp_op><mode>_fp16insn"
2624 [(set
2625 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2626 (neg:<V_cmp_result>
2627 (GLTE:<V_cmp_result>
2628 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2629 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2630 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2631 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2632 [(set_attr "type" "neon_fp_compare_s<q>")]
2633)
2634
;; Unspec (NEON_VAGLTE) form of the absolute compare on VH operands; prints
;; vac<cmp_op_unsp>.  This is the pattern the VH neon_vca<cmp_op><mode>
;; expander emits when flag_unsafe_math_optimizations is off.
;; The condition is TARGET_NEON_FP16INST, matching that expander and the
;; sibling _fp16insn pattern: the operands are half-precision vectors, so
;; plain TARGET_NEON is too weak a guard for this instruction.
2635(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2636 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2637 (unspec:<V_cmp_result>
2638 [(match_operand:VH 1 "s_register_operand" "w")
2639 (match_operand:VH 2 "s_register_operand" "w")]
2640 NEON_VAGLTE))]
2641 "TARGET_NEON_FP16INST"
2642 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2643 [(set_attr "type" "neon_fp_compare_s<q>")]
2644)
2645
;; Compare-with-zero expander for VH operands: forwards to
;; gen_neon_vc<cmp_op><mode> with CONST0_RTX (<MODE>mode) as the second
;; comparison operand.
2646(define_expand "neon_vc<cmp_op>z<mode>"
2647 [(set
2648 (match_operand:<V_cmp_result> 0 "s_register_operand")
2649 (COMPARISONS:<V_cmp_result>
2650 (match_operand:VH 1 "s_register_operand")
2651 (const_int 0)))]
2652 "TARGET_NEON_FP16INST"
2653 {
2654 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2655 CONST0_RTX (<MODE>mode)));
2656 DONE;
2657})
2658
88f77cba
JB
;; vtst on two VDQIW-mode registers, modelled as UNSPEC_VTST; the result has
;; the same mode as the inputs.
2659(define_insn "neon_vtst<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
94f0f2cc 2662 (match_operand:VDQIW 2 "s_register_operand" "w")]
88f77cba
JB
2663 UNSPEC_VTST))]
2664 "TARGET_NEON"
c956e102 2665 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2666 [(set_attr "type" "neon_tst<q>")]
c956e102 2667)
88f77cba 2668
94f0f2cc
JG
;; vabd on two VDQIW-mode registers, modelled as an unspec from the VABD
;; iterator (which also supplies <sup>).
2669(define_insn "neon_vabd<sup><mode>"
2670 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2671 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2672 (match_operand:VDQIW 2 "s_register_operand" "w")]
2673 VABD))]
88f77cba 2674 "TARGET_NEON"
94f0f2cc
JG
2675 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2676 [(set_attr "type" "neon_abd<q>")]
c956e102 2677)
88f77cba 2678
55a9b91b
MW
;; vabd on two VH (half-precision) registers, modelled as UNSPEC_VABD_F and
;; enabled under TARGET_NEON_FP16INST.
;; The type attribute is the floating-point class neon_fp_abd_s<q>, the same
;; one the VCVTF neon_vabdf<mode> pattern uses for UNSPEC_VABD_F, rather
;; than the integer neon_abd<q> class.
2679(define_insn "neon_vabd<mode>"
2680 [(set (match_operand:VH 0 "s_register_operand" "=w")
2681 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2682 (match_operand:VH 2 "s_register_operand" "w")]
2683 UNSPEC_VABD_F))]
2684 "TARGET_NEON_FP16INST"
2685 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686 [(set_attr "type" "neon_fp_abd_s<q>")]
2687)
2688
94f0f2cc
JG
;; vabd on two VCVTF-mode registers, modelled as UNSPEC_VABD_F.
2689(define_insn "neon_vabdf<mode>"
2690 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2691 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2692 (match_operand:VCVTF 2 "s_register_operand" "w")]
2693 UNSPEC_VABD_F))]
2694 "TARGET_NEON"
2695 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696 [(set_attr "type" "neon_fp_abd_s<q>")]
2697)
2698
;; vabdl: both inputs are VW-mode registers (%P), the result is in the
;; <V_widen> mode (%q).  <sup> comes from the VABDL unspec iterator.
2699(define_insn "neon_vabdl<sup><mode>"
88f77cba
JB
2700 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2701 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
2702 (match_operand:VW 2 "s_register_operand" "w")]
2703 VABDL))]
88f77cba 2704 "TARGET_NEON"
94f0f2cc 2705 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
f7379e5e 2706 [(set_attr "type" "neon_abd_long")]
c956e102 2707)
88f77cba 2708
;; vaba: adds the VABD unspec of operands 2 and 3 to operand 1.  Operand 1
;; is tied to the destination (constraint "0"), so only operands 0, 2 and 3
;; appear in the assembler template.
94f0f2cc 2709(define_insn "neon_vaba<sup><mode>"
88f77cba 2710 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
5b28d821 2711 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
94f0f2cc
JG
2712 (match_operand:VDQIW 3 "s_register_operand" "w")]
2713 VABD)
5b28d821 2714 (match_operand:VDQIW 1 "s_register_operand" "0")))]
88f77cba 2715 "TARGET_NEON"
94f0f2cc 2716 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
f7379e5e 2717 [(set_attr "type" "neon_arith_acc<q>")]
c956e102 2718)
88f77cba 2719
;; vabal: adds the VABDL unspec of VW-mode operands 2 and 3 to the
;; <V_widen>-mode operand 1.  Operand 1 is tied to the destination
;; (constraint "0").
94f0f2cc 2720(define_insn "neon_vabal<sup><mode>"
88f77cba 2721 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5b28d821 2722 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
94f0f2cc
JG
2723 (match_operand:VW 3 "s_register_operand" "w")]
2724 VABDL)
5b28d821 2725 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
88f77cba 2726 "TARGET_NEON"
94f0f2cc 2727 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
f7379e5e 2728 [(set_attr "type" "neon_arith_acc<q>")]
c956e102 2729)
88f77cba 2730
84ae7213
PW
;; <sup>sadv16qi expander: V16QI operands 1 and 2, V4SI operand 3, V4SI
;; result in operand 0.  Emitted sequence:
;;   1. vabdl on the low V8QI halves of operands 1 and 2 into a V8HI temp;
;;   2. vget_high extracts the high V8QI halves of both inputs;
;;   3. vabal accumulates the high halves into the same V8HI temp;
;;   4. vpadal adds the temp into operands[3];
;;   5. operands[3] is copied to operands[0].
;; Note that operands[3] is itself written by step 4.
2731(define_expand "<sup>sadv16qi"
2732 [(use (match_operand:V4SI 0 "register_operand"))
2733 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
2734 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
2735 (use (match_operand:V4SI 3 "register_operand"))]
2736 "TARGET_NEON"
2737 {
2738 rtx reduc = gen_reg_rtx (V8HImode);
2739 rtx op1_highpart = gen_reg_rtx (V8QImode);
2740 rtx op2_highpart = gen_reg_rtx (V8QImode);
2741
2742 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
2743 gen_lowpart (V8QImode, operands[1]),
2744 gen_lowpart (V8QImode, operands[2])));
2745
2746 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
2747 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
2748 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
2749 op1_highpart, op2_highpart));
2750 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
2751
2752 emit_move_insn (operands[0], operands[3]);
2753 DONE;
2754 }
2755)
2756
94f0f2cc
JG
;; v<maxmin> on two VDQIW-mode registers, modelled as an unspec from the
;; VMAXMIN iterator (which supplies <maxmin> and <sup>).
2757(define_insn "neon_v<maxmin><sup><mode>"
2758 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2759 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2760 (match_operand:VDQIW 2 "s_register_operand" "w")]
2761 VMAXMIN))]
88f77cba 2762 "TARGET_NEON"
94f0f2cc
JG
2763 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2764 [(set_attr "type" "neon_minmax<q>")]
c956e102 2765)
88f77cba 2766
94f0f2cc
JG
;; v<maxmin> on two VCVTF-mode registers, modelled as an unspec from the
;; VMAXMINF iterator.
2767(define_insn "neon_v<maxmin>f<mode>"
2768 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2769 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2770 (match_operand:VCVTF 2 "s_register_operand" "w")]
2771 VMAXMINF))]
88f77cba 2772 "TARGET_NEON"
94f0f2cc 2773 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
0a18c19f
DS
2774 [(set_attr "type" "neon_fp_minmax_s<q>")]
2775)
2776
55a9b91b
MW
;; v<maxmin> on two VH-mode registers (VMAXMINF iterator), enabled under
;; TARGET_NEON_FP16INST.
2777(define_insn "neon_v<maxmin>f<mode>"
2778 [(set (match_operand:VH 0 "s_register_operand" "=w")
2779 (unspec:VH
2780 [(match_operand:VH 1 "s_register_operand" "w")
2781 (match_operand:VH 2 "s_register_operand" "w")]
2782 VMAXMINF))]
2783 "TARGET_NEON_FP16INST"
2784 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2785 [(set_attr "type" "neon_fp_minmax_s<q>")]
2786)
2787
;; vp<maxmin>.f16 on V4HF registers only (VPMAXMINF iterator); all three
;; operands are printed with %P.  Enabled under TARGET_NEON_FP16INST.
2788(define_insn "neon_vp<maxmin>fv4hf"
2789 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2790 (unspec:V4HF
2791 [(match_operand:V4HF 1 "s_register_operand" "w")
2792 (match_operand:V4HF 2 "s_register_operand" "w")]
2793 VPMAXMINF))]
2794 "TARGET_NEON_FP16INST"
2795 "vp<maxmin>.f16\t%P0, %P1, %P2"
2796 [(set_attr "type" "neon_reduc_minmax")]
2797)
2798
;; <fmaxmin_op> on two VH-mode registers, modelled as an unspec from the
;; VMAXMINFNM iterator; enabled under TARGET_NEON_FP16INST.
2799(define_insn "neon_<fmaxmin_op><mode>"
2800 [(set
2801 (match_operand:VH 0 "s_register_operand" "=w")
2802 (unspec:VH
2803 [(match_operand:VH 1 "s_register_operand" "w")
2804 (match_operand:VH 2 "s_register_operand" "w")]
2805 VMAXMINFNM))]
2806 "TARGET_NEON_FP16INST"
2807 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2808 [(set_attr "type" "neon_fp_minmax_s<q>")]
2809)
9fc158eb
BB
2810
2811;; v<maxmin>nm intrinsics.
;; <fmaxmin_op> on two VCVTF-mode registers (VMAXMINFNM iterator); requires
;; both TARGET_NEON and TARGET_VFP5.
2812(define_insn "neon_<fmaxmin_op><mode>"
2813 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2814 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2815 (match_operand:VCVTF 2 "s_register_operand" "w")]
2816 VMAXMINFNM))]
c8d61ab8 2817 "TARGET_NEON && TARGET_VFP5"
9fc158eb
BB
2818 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2819 [(set_attr "type" "neon_fp_minmax_s<q>")]
2820)
55a9b91b 2821
0a18c19f
DS
2822;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Same RTL, condition and assembler output as the VCVTF
;; neon_<fmaxmin_op><mode> pattern, under the name <fmaxmin><mode>3.
2823(define_insn "<fmaxmin><mode>3"
2824 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2825 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2826 (match_operand:VCVTF 2 "s_register_operand" "w")]
2827 VMAXMINFNM))]
c8d61ab8 2828 "TARGET_NEON && TARGET_VFP5"
0a18c19f 2829 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
94f0f2cc 2830 [(set_attr "type" "neon_fp_minmax_s<q>")]
c956e102 2831)
88f77cba
JB
2832
;; Thin expander for VD modes: forwards its three operands unchanged to
;; gen_neon_vpadd_internal<mode>.
2833(define_expand "neon_vpadd<mode>"
cd65e265
DZ
2834 [(match_operand:VD 0 "s_register_operand")
2835 (match_operand:VD 1 "s_register_operand")
2836 (match_operand:VD 2 "s_register_operand")]
88f77cba
JB
2837 "TARGET_NEON"
2838{
2839 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2840 operands[2]));
2841 DONE;
2842})
2843
;; vpaddl: one VDQIW-mode input, result in the <V_double_width> mode.
;; <sup> comes from the VPADDL unspec iterator.
94f0f2cc 2844(define_insn "neon_vpaddl<sup><mode>"
88f77cba 2845 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
94f0f2cc
JG
2846 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2847 VPADDL))]
88f77cba 2848 "TARGET_NEON"
94f0f2cc 2849 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 2850 [(set_attr "type" "neon_reduc_add_long")]
c956e102 2851)
88f77cba 2852
;; vpadal: operand 1 (<V_double_width> mode) is tied to the destination via
;; constraint "0"; operand 2 is the VDQIW-mode input.  Only operands 0 and 2
;; appear in the assembler template.
94f0f2cc 2853(define_insn "neon_vpadal<sup><mode>"
88f77cba
JB
2854 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2855 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
94f0f2cc
JG
2856 (match_operand:VDQIW 2 "s_register_operand" "w")]
2857 VPADAL))]
88f77cba 2858 "TARGET_NEON"
94f0f2cc 2859 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
f7379e5e 2860 [(set_attr "type" "neon_reduc_add_acc")]
c956e102 2861)
88f77cba 2862
94f0f2cc
JG
;; vp<maxmin> on two VDI-mode registers, modelled as an unspec from the
;; VPMAXMIN iterator.
2863(define_insn "neon_vp<maxmin><sup><mode>"
2864 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2865 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2866 (match_operand:VDI 2 "s_register_operand" "w")]
2867 VPMAXMIN))]
88f77cba 2868 "TARGET_NEON"
94f0f2cc
JG
2869 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2870 [(set_attr "type" "neon_reduc_minmax<q>")]
c956e102 2871)
88f77cba 2872
94f0f2cc
JG
;; vp<maxmin> on two VCVTF-mode registers, modelled as an unspec from the
;; VPMAXMINF iterator.
2873(define_insn "neon_vp<maxmin>f<mode>"
2874 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2875 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2876 (match_operand:VCVTF 2 "s_register_operand" "w")]
2877 VPMAXMINF))]
88f77cba 2878 "TARGET_NEON"
94f0f2cc
JG
2879 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2880 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
c956e102 2881)
88f77cba
JB
2882
;; vrecps on two VCVTF-mode registers, modelled as UNSPEC_VRECPS.
2883(define_insn "neon_vrecps<mode>"
2884 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2885 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 2886 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
2887 UNSPEC_VRECPS))]
2888 "TARGET_NEON"
c956e102 2889 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2890 [(set_attr "type" "neon_fp_recps_s<q>")]
c956e102 2891)
88f77cba 2892
55a9b91b
MW
;; vrecps on two VH-mode registers (UNSPEC_VRECPS), enabled under
;; TARGET_NEON_FP16INST.
2893(define_insn "neon_vrecps<mode>"
2894 [(set
2895 (match_operand:VH 0 "s_register_operand" "=w")
2896 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2897 (match_operand:VH 2 "s_register_operand" "w")]
2898 UNSPEC_VRECPS))]
2899 "TARGET_NEON_FP16INST"
2900 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2901 [(set_attr "type" "neon_fp_recps_s<q>")]
2902)
2903
88f77cba
JB
;; vrsqrts on two VCVTF-mode registers, modelled as UNSPEC_VRSQRTS.
2904(define_insn "neon_vrsqrts<mode>"
2905 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2906 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
94f0f2cc 2907 (match_operand:VCVTF 2 "s_register_operand" "w")]
88f77cba
JB
2908 UNSPEC_VRSQRTS))]
2909 "TARGET_NEON"
c956e102 2910 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 2911 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
c956e102 2912)
88f77cba 2913
55a9b91b
MW
;; vrsqrts on two VH-mode registers (UNSPEC_VRSQRTS), enabled under
;; TARGET_NEON_FP16INST.
2914(define_insn "neon_vrsqrts<mode>"
2915 [(set
2916 (match_operand:VH 0 "s_register_operand" "=w")
2917 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2918 (match_operand:VH 2 "s_register_operand" "w")]
2919 UNSPEC_VRSQRTS))]
2920 "TARGET_NEON_FP16INST"
2921 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2922 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2923)
2924
;; Thin expander for VDQW modes: forwards to gen_abs<mode>2.
bab53516 2925(define_expand "neon_vabs<mode>"
cd65e265
DZ
2926 [(match_operand:VDQW 0 "s_register_operand")
2927 (match_operand:VDQW 1 "s_register_operand")]
88f77cba 2928 "TARGET_NEON"
bab53516
SL
2929{
2930 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2931 DONE;
2932})
88f77cba
JB
2933
;; vqabs on one VDQIW-mode register, modelled as UNSPEC_VQABS.
2934(define_insn "neon_vqabs<mode>"
2935 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
94f0f2cc 2936 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
88f77cba
JB
2937 UNSPEC_VQABS))]
2938 "TARGET_NEON"
c956e102 2939 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 2940 [(set_attr "type" "neon_qabs<q>")]
c956e102 2941)
88f77cba 2942
7a10ea9f
KT
;; RTL bswap on a VDQHSD-mode register, output as vrev<V_sz_elem>.8.
2943(define_insn "neon_bswap<mode>"
2944 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2945 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2946 "TARGET_NEON"
2947 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2948 [(set_attr "type" "neon_rev<q>")]
2949)
2950
;; Thin expander for VDQW modes: forwards to gen_neon_neg<mode>2.
88f77cba 2951(define_expand "neon_vneg<mode>"
cd65e265
DZ
2952 [(match_operand:VDQW 0 "s_register_operand")
2953 (match_operand:VDQW 1 "s_register_operand")]
88f77cba
JB
2954 "TARGET_NEON"
2955{
4cbb7cab 2956 emit_insn (gen_neon_neg<mode>2 (operands[0], operands[1]));
88f77cba
JB
2957 DONE;
2958})
2959
c2b7062d
TC
2960
2961;; The vcadd and vcmla patterns are made UNSPEC explicitly due to the
2962;; fact that their usage needs to guarantee that the source vectors are
2963;; contiguous. It would be wrong to describe the operation without being able
2964;; to describe the permute that is also required, but even if that is done
2965;; the permute would have been created as a LOAD_LANES which means the values
2966;; in the registers are in the wrong order.
;; vcadd on two VF-mode registers, modelled as an unspec from the VCADD
;; iterator; <rot> is printed as the trailing immediate.  Enabled under
;; TARGET_COMPLEX.
2967(define_insn "neon_vcadd<rot><mode>"
2968 [(set (match_operand:VF 0 "register_operand" "=w")
2969 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
2970 (match_operand:VF 2 "register_operand" "w")]
2971 VCADD))]
2972 "TARGET_COMPLEX"
2973 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
2974 [(set_attr "type" "neon_fcadd")]
2975)
2976
;; vcmla: adds the VCMLA unspec of operands 2 and 3 to operand 1, which is
;; tied to the destination (constraint "0").  <rot> is printed as the
;; trailing immediate.  Enabled under TARGET_COMPLEX.
2977(define_insn "neon_vcmla<rot><mode>"
2978 [(set (match_operand:VF 0 "register_operand" "=w")
2979 (plus:VF (match_operand:VF 1 "register_operand" "0")
2980 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
2981 (match_operand:VF 3 "register_operand" "w")]
2982 VCMLA)))]
2983 "TARGET_COMPLEX"
2984 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
2985 [(set_attr "type" "neon_fcmla")]
2986)
2987
;; Lane form of vcmla where operand 3 has the same VF mode as the other
;; vector operands and operand 4 is a constant lane index.  The operand
;; array is replaced by the result of neon_vcmla_lane_prepare_operands
;; before printing; operand 3 is then printed as "d%c3[%c4]".
2988(define_insn "neon_vcmla_lane<rot><mode>"
2989 [(set (match_operand:VF 0 "s_register_operand" "=w")
2990 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
2991 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
2992 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
2993 (match_operand:SI 4 "const_int_operand" "n")]
2994 VCMLA)))]
2995 "TARGET_COMPLEX"
2996 {
ee8045e5 2997 operands = neon_vcmla_lane_prepare_operands (operands);
c2b7062d
TC
2998 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
2999 }
3000 [(set_attr "type" "neon_fcmla")]
3001)
3002
;; Lane form of vcmla for VDF modes where the lane source (operand 3) is in
;; the <V_DOUBLE> mode.  The operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands before printing.
3003(define_insn "neon_vcmla_laneq<rot><mode>"
3004 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3005 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3006 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3007 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3008 (match_operand:SI 4 "const_int_operand" "n")]
3009 VCMLA)))]
3010 "TARGET_COMPLEX"
3011 {
ee8045e5 3012 operands = neon_vcmla_lane_prepare_operands (operands);
c2b7062d
TC
3013 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3014 }
3015 [(set_attr "type" "neon_fcmla")]
3016)
3017
;; Lane form of vcmla for VQ_HSF modes where the lane source (operand 3) is
;; in the <V_HALF> mode.  The operand array is replaced by the result of
;; neon_vcmla_lane_prepare_operands before printing.
3018(define_insn "neon_vcmlaq_lane<rot><mode>"
3019 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3020 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3021 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3022 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3023 (match_operand:SI 4 "const_int_operand" "n")]
3024 VCMLA)))]
3025 "TARGET_COMPLEX"
3026 {
ee8045e5 3027 operands = neon_vcmla_lane_prepare_operands (operands);
c2b7062d
TC
3028 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3029 }
3030 [(set_attr "type" "neon_fcmla")]
3031)
3032
3033
f8e109ba
TC
3034;; These instructions map to the __builtins for the Dot Product operations.
;; v<sup>dot: adds the DOTPROD unspec of the <VSI2QI>-mode operands 2 and 3
;; to the VCVTI-mode operand 1, which is tied to the destination
;; (constraint "0").  Enabled under TARGET_DOTPROD.
3035(define_insn "neon_<sup>dot<vsi2qi>"
3036 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3037 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3038 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3039 "register_operand" "w")
3040 (match_operand:<VSI2QI> 3
3041 "register_operand" "w")]
3042 DOTPROD)))]
3043 "TARGET_DOTPROD"
3044 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
51e6029f 3045 [(set_attr "type" "neon_dot<q>")]
f8e109ba
TC
3046)
3047
f348846e
SMW
3048;; These instructions map to the __builtins for the Dot Product operations.
;; vusdot.s8: adds the UNSPEC_DOT_US unspec of the <VSI2QI>-mode operands 2
;; and 3 to the VCVTI-mode operand 1, which is tied to the destination.
;; Enabled under TARGET_I8MM.
3049(define_insn "neon_usdot<vsi2qi>"
3050 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3051 (plus:VCVTI
3052 (unspec:VCVTI
3053 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3054 (match_operand:<VSI2QI> 3 "register_operand" "w")]
3055 UNSPEC_DOT_US)
3056 (match_operand:VCVTI 1 "register_operand" "0")))]
3057 "TARGET_I8MM"
3058 "vusdot.s8\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3059 [(set_attr "type" "neon_dot<q>")]
3060)
3061
f8e109ba
TC
3062;; These instructions map to the __builtins for the Dot Product
3063;; indexed operations.
;; Lane-indexed v<sup>dot (DOTPROD iterator).  Operand 3 is always a V8QI
;; register (constraint "t") and operand 4 is the lane immediate.  Before
;; printing, the lane number is rewritten through
;; NEON_ENDIAN_LANE_N (V8QImode, ...).  Enabled under TARGET_DOTPROD.
3064(define_insn "neon_<sup>dot_lane<vsi2qi>"
3065 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3066 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3067 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3068 "register_operand" "w")
3069 (match_operand:V8QI 3 "register_operand" "t")
3070 (match_operand:SI 4 "immediate_operand" "i")]
3071 DOTPROD)))]
3072 "TARGET_DOTPROD"
3073 {
3074 operands[4]
3075 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3076 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3077 }
51e6029f 3078 [(set_attr "type" "neon_dot<q>")]
f8e109ba
TC
3079)
3080
f348846e
SMW
3081;; These instructions map to the __builtins for the Dot Product
3082;; indexed operations in the v8.6 I8MM extension.
;; Lane-indexed v<sup>dot for the DOTPROD_I8MM iterator.  Operand 3 is a
;; V8QI register (constraint "t"), operand 4 is the lane immediate, and
;; operand 1 is tied to the destination.  Enabled under TARGET_I8MM.
;; NOTE(review): the assignment to operands[4] rebuilds the same integer via
;; GEN_INT (INTVAL (...)) and, unlike the DOTPROD lane pattern, applies no
;; NEON_ENDIAN_LANE_N adjustment -- confirm this is intended.
3083(define_insn "neon_<sup>dot_lane<vsi2qi>"
3084 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3085 (plus:VCVTI
3086 (unspec:VCVTI
3087 [(match_operand:<VSI2QI> 2 "register_operand" "w")
3088 (match_operand:V8QI 3 "register_operand" "t")
3089 (match_operand:SI 4 "immediate_operand" "i")]
3090 DOTPROD_I8MM)
3091 (match_operand:VCVTI 1 "register_operand" "0")))]
3092 "TARGET_I8MM"
3093 {
3094 operands[4] = GEN_INT (INTVAL (operands[4]));
3095 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3096 }
3097 [(set_attr "type" "neon_dot<q>")]
3098)
3099
f8e109ba
TC
3100;; These expands map to the Dot Product optab the vectorizer checks for.
3101;; The auto-vectorizer expects a dot product builtin that also does an
3102;; accumulation into the provided register.
3103;; Given the following pattern
3104;;
3105;; for (i=0; i<len; i++) {
3106;; c = a[i] * b[i];
3107;; r += c;
3108;; }
3109;; return r;
3110;;
3111;; This can be auto-vectorized to
3112;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3113;;
3114;; given enough iterations. However the vectorizer can keep unrolling the loop
3115;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3116;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3117;; ...
3118;;
3119;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Dot-product expander: operands 1 and 2 are the <VSI2QI>-mode inputs,
;; operand 3 is the VCVTI-mode accumulator and operand 0 the result.  It
;; emits neon_<sup>dot<vsi2qi> with operands[3] as both destination and
;; accumulator input, then sets operands[0] from operands[3].  Note that
;; operands[3] is itself written by the first instruction.
3120(define_expand "<sup>dot_prod<vsi2qi>"
3121 [(set (match_operand:VCVTI 0 "register_operand")
3122 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3123 "register_operand")
3124 (match_operand:<VSI2QI> 2
3125 "register_operand")]
3126 DOTPROD)
3127 (match_operand:VCVTI 3 "register_operand")))]
3128 "TARGET_DOTPROD"
3129{
3130 emit_insn (
3131 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3132 operands[2]));
3133 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3134 DONE;
3135})
3136
97f518b3
JW
;; copysign expander for VCVTF modes.  It loads a <V_cmp_result>-mode vector
;; whose every element is 0x80000000, copies operands[2] into operands[0],
;; views the mask in <MODE>mode through a subreg, and emits neon_vbsl<mode>
;; with (operands[0], mask, operands[0], operands[1]).
;; Presumably vbsl takes the masked bit from its third argument and the
;; remaining bits from its fourth -- confirm against neon_vbsl<mode>.
3137(define_expand "neon_copysignf<mode>"
3138 [(match_operand:VCVTF 0 "register_operand")
3139 (match_operand:VCVTF 1 "register_operand")
3140 (match_operand:VCVTF 2 "register_operand")]
3141 "TARGET_NEON"
3142 "{
3143 rtx v_bitmask_cast;
3144 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
4199c859 3145 rtx c = gen_int_mode (0x80000000, SImode);
97f518b3
JW
3146
3147 emit_move_insn (v_bitmask,
59d06c05 3148 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
97f518b3
JW
3149 emit_move_insn (operands[0], operands[2]);
3150 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3151 <VCVTF:V_cmp_result>mode, 0);
3152 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3153 operands[1]));
3154
3155 DONE;
3156 }"
3157)
3158
88f77cba
JB
;; vqneg on one VDQIW-mode register, modelled as UNSPEC_VQNEG.
3159(define_insn "neon_vqneg<mode>"
3160 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
94f0f2cc 3161 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
88f77cba
JB
3162 UNSPEC_VQNEG))]
3163 "TARGET_NEON"
c956e102 3164 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3165 [(set_attr "type" "neon_qneg<q>")]
c956e102 3166)
88f77cba
JB
3167
;; vcls on one VDQIW-mode register, modelled as UNSPEC_VCLS.
3168(define_insn "neon_vcls<mode>"
3169 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
94f0f2cc 3170 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
88f77cba
JB
3171 UNSPEC_VCLS))]
3172 "TARGET_NEON"
c956e102 3173 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3174 [(set_attr "type" "neon_cls<q>")]
c956e102 3175)
88f77cba 3176
;; RTL clz on a VDQIW-mode register, output as vclz.
b3b7bbce 3177(define_insn "clz<mode>2"
88f77cba 3178 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
b3b7bbce 3179 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
88f77cba 3180 "TARGET_NEON"
c956e102 3181 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3182 [(set_attr "type" "neon_cnt<q>")]
c956e102 3183)
88f77cba 3184
;; Thin expander for VDQIW modes: forwards to gen_clz<mode>2.
b3b7bbce 3185(define_expand "neon_vclz<mode>"
cd65e265
DZ
3186 [(match_operand:VDQIW 0 "s_register_operand")
3187 (match_operand:VDQIW 1 "s_register_operand")]
b3b7bbce
SL
3188 "TARGET_NEON"
3189{
3190 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3191 DONE;
3192})
3193
;; RTL popcount on a VE-mode register, output as vcnt.
3194(define_insn "popcount<mode>2"
88f77cba 3195 [(set (match_operand:VE 0 "s_register_operand" "=w")
b3b7bbce 3196 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
88f77cba 3197 "TARGET_NEON"
c956e102 3198 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 3199 [(set_attr "type" "neon_cnt<q>")]
c956e102 3200)
88f77cba 3201
;; Thin expander for VE modes: forwards to gen_popcount<mode>2.
b3b7bbce 3202(define_expand "neon_vcnt<mode>"
cd65e265
DZ
3203 [(match_operand:VE 0 "s_register_operand")
3204 (match_operand:VE 1 "s_register_operand")]
b3b7bbce
SL
3205 "TARGET_NEON"
3206{
3207 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3208 DONE;
3209})
3210
55a9b91b
MW
;; VRECPE for the half-precision modes of the VH iterator; only available
;; when TARGET_NEON_FP16INST holds.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON_FP16INST"
  "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
3219
88f77cba
JB
;; VRECPE for the modes of the V32 iterator, modelled as UNSPEC_VRECPE.
(define_insn "neon_vrecpe<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRECPE))]
  "TARGET_NEON"
  "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_recpe_s<q>")]
)
88f77cba
JB
3228
;; VRSQRTE for the modes of the V32 iterator, modelled as UNSPEC_VRSQRTE.
(define_insn "neon_vrsqrte<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w")
        (unspec:V32
          [(match_operand:V32 1 "s_register_operand" "w")]
          UNSPEC_VRSQRTE))]
  "TARGET_NEON"
  "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
)
88f77cba
JB
3237
;; neon_vmvn<mode> forwards both operands to the one_cmpl<mode>2_neon
;; pattern.
(define_expand "neon_vmvn<mode>"
  [(match_operand:VDQIW 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_one_cmpl<mode>2_neon (dest, src));
  DONE;
})
3246
89ffa8fc
JB
;; Move one lane of a VD-mode vector (operand 1) into a core register
;; (operand 0), sign-extending the element to SImode.  Operand 2 is the
;; constant lane index.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the vector before printing it.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
88f77cba 3265
89ffa8fc
JB
;; Move one lane of a VD-mode vector (operand 1) into a core register
;; (operand 0), zero-extending the element to SImode.  Operand 2 is the
;; constant lane index.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VD 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the vector before printing it.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<MODE>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
}
  [(set_attr "type" "neon_to_gp")]
)
88f77cba 3284
89ffa8fc
JB
;; Sign-extending lane extract from a VQ2-mode vector (operand 1) into a
;; core register (operand 0).  The instruction is printed against the
;; <V_HALF>-mode register that contains the requested lane, so the lane
;; index (operand 2) is first reduced to an index within that half.
(define_insn "neon_vget_lane<mode>_sext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (sign_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  /* Big-endian: mirror the index within the selected half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  /* Step to the half holding the lane; the stride of 2 presumably
     reflects how many hard register numbers one half occupies.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
88f77cba 3311
89ffa8fc
JB
;; Zero-extending lane extract from a VQ2-mode vector (operand 1) into a
;; core register (operand 0).  The instruction is printed against the
;; <V_HALF>-mode register that contains the requested lane, so the lane
;; index (operand 2) is first reduced to an index within that half.
(define_insn "neon_vget_lane<mode>_zext_internal"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
        (zero_extend:SI
          (vec_select:<V_elem>
            (match_operand:VQ2 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  int base_regno = REGNO (operands[1]);
  unsigned int lanes_per_half = GET_MODE_NUNITS (<MODE>mode) / 2;
  unsigned int lane = INTVAL (operands[2]);
  unsigned int lane_in_half = lane % lanes_per_half;
  rtx xops[3];

  /* Big-endian: mirror the index within the selected half.  */
  if (BYTES_BIG_ENDIAN)
    lane_in_half = lanes_per_half - 1 - lane_in_half;

  xops[0] = operands[0];
  /* Step to the half holding the lane; the stride of 2 presumably
     reflects how many hard register numbers one half occupies.  */
  xops[1] = gen_rtx_REG (<V_HALF>mode,
                         base_regno + 2 * (lane / lanes_per_half));
  xops[2] = GEN_INT (lane_in_half);
  output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", xops);

  /* Everything has already been printed above.  */
  return "";
}
  [(set_attr "type" "neon_to_gp_q")]
)
3338
;; Expander for vget_lane on the VDQW modes.  Operand 0 receives the
;; element, operand 1 is the vector and operand 2 the constant lane index.
;; 32-bit elements go through vec_extract; narrower elements use the
;; sign-extending internal pattern.
(define_expand "neon_vget_lane<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int lanes_per_64 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_64 - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3368
;; Expander for the unsigned vget_lane on the VDQIW modes.  Operand 0
;; receives the element, operand 1 is the vector and operand 2 the constant
;; lane index.  32-bit elements go through vec_extract; narrower elements
;; use the zero-extending internal pattern.
(define_expand "neon_vget_laneu<mode>"
  [(match_operand:<V_ext> 0 "s_register_operand")
   (match_operand:VDQIW 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int lanes_per_64 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_64 - 1;
      operands[2] = GEN_INT (lane);
    }

  if (GET_MODE_UNIT_BITSIZE (<MODE>mode) != 32)
    emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
                                                       operands[1],
                                                       operands[2]));
  else
    emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3398
;; vget_lane for a DImode source: the expansion is a plain move of
;; operand 1 into operand 0; the lane index (operand 2) is not used.
(define_expand "neon_vget_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
88f77cba 3408
;; vget_lane for V2DI: lane 0 is the low DImode part of operand 1, any
;; other (asserted to be 1) lane is the high part.  The selected part is
;; moved to operand 0.
(define_expand "neon_vget_lanev2di"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:V2DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  int lane;
  rtx part;

  if (BYTES_BIG_ENDIAN)
    {
      /* The intrinsics are defined in terms of a model where the
         element ordering in memory is vldm order, whereas the generic
         RTL is defined in terms of a model where the element ordering
         in memory is array order.  Convert the lane number to conform
         to this model.  */
      unsigned int lanes = 2;
      unsigned int flipped = INTVAL (operands[2]);

      flipped ^= lanes - 1;
      operands[2] = GEN_INT (flipped);
    }

  lane = INTVAL (operands[2]);
  gcc_assert (lane == 0 || lane == 1);

  if (lane == 0)
    part = gen_lowpart (DImode, operands[1]);
  else
    part = gen_highpart (DImode, operands[1]);

  emit_move_insn (operands[0], part);
  DONE;
})
b617fc71 3437
;; vset_lane on the VDQ modes.  Operand 0 is the result vector, operand 1
;; the new element, operand 2 the original vector and operand 3 the
;; constant lane index.  The lane is handed to vec_set<mode>_internal as a
;; one-hot mask (1 << lane).
(define_expand "neon_vset_lane<mode>"
  [(match_operand:VDQ 0 "s_register_operand")
   (match_operand:<V_elem> 1 "s_register_operand")
   (match_operand:VDQ 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  unsigned int lane = INTVAL (operands[3]);
  rtx lane_mask;

  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within each 64-bit group of elements.  */
      unsigned int lanes_per_64 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
      lane ^= lanes_per_64 - 1;
    }

  lane_mask = GEN_INT (1 << lane);
  emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
                                         lane_mask, operands[2]));
  DONE;
})
88f77cba 3458
; A DImode operand holds a single element, so setting its only lane just
; copies the new value (operand 1) into the destination (operand 0).
; The original vector (operand 2) and the lane index (operand 3) are
; therefore ignored.

(define_expand "neon_vset_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:DI 2 "s_register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx new_value = operands[1];

  emit_move_insn (dest, new_value);
  DONE;
})
88f77cba
JB
3471
;; vcreate: reinterpret the DImode value in operand 1 as a vector of the
;; VD_RE mode (via gen_lowpart) and move it into operand 0.
(define_expand "neon_vcreate<mode>"
  [(match_operand:VD_RE 0 "s_register_operand")
   (match_operand:DI 1 "general_operand")]
  "TARGET_NEON"
{
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
  DONE;
})
3481
;; Duplicate a scalar held in a core register (operand 1) into every
;; element of a VX-mode vector (operand 0).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:VX 0 "s_register_operand" "=w")
        (vec_duplicate:VX
          (match_operand:<V_elem> 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.<V_sz_elem>\t%<V_reg>0, %1"
  [(set_attr "type" "neon_from_gp<q>")]
)
88f77cba 3489
92422235
CL
;; Duplicate an HFmode scalar from a core register into all four elements
;; of a V4HF vector.
(define_insn "neon_vdup_nv4hf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (vec_duplicate:V4HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3497
;; Duplicate an HFmode scalar from a core register into all eight elements
;; of a V8HF vector.
(define_insn "neon_vdup_nv8hf"
  [(set (match_operand:V8HF 0 "s_register_operand" "=w")
        (vec_duplicate:V8HF
          (match_operand:HF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3505
17a13507
MI
;; Duplicate a BFmode scalar from a core register into all four elements
;; of a V4BF vector.
(define_insn "neon_vdup_nv4bf"
  [(set (match_operand:V4BF 0 "s_register_operand" "=w")
        (vec_duplicate:V4BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%P0, %1"
  [(set_attr "type" "neon_from_gp")]
)
3513
;; Duplicate a BFmode scalar from a core register into all eight elements
;; of a V8BF vector.
(define_insn "neon_vdup_nv8bf"
  [(set (match_operand:V8BF 0 "s_register_operand" "=w")
        (vec_duplicate:V8BF
          (match_operand:BF 1 "s_register_operand" "r")))]
  "TARGET_NEON"
  "vdup.16\t%q0, %1"
  [(set_attr "type" "neon_from_gp_q")]
)
3521
814a4c3b
DJ
;; Duplicate a scalar into every element of a V32-mode vector.  The first
;; alternative takes the scalar from an "r" register, the second from a
;; "t" register (printed with the %y modifier).
(define_insn "neon_vdup_n<mode>"
  [(set (match_operand:V32 0 "s_register_operand" "=w,w")
        (vec_duplicate:V32
          (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
  "TARGET_NEON"
  "@
  vdup.<V_sz_elem>\t%<V_reg>0, %1
  vdup.<V_sz_elem>\t%<V_reg>0, %y1"
  [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
)
3531
;; vdup_n for DImode: both operands are DImode, so the expansion is a
;; plain move of operand 1 into operand 0.
(define_expand "neon_vdup_ndi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
}
)
88f77cba
JB
3541
;; Duplicate a DImode value into both elements of a V2DI vector using two
;; VMOVs (hence length 8).  Alternative 0 takes the source from "r"
;; registers, alternative 1 from a "w" register.
(define_insn "neon_vdup_nv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "s_register_operand" "r,w")))]
  "TARGET_NEON"
  "@
  vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
  vmov\t%e0, %P1\;vmov\t%f0, %P1"
  [(set_attr "length" "8")
   (set_attr "type" "multiple")]
)
88f77cba 3552
a277dd9b
SL
;; Duplicate one lane (operand 2) of operand 1 into every element of a
;; VDQW-mode result.  <Is_d_reg> picks the print modifier used for the
;; destination register.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VDQW 0 "s_register_operand" "=w")
        (vec_duplicate:VDQW
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the source vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
88f77cba 3574
;; Duplicate one lane (operand 2) of operand 1 into every element of a
;; VHFBF-mode result.  Requires NEON together with FP16 or BF16 SIMD
;; support.  <Is_d_reg> picks the print modifier used for the destination.
(define_insn "neon_vdup_lane<mode>_internal"
  [(set (match_operand:VHFBF 0 "s_register_operand" "=w")
        (vec_duplicate:VHFBF
          (vec_select:<V_elem>
            (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Mirror the lane index within the source vector.  */
      int lane = INTVAL (operands[2]);
      int mirrored = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - lane;
      operands[2] = GEN_INT (mirrored);
    }

  return (<Is_d_reg>
          ? "vdup.<V_sz_elem>\t%P0, %P1[%c2]"
          : "vdup.<V_sz_elem>\t%q0, %P1[%c2]");
}
  [(set_attr "type" "neon_dup<q>")]
)
3596
;; Expander for vdup_lane on the VDQW modes: adjusts the lane index on
;; big-endian targets, then emits neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VDQW 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within each 64-bit group of elements.  */
      unsigned int lanes_per_64
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_64 - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
88f77cba 3615
;; Expander for vdup_lane on the VHFBF modes: adjusts the lane index on
;; big-endian targets, then emits neon_vdup_lane<mode>_internal.
(define_expand "neon_vdup_lane<mode>"
  [(match_operand:VHFBF 0 "s_register_operand")
   (match_operand:<V_double_vector_mode> 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON && (TARGET_FP16 || TARGET_BF16_SIMD)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Flip the lane index within each 64-bit group of elements.  */
      unsigned int lanes_per_64
        = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
      unsigned int lane = INTVAL (operands[2]);

      lane ^= lanes_per_64 - 1;
      operands[2] = GEN_INT (lane);
    }

  emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
                                                operands[2]));
  DONE;
})
3634
88f77cba
JB
; The lane index (operand 2) is ignored, since only zero is valid here;
; the expansion is a plain DImode move.
(define_expand "neon_vdup_lanedi"
  [(match_operand:DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_move_insn (dest, src);
  DONE;
})
3645
a277dd9b
SL
; The lane index (operand 2) is ignored for V2DI too, as the DImode source
; operand has only a single element; this reduces to neon_vdup_nv2di.
(define_expand "neon_vdup_lanev2di"
  [(match_operand:V2DI 0 "s_register_operand")
   (match_operand:DI 1 "s_register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_NEON"
{
  rtx dest = operands[0];
  rtx src = operands[1];

  emit_insn (gen_neon_vdup_nv2di (dest, src));
  DONE;
})
88f77cba 3656
b440f324
RH
; Register swap.  The pattern is disabled before reload because we don't
; want combine doing something silly with it, but it is used by the
; post-reload expansion of neon_vcombine.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
        (match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1)
        (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_permute<q>")]
)
3667
88f77cba
JB
;; In this insn, operand 1 should be the low part and operand 2 the high
;; part of the destination vector.  The insn itself prints nothing ("#");
;; after reload it is split by neon_split_vcombine.
;; FIXME: A different implementation of this builtin could make it much
;; more likely that we wouldn't actually need to output anything (we could
;; make it so that the reg allocator puts things in the right places
;; magically instead).  Lack of subregs for vectors makes that tricky
;; though, I think.

(define_insn_and_split "neon_vcombine<mode>"
  [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
        (vec_concat:<V_DOUBLE>
          (match_operand:VDX 1 "s_register_operand" "w")
          (match_operand:VDX 2 "s_register_operand" "w")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  neon_split_vcombine (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)
88f77cba 3690
ddfd2edf
RS
;; vget_high: move the <V_HALF>-mode subreg of operand 1 that starts at
;; byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
(define_expand "neon_vget_high<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQXBF 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx high_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
                           GET_MODE_SIZE (<V_HALF>mode));

  emit_move_insn (operands[0], high_half);
  DONE;
})
a277dd9b 3701
ddfd2edf
RS
;; vget_low: move the <V_HALF>-mode subreg of operand 1 at byte offset 0
;; into operand 0.
(define_expand "neon_vget_low<mode>"
  [(match_operand:<V_HALF> 0 "s_register_operand")
   (match_operand:VQX 1 "s_register_operand")]
  "TARGET_NEON"
{
  rtx low_half
    = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);

  emit_move_insn (operands[0], low_half);
  DONE;
})
88f77cba 3712
5bf4dcf2
DP
;; Signed integer vector to floating-point vector conversion.  Not
;; available under -frounding-math (flag_rounding_math).
(define_insn "float<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3720
;; Unsigned integer vector to floating-point vector conversion.  Not
;; available under -frounding-math (flag_rounding_math).
(define_insn "floatuns<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_float:<V_CVTTO>
          (match_operand:VCVTI 1 "s_register_operand" "w")))]
  "TARGET_NEON && !flag_rounding_math"
  "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
3728
;; Floating-point vector to signed integer vector conversion (fix).
(define_insn "fix_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3736
;; Floating-point vector to unsigned integer vector conversion
;; (unsigned_fix).
(define_insn "fixuns_trunc<mode><V_cvtto>2"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unsigned_fix:<V_CVTTO>
          (match_operand:VCVTF 1 "s_register_operand" "w")))]
  "TARGET_NEON"
  "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
3744
;; VCVT from the floating-point modes of VCVTF to a 32-bit integer
;; vector; <sup> (from the VCVT_US iterator) supplies the signedness
;; letter of the destination type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba 3753
;; VCVT from the 32-bit integer modes of VCVTI to a floating-point
;; vector; <sup> (from the VCVT_US iterator) supplies the signedness
;; letter of the source type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON"
  "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba 3762
5819f96f
KT
;; Widen a V4HF vector (operand 1) to V4SF (operand 0).  Needs
;; TARGET_FP16 in addition to NEON.
(define_insn "neon_vcvtv4sfv4hf"
  [(set (match_operand:V4SF 0 "s_register_operand" "=w")
        (unspec:V4SF
          [(match_operand:V4HF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f32.f16\t%q0, %P1"
  [(set_attr "type" "neon_fp_cvt_widen_h")]
)
3771
;; Narrow a V4SF vector (operand 1) to V4HF (operand 0).  Needs
;; TARGET_FP16 in addition to NEON.
(define_insn "neon_vcvtv4hfv4sf"
  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
        (unspec:V4HF
          [(match_operand:V4SF 1 "s_register_operand" "w")]
          UNSPEC_VCVT))]
  "TARGET_NEON && TARGET_FP16"
  "vcvt.f16.f32\t%P0, %q1"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)
3780
55a9b91b
MW
;; VCVT from the 16-bit integer modes of VCVTHI to a half-precision
;; vector; <sup> supplies the signedness letter of the source type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3791
;; VCVT from the half-precision modes of VH to a 16-bit integer vector;
;; <sup> supplies the signedness letter of the destination type.
(define_insn "neon_vcvt<sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_US))]
  "TARGET_NEON_FP16INST"
  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3802
;; VCVT with an immediate third operand (operand 2), from the
;; floating-point modes of VCVTF to a 32-bit integer vector.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTF 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* NOTE(review): arm_const_bounds presumably diagnoses an immediate
     outside the bounds given here; confirm whether the upper bound is
     exclusive.  */
  arm_const_bounds (operands[2], 1, 33);

  return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
)
88f77cba 3815
55a9b91b
MW
;; VCVT with an immediate third operand (operand 2), from the
;; half-precision modes of VH to a 16-bit integer vector.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): arm_const_bounds presumably diagnoses an immediate
     outside the bounds given here; confirm whether the upper bound is
     exclusive.  */
  arm_const_bounds (operands[2], 0, 17);

  return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3829
;; VCVT with an immediate third operand (operand 2), from the 32-bit
;; integer modes of VCVTI to a floating-point vector.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<V_CVTTO>
          [(match_operand:VCVTI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON"
{
  /* NOTE(review): arm_const_bounds presumably diagnoses an immediate
     outside the bounds given here; confirm whether the upper bound is
     exclusive.  */
  arm_const_bounds (operands[2], 1, 33);

  return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
)
88f77cba 3842
55a9b91b
MW
;; VCVT with an immediate third operand (operand 2), from the 16-bit
;; integer modes of VCVTHI to a half-precision vector.
(define_insn "neon_vcvt<sup>_n<mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VCVTHI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          VCVT_US_N))]
  "TARGET_NEON_FP16INST"
{
  /* NOTE(review): arm_const_bounds presumably diagnoses an immediate
     outside the bounds given here; confirm whether the upper bound is
     exclusive.  */
  arm_const_bounds (operands[2], 0, 17);

  return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
}
  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
)
3856
;; Half-precision to 16-bit integer conversions whose mnemonic suffix
;; <vcvth_op> and signedness <sup> both come from the VCVT_HF_US iterator.
(define_insn "neon_vcvt<vcvth_op><sup><mode>"
  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
        (unspec:<VH_CVTTO>
          [(match_operand:VH 1 "s_register_operand" "w")]
          VCVT_HF_US))]
  "TARGET_NEON_FP16INST"
  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
)
3867
88f77cba
JB
;; VMOVN: narrow a VN-mode vector (operand 1, printed with %q) into a
;; <V_narrow>-mode result (operand 0, printed with %P).
(define_insn "neon_vmovn<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VMOVN))]
  "TARGET_NEON"
  "vmovn.<V_if_elem>\t%P0, %q1"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
88f77cba 3876
;; VQMOVN: narrow a VN-mode vector into a <V_narrow>-mode result; <sup>
;; (from the VQMOVN iterator) supplies the signedness letter.
(define_insn "neon_vqmovn<sup><mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          VQMOVN))]
  "TARGET_NEON"
  "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
88f77cba
JB
3885
;; VQMOVUN: narrow a VN-mode vector into a <V_narrow>-mode result,
;; modelled as UNSPEC_VQMOVUN.
(define_insn "neon_vqmovun<mode>"
  [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
        (unspec:<V_narrow>
          [(match_operand:VN 1 "s_register_operand" "w")]
          UNSPEC_VQMOVUN))]
  "TARGET_NEON"
  "vqmovun.<V_s_elem>\t%P0, %q1"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
88f77cba 3894
;; VMOVL: widen a VW-mode vector (operand 1, printed with %P) into a
;; <V_widen>-mode result (operand 0, printed with %q); <sup> (from the
;; VMOVL iterator) supplies the signedness letter.
(define_insn "neon_vmovl<sup><mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VW 1 "s_register_operand" "w")]
          VMOVL))]
  "TARGET_NEON"
  "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
  [(set_attr "type" "neon_shift_imm_long")]
)
88f77cba
JB
3903
;; VMUL by scalar for the VMD modes: operand 1 times lane operand 3 of
;; operand 2.  The scheduling type depends on whether the mode is a
;; floating-point one.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "w")
           (match_operand:VMD 2 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
88f77cba
JB
3920
;; VMUL by scalar for the VMQ modes: operand 1 times lane operand 3 of
;; the <V_HALF>-mode operand 2.  The scheduling type depends on whether
;; the mode is a floating-point one.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VMQ 0 "s_register_operand" "=w")
        (unspec:VMQ
          [(match_operand:VMQ 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON"
  "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mul_s_scalar<q>")
                      (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
)
88f77cba 3937
55a9b91b
MW
;; VMUL by scalar for the half-precision VH modes; the scalar is lane
;; operand 3 of the V4HF operand 2.
(define_insn "neon_vmul_lane<mode>"
  [(set (match_operand:VH 0 "s_register_operand" "=w")
        (unspec:VH
          [(match_operand:VH 1 "s_register_operand" "w")
           (match_operand:V4HF 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VMUL_LANE))]
  "TARGET_NEON_FP16INST"
  "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
  [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
)
3949
;; VMULL by scalar: operand 1 times lane operand 3 of operand 2 (both
;; VMDI), producing a <V_widen>-mode result.  <sup> (from the VMULL_LANE
;; iterator) supplies the signedness letter.
(define_insn "neon_vmull<sup>_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VMULL_LANE))]
  "TARGET_NEON"
  "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
)
88f77cba
JB
3963
;; VQDMULL by scalar: operand 1 and lane operand 3 of operand 2 (both
;; VMDI), producing a <V_widen>-mode result.
(define_insn "neon_vqdmull_lane<mode>"
  [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
        (unspec:<V_widen>
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          UNSPEC_VQDMULL_LANE))]
  "TARGET_NEON"
  "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
)
88f77cba 3977
;; VQDMULH / VQRDMULH by scalar for the VMQI modes; <r> (from the
;; VQDMULH_LANE iterator) selects between the two mnemonics.  The scalar
;; is lane operand 3 of the <V_HALF>-mode operand 2.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "w")
           (match_operand:<V_HALF> 2 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
  "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]"
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
)
88f77cba 3991
;; VQDMULH / VQRDMULH by scalar for the VMDI modes; <r> (from the
;; VQDMULH_LANE iterator) selects between the two mnemonics.  The scalar
;; is lane operand 3 of operand 2.
;; All vector operands here are printed with %P, so the scheduling type
;; is the plain "_scalar" one rather than the "_scalar_q" type used by
;; the VMQI variant of this pattern.
(define_insn "neon_vq<r>dmulh_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "w")
           (match_operand:VMDI 2 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 3 "immediate_operand" "i")]
          VQDMULH_LANE))]
  "TARGET_NEON"
{
  return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
}
  [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar")]
)
88f77cba 4005
5f2ca3b2
MW
;; vqrdmlah_lane, vqrdmlsh_lane for the VMQI modes.  Operand 1 is tied to
;; the destination ("0"); the scalar is lane operand 4 of the
;; <V_HALF>-mode operand 3.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMQI 0 "s_register_operand" "=w")
        (unspec:VMQI
          [(match_operand:VMQI 1 "s_register_operand" "0")
           (match_operand:VMQI 2 "s_register_operand" "w")
           (match_operand:<V_HALF> 3 "s_register_operand"
                                     "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
)
4022
;; vqrdmlah_lane, vqrdmlsh_lane for the VMDI modes.  Operand 1 is tied to
;; the destination ("0"); the scalar is lane operand 4 of operand 3.
(define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
  [(set (match_operand:VMDI 0 "s_register_operand" "=w")
        (unspec:VMDI
          [(match_operand:VMDI 1 "s_register_operand" "0")
           (match_operand:VMDI 2 "s_register_operand" "w")
           (match_operand:VMDI 3 "s_register_operand"
                                 "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          VQRDMLH_AS))]
  "TARGET_NEON_RDMA"
  "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]"
  [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
)
4038
88f77cba
JB
;; VMLA by scalar for the VMD modes.  Operand 1 is the accumulator and is
;; tied to the destination ("0"); the scalar is lane operand 4 of
;; operand 3.  The scheduling type depends on whether the mode is a
;; floating-point one.
(define_insn "neon_vmla_lane<mode>"
  [(set (match_operand:VMD 0 "s_register_operand" "=w")
        (unspec:VMD
          [(match_operand:VMD 1 "s_register_operand" "0")
           (match_operand:VMD 2 "s_register_operand" "w")
           (match_operand:VMD 3 "s_register_operand"
                                "<scalar_mul_constraint>")
           (match_operand:SI 4 "immediate_operand" "i")]
          UNSPEC_VMLA_LANE))]
  "TARGET_NEON"
  "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]"
  [(set (attr "type")
        (if_then_else (match_test "<Is_float_mode>")
                      (const_string "neon_fp_mla_s_scalar<q>")
                      (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
)
88f77cba
JB
4056
;; Multiply-accumulate by lane for the VMQ modes (emits "vmla").  Operand 1
;; is the accumulator, tied to the destination.  The scalar comes from a
;; <V_HALF>-mode register (operand 3), indexed by the immediate in operand 4;
;; the vector operands are printed with %q and the scalar register with %P.
;; The "type" attribute is chosen by <Is_float_mode>.
4057(define_insn "neon_vmla_lane<mode>"
4058 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4059 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4060 (match_operand:VMQ 2 "s_register_operand" "w")
4061 (match_operand:<V_HALF> 3 "s_register_operand"
4062 "<scalar_mul_constraint>")
94f0f2cc 4063 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4064 UNSPEC_VMLA_LANE))]
4065 "TARGET_NEON"
b617fc71 4066{
b617fc71
JB
4067 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4068}
003bb7f3 4069 [(set (attr "type")
b75b1be2 4070 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4071 (const_string "neon_fp_mla_s_scalar<q>")
4072 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4073)
88f77cba 4074
;; Widening multiply-accumulate by lane (emits "vmlal").  The destination and
;; the tied accumulator (operand 1) are in <V_widen> mode; the multiplicand
;; (operand 2) and the scalar register (operand 3) are in the VMDI mode, and
;; operand 4 is the immediate lane number.  <sup> supplies the signedness
;; letter both in the pattern name and in the emitted data type.
94f0f2cc 4075(define_insn "neon_vmlal<sup>_lane<mode>"
88f77cba
JB
4076 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4077 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4078 (match_operand:VMDI 2 "s_register_operand" "w")
4079 (match_operand:VMDI 3 "s_register_operand"
4080 "<scalar_mul_constraint>")
94f0f2cc
JG
4081 (match_operand:SI 4 "immediate_operand" "i")]
4082 VMLAL_LANE))]
88f77cba 4083 "TARGET_NEON"
b617fc71 4084{
94f0f2cc 4085 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
b617fc71 4086}
f7379e5e 4087 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
c956e102 4088)
88f77cba
JB
4089
;; By-lane "vqdmlal" with a widened result.  Destination and tied
;; accumulator (operand 1) are in <V_widen> mode, the multiplicand and the
;; scalar register are in the VMDI mode, and operand 4 is the immediate lane
;; number.
4090(define_insn "neon_vqdmlal_lane<mode>"
4091 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4092 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4093 (match_operand:VMDI 2 "s_register_operand" "w")
4094 (match_operand:VMDI 3 "s_register_operand"
4095 "<scalar_mul_constraint>")
94f0f2cc 4096 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4097 UNSPEC_VQDMLAL_LANE))]
4098 "TARGET_NEON"
b617fc71 4099{
b617fc71
JB
4100 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4101}
f7379e5e 4102 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
c956e102 4103)
88f77cba
JB
4104
;; Multiply-subtract by lane for the VMD modes (emits "vmls").  Operand 1 is
;; tied to the destination; operand 2 is the vector multiplicand; operand 3
;; holds the scalar and operand 4 is the immediate lane number.  The "type"
;; attribute is chosen by <Is_float_mode>, exactly as for neon_vmla_lane.
4105(define_insn "neon_vmls_lane<mode>"
4106 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4107 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4108 (match_operand:VMD 2 "s_register_operand" "w")
4109 (match_operand:VMD 3 "s_register_operand"
4110 "<scalar_mul_constraint>")
94f0f2cc 4111 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4112 UNSPEC_VMLS_LANE))]
4113 "TARGET_NEON"
b617fc71 4114{
b617fc71
JB
4115 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4116}
003bb7f3 4117 [(set (attr "type")
b75b1be2 4118 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4119 (const_string "neon_fp_mla_s_scalar<q>")
4120 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4121)
88f77cba
JB
4122
;; Multiply-subtract by lane for the VMQ modes (emits "vmls").  Operand 1 is
;; tied to the destination.  The scalar comes from a <V_HALF>-mode register
;; (operand 3) indexed by the immediate in operand 4; vector operands are
;; printed with %q and the scalar register with %P.
4123(define_insn "neon_vmls_lane<mode>"
4124 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4125 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4126 (match_operand:VMQ 2 "s_register_operand" "w")
4127 (match_operand:<V_HALF> 3 "s_register_operand"
4128 "<scalar_mul_constraint>")
94f0f2cc 4129 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4130 UNSPEC_VMLS_LANE))]
4131 "TARGET_NEON"
b617fc71 4132{
b617fc71
JB
4133 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4134}
003bb7f3 4135 [(set (attr "type")
b75b1be2 4136 (if_then_else (match_test "<Is_float_mode>")
f7379e5e
JG
4137 (const_string "neon_fp_mla_s_scalar<q>")
4138 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
c956e102 4139)
88f77cba 4140
;; Widening multiply-subtract by lane (emits "vmlsl").  Destination and tied
;; accumulator (operand 1) are in <V_widen> mode; the multiplicand and the
;; scalar register are in the VMDI mode; operand 4 is the immediate lane
;; number.  <sup> supplies the signedness letter of the emitted data type.
94f0f2cc 4141(define_insn "neon_vmlsl<sup>_lane<mode>"
88f77cba
JB
4142 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4143 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4144 (match_operand:VMDI 2 "s_register_operand" "w")
4145 (match_operand:VMDI 3 "s_register_operand"
4146 "<scalar_mul_constraint>")
94f0f2cc
JG
4147 (match_operand:SI 4 "immediate_operand" "i")]
4148 VMLSL_LANE))]
88f77cba 4149 "TARGET_NEON"
b617fc71 4150{
94f0f2cc 4151 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
b617fc71 4152}
f7379e5e 4153 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
c956e102 4154)
88f77cba
JB
4155
;; By-lane "vqdmlsl" with a widened result.  Destination and tied
;; accumulator (operand 1) are in <V_widen> mode, the multiplicand and the
;; scalar register are in the VMDI mode, and operand 4 is the immediate lane
;; number.
4156(define_insn "neon_vqdmlsl_lane<mode>"
4157 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4158 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4159 (match_operand:VMDI 2 "s_register_operand" "w")
4160 (match_operand:VMDI 3 "s_register_operand"
4161 "<scalar_mul_constraint>")
94f0f2cc 4162 (match_operand:SI 4 "immediate_operand" "i")]
88f77cba
JB
4163 UNSPEC_VQDMLSL_LANE))]
4164 "TARGET_NEON"
b617fc71 4165{
b617fc71
JB
4166 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4167}
f7379e5e 4168 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
c956e102 4169)
88f77cba
JB
4170
4171; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4172; core register into a temp register, then use a scalar taken from that. This
4173; isn't an optimal solution if e.g. the scalar has just been read from memory
4174; or extracted from another vector. In the latter case it's currently better to
4175; use the "_lane" variant, and the former case can probably be implemented
4176; using vld1_lane, but that hasn't been done yet.
4177
;; Expander for multiply-by-scalar on the VMD modes.  Nothing is matched
;; directly (the body ends in DONE): the scalar in operand 2 is inserted at
;; index 0 of a fresh <MODE>mode pseudo with neon_vset_lane, and the product
;; is then produced by neon_vmul_lane selecting lane 0 (const0_rtx).
4178(define_expand "neon_vmul_n<mode>"
cd65e265
DZ
4179 [(match_operand:VMD 0 "s_register_operand")
4180 (match_operand:VMD 1 "s_register_operand")
4181 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4182 "TARGET_NEON"
4183{
4184 rtx tmp = gen_reg_rtx (<MODE>mode);
 /* tmp is passed as both the first and the third argument; presumably only
    lane 0 of it is meaningful afterwards -- confirm against neon_vset_lane.  */
4185 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4186 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4187 const0_rtx));
88f77cba
JB
4188 DONE;
4189})
4190
;; Expander for multiply-by-scalar on the VMQ modes.  The scalar in operand 2
;; is inserted at index 0 of a fresh <V_HALF>mode pseudo (matching the
;; <V_HALF> scalar operand used by the by-lane patterns for these modes), and
;; the product is produced by neon_vmul_lane selecting lane 0.
4191(define_expand "neon_vmul_n<mode>"
cd65e265
DZ
4192 [(match_operand:VMQ 0 "s_register_operand")
4193 (match_operand:VMQ 1 "s_register_operand")
4194 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4195 "TARGET_NEON"
4196{
4197 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4198 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4199 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4200 const0_rtx));
88f77cba
JB
4201 DONE;
4202})
4203
55a9b91b
MW
;; Expander for multiply-by-scalar on the VH modes, enabled under
;; TARGET_NEON_FP16INST.  Whatever the vector mode, the scalar is inserted at
;; index 0 of a V4HFmode temporary (the mode is hard-coded here rather than
;; derived from <MODE>), which neon_vmul_lane then reads as lane 0.
4204(define_expand "neon_vmul_n<mode>"
4205 [(match_operand:VH 0 "s_register_operand")
4206 (match_operand:VH 1 "s_register_operand")
4207 (match_operand:<V_elem> 2 "s_register_operand")]
4208 "TARGET_NEON_FP16INST"
4209{
4210 rtx tmp = gen_reg_rtx (V4HFmode);
4211 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4212 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4213 const0_rtx));
4214 DONE;
4215})
4216
;; Expander for the "s"-suffixed widening multiply-by-scalar.  The result
;; (operand 0) is in <V_widen> mode.  The scalar in operand 2 is inserted at
;; index 0 of a fresh <MODE>mode pseudo and neon_vmulls_lane is emitted with
;; lane 0.
94f0f2cc 4217(define_expand "neon_vmulls_n<mode>"
cd65e265
DZ
4218 [(match_operand:<V_widen> 0 "s_register_operand")
4219 (match_operand:VMDI 1 "s_register_operand")
4220 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4221 "TARGET_NEON"
4222{
4223 rtx tmp = gen_reg_rtx (<MODE>mode);
4224 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
94f0f2cc
JG
4225 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4226 const0_rtx));
4227 DONE;
4228})
4229
;; Expander for the "u"-suffixed widening multiply-by-scalar.  Identical in
;; structure to neon_vmulls_n, but the second insn emitted is
;; neon_vmullu_lane.
4230(define_expand "neon_vmullu_n<mode>"
cd65e265
DZ
4231 [(match_operand:<V_widen> 0 "s_register_operand")
4232 (match_operand:VMDI 1 "s_register_operand")
4233 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4234 "TARGET_NEON"
4235{
4236 rtx tmp = gen_reg_rtx (<MODE>mode);
4237 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4238 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4239 const0_rtx));
88f77cba
JB
4240 DONE;
4241})
4242
;; Expander for vqdmull by scalar.  The result (operand 0) is in <V_widen>
;; mode.  The scalar in operand 2 is inserted at index 0 of a fresh
;; <MODE>mode pseudo and neon_vqdmull_lane is emitted with lane 0.
4243(define_expand "neon_vqdmull_n<mode>"
cd65e265
DZ
4244 [(match_operand:<V_widen> 0 "s_register_operand")
4245 (match_operand:VMDI 1 "s_register_operand")
4246 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4247 "TARGET_NEON"
4248{
4249 rtx tmp = gen_reg_rtx (<MODE>mode);
4250 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4251 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc 4252 const0_rtx));
88f77cba
JB
4253 DONE;
4254})
4255
;; Expander for vqdmulh by scalar on the VMDI modes.  The scalar in operand 2
;; is inserted at index 0 of a fresh <MODE>mode pseudo and
;; neon_vqdmulh_lane is emitted with lane 0.
4256(define_expand "neon_vqdmulh_n<mode>"
cd65e265
DZ
4257 [(match_operand:VMDI 0 "s_register_operand")
4258 (match_operand:VMDI 1 "s_register_operand")
4259 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4260 "TARGET_NEON"
4261{
4262 rtx tmp = gen_reg_rtx (<MODE>mode);
4263 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4264 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc
JG
4265 const0_rtx));
4266 DONE;
4267})
4268
;; Expander for vqrdmulh by scalar on the VMDI modes.  Same structure as
;; neon_vqdmulh_n for these modes, but the second insn emitted is
;; neon_vqrdmulh_lane.
4269(define_expand "neon_vqrdmulh_n<mode>"
cd65e265
DZ
4270 [(match_operand:VMDI 0 "s_register_operand")
4271 (match_operand:VMDI 1 "s_register_operand")
4272 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4273 "TARGET_NEON"
4274{
4275 rtx tmp = gen_reg_rtx (<MODE>mode);
4276 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4277 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4278 const0_rtx));
88f77cba
JB
4279 DONE;
4280})
4281
;; Expander for vqdmulh by scalar on the VMQI modes.  The scalar in operand 2
;; is inserted at index 0 of a fresh <V_HALF>mode pseudo and
;; neon_vqdmulh_lane is emitted with lane 0.
4282(define_expand "neon_vqdmulh_n<mode>"
cd65e265
DZ
4283 [(match_operand:VMQI 0 "s_register_operand")
4284 (match_operand:VMQI 1 "s_register_operand")
4285 (match_operand:<V_elem> 2 "s_register_operand")]
88f77cba
JB
4286 "TARGET_NEON"
4287{
4288 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4289 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4290 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
94f0f2cc
JG
4291 const0_rtx));
4292 DONE;
4293})
4294
;; Expander for vqrdmulh by scalar on the VMQI modes.  Same structure as
;; neon_vqdmulh_n for these modes (temporary in <V_HALF>mode), but the second
;; insn emitted is neon_vqrdmulh_lane.
4295(define_expand "neon_vqrdmulh_n<mode>"
cd65e265
DZ
4296 [(match_operand:VMQI 0 "s_register_operand")
4297 (match_operand:VMQI 1 "s_register_operand")
4298 (match_operand:<V_elem> 2 "s_register_operand")]
94f0f2cc
JG
4299 "TARGET_NEON"
4300{
4301 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4302 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4303 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4304 const0_rtx));
88f77cba
JB
4305 DONE;
4306})
4307
;; Expander for multiply-accumulate by scalar on the VMD modes.  Operands 1
;; and 2 are forwarded unchanged to neon_vmla_lane; the scalar in operand 3
;; is first inserted at index 0 of a fresh <MODE>mode pseudo, which is then
;; used as the by-lane operand with lane 0.
4308(define_expand "neon_vmla_n<mode>"
cd65e265
DZ
4309 [(match_operand:VMD 0 "s_register_operand")
4310 (match_operand:VMD 1 "s_register_operand")
4311 (match_operand:VMD 2 "s_register_operand")
4312 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4313 "TARGET_NEON"
4314{
4315 rtx tmp = gen_reg_rtx (<MODE>mode);
4316 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4317 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4318 tmp, const0_rtx));
88f77cba
JB
4319 DONE;
4320})
4321
;; Expander for multiply-accumulate by scalar on the VMQ modes.  As for the
;; VMD variant, except that the temporary holding the scalar is created in
;; <V_HALF>mode before being handed to neon_vmla_lane with lane 0.
4322(define_expand "neon_vmla_n<mode>"
cd65e265
DZ
4323 [(match_operand:VMQ 0 "s_register_operand")
4324 (match_operand:VMQ 1 "s_register_operand")
4325 (match_operand:VMQ 2 "s_register_operand")
4326 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4327 "TARGET_NEON"
4328{
4329 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4330 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4331 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4332 tmp, const0_rtx));
88f77cba
JB
4333 DONE;
4334})
4335
;; Expander for the "s"-suffixed widening multiply-accumulate by scalar.
;; Operands 0 and 1 are in <V_widen> mode, operand 2 in the VMDI mode.  The
;; scalar in operand 3 is inserted at index 0 of a fresh <MODE>mode pseudo
;; and neon_vmlals_lane is emitted with lane 0.
94f0f2cc 4336(define_expand "neon_vmlals_n<mode>"
cd65e265
DZ
4337 [(match_operand:<V_widen> 0 "s_register_operand")
4338 (match_operand:<V_widen> 1 "s_register_operand")
4339 (match_operand:VMDI 2 "s_register_operand")
4340 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4341 "TARGET_NEON"
4342{
4343 rtx tmp = gen_reg_rtx (<MODE>mode);
4344 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
94f0f2cc
JG
4345 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4346 tmp, const0_rtx));
4347 DONE;
4348})
4349
;; Expander for the "u"-suffixed widening multiply-accumulate by scalar.
;; Identical in structure to neon_vmlals_n, but the second insn emitted is
;; neon_vmlalu_lane.
4350(define_expand "neon_vmlalu_n<mode>"
cd65e265
DZ
4351 [(match_operand:<V_widen> 0 "s_register_operand")
4352 (match_operand:<V_widen> 1 "s_register_operand")
4353 (match_operand:VMDI 2 "s_register_operand")
4354 (match_operand:<V_elem> 3 "s_register_operand")]
94f0f2cc
JG
4355 "TARGET_NEON"
4356{
4357 rtx tmp = gen_reg_rtx (<MODE>mode);
4358 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4359 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4360 tmp, const0_rtx));
88f77cba
JB
4361 DONE;
4362})
4363
;; Expander for vqdmlal by scalar.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 in the VMDI mode.  The scalar in operand 3 is inserted at index
;; 0 of a fresh <MODE>mode pseudo and neon_vqdmlal_lane is emitted with
;; lane 0.
4364(define_expand "neon_vqdmlal_n<mode>"
cd65e265
DZ
4365 [(match_operand:<V_widen> 0 "s_register_operand")
4366 (match_operand:<V_widen> 1 "s_register_operand")
4367 (match_operand:VMDI 2 "s_register_operand")
4368 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4369 "TARGET_NEON"
4370{
4371 rtx tmp = gen_reg_rtx (<MODE>mode);
4372 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4373 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4374 tmp, const0_rtx));
88f77cba
JB
4375 DONE;
4376})
4377
;; Expander for multiply-subtract by scalar on the VMD modes.  Operands 1 and
;; 2 are forwarded unchanged to neon_vmls_lane; the scalar in operand 3 is
;; first inserted at index 0 of a fresh <MODE>mode pseudo, which is then used
;; as the by-lane operand with lane 0.
4378(define_expand "neon_vmls_n<mode>"
cd65e265
DZ
4379 [(match_operand:VMD 0 "s_register_operand")
4380 (match_operand:VMD 1 "s_register_operand")
4381 (match_operand:VMD 2 "s_register_operand")
4382 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4383 "TARGET_NEON"
4384{
4385 rtx tmp = gen_reg_rtx (<MODE>mode);
4386 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4387 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4388 tmp, const0_rtx));
88f77cba
JB
4389 DONE;
4390})
4391
;; Expander for multiply-subtract by scalar on the VMQ modes.  As for the VMD
;; variant, except that the temporary holding the scalar is created in
;; <V_HALF>mode before being handed to neon_vmls_lane with lane 0.
4392(define_expand "neon_vmls_n<mode>"
cd65e265
DZ
4393 [(match_operand:VMQ 0 "s_register_operand")
4394 (match_operand:VMQ 1 "s_register_operand")
4395 (match_operand:VMQ 2 "s_register_operand")
4396 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4397 "TARGET_NEON"
4398{
4399 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4400 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4401 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc
JG
4402 tmp, const0_rtx));
4403 DONE;
4404})
4405
;; Expander for the "s"-suffixed widening multiply-subtract by scalar.
;; Operands 0 and 1 are in <V_widen> mode, operand 2 in the VMDI mode.  The
;; scalar in operand 3 is inserted at index 0 of a fresh <MODE>mode pseudo
;; and neon_vmlsls_lane is emitted with lane 0.
4406(define_expand "neon_vmlsls_n<mode>"
cd65e265
DZ
4407 [(match_operand:<V_widen> 0 "s_register_operand")
4408 (match_operand:<V_widen> 1 "s_register_operand")
4409 (match_operand:VMDI 2 "s_register_operand")
4410 (match_operand:<V_elem> 3 "s_register_operand")]
94f0f2cc
JG
4411 "TARGET_NEON"
4412{
4413 rtx tmp = gen_reg_rtx (<MODE>mode);
4414 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4415 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4416 tmp, const0_rtx));
88f77cba
JB
4417 DONE;
4418})
4419
;; Expander for the "u"-suffixed widening multiply-subtract by scalar.
;; Identical in structure to neon_vmlsls_n, but the second insn emitted is
;; neon_vmlslu_lane.
94f0f2cc 4420(define_expand "neon_vmlslu_n<mode>"
cd65e265
DZ
4421 [(match_operand:<V_widen> 0 "s_register_operand")
4422 (match_operand:<V_widen> 1 "s_register_operand")
4423 (match_operand:VMDI 2 "s_register_operand")
4424 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4425 "TARGET_NEON"
4426{
4427 rtx tmp = gen_reg_rtx (<MODE>mode);
4428 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
94f0f2cc
JG
4429 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4430 tmp, const0_rtx));
88f77cba
JB
4431 DONE;
4432})
4433
;; Expander for vqdmlsl by scalar.  Operands 0 and 1 are in <V_widen> mode,
;; operand 2 in the VMDI mode.  The scalar in operand 3 is inserted at index
;; 0 of a fresh <MODE>mode pseudo and neon_vqdmlsl_lane is emitted with
;; lane 0.
4434(define_expand "neon_vqdmlsl_n<mode>"
cd65e265
DZ
4435 [(match_operand:<V_widen> 0 "s_register_operand")
4436 (match_operand:<V_widen> 1 "s_register_operand")
4437 (match_operand:VMDI 2 "s_register_operand")
4438 (match_operand:<V_elem> 3 "s_register_operand")]
88f77cba
JB
4439 "TARGET_NEON"
4440{
4441 rtx tmp = gen_reg_rtx (<MODE>mode);
4442 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4443 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
94f0f2cc 4444 tmp, const0_rtx));
88f77cba
JB
4445 DONE;
4446})
4447
;; "vext" on the VDQX modes: two vector inputs (operands 1 and 2) and an
;; immediate (operand 3) that is printed verbatim as the last assembler
;; operand.  Before the template is returned, arm_const_bounds is called
;; with a low value of 0 and a high value of GET_MODE_NUNITS (<MODE>mode);
;; presumably it diagnoses an immediate outside that range -- confirm in the
;; backend C sources.
ff03f2d1 4448(define_insn "@neon_vext<mode>"
88f77cba
JB
4449 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4450 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4451 (match_operand:VDQX 2 "s_register_operand" "w")
4452 (match_operand:SI 3 "immediate_operand" "i")]
4453 UNSPEC_VEXT))]
4454 "TARGET_NEON"
b617fc71 4455{
d57daa0c 4456 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
b617fc71
JB
4457 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4458}
f7379e5e 4459 [(set_attr "type" "neon_ext<q>")]
c956e102 4460)
88f77cba 4461
;; "vrev64" on the VDQ modes: a single-input unspec (UNSPEC_VREV64) emitted
;; as vrev64.<element size> with destination and source registers.
ff03f2d1 4462(define_insn "@neon_vrev64<mode>"
88f77cba 4463 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
94f0f2cc 4464 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
88f77cba
JB
4465 UNSPEC_VREV64))]
4466 "TARGET_NEON"
c956e102 4467 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4468 [(set_attr "type" "neon_rev<q>")]
c956e102 4469)
88f77cba 4470
;; "vrev32" on the VX modes: a single-input unspec (UNSPEC_VREV32) emitted
;; as vrev32.<element size> with destination and source registers.
ff03f2d1 4471(define_insn "@neon_vrev32<mode>"
88f77cba 4472 [(set (match_operand:VX 0 "s_register_operand" "=w")
94f0f2cc 4473 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
88f77cba
JB
4474 UNSPEC_VREV32))]
4475 "TARGET_NEON"
c956e102 4476 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4477 [(set_attr "type" "neon_rev<q>")]
c956e102 4478)
88f77cba 4479
;; "vrev16" on the VE modes: a single-input unspec (UNSPEC_VREV16) emitted
;; as vrev16.<element size> with destination and source registers.
ff03f2d1 4480(define_insn "@neon_vrev16<mode>"
88f77cba 4481 [(set (match_operand:VE 0 "s_register_operand" "=w")
94f0f2cc 4482 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
88f77cba
JB
4483 UNSPEC_VREV16))]
4484 "TARGET_NEON"
c956e102 4485 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
f7379e5e 4486 [(set_attr "type" "neon_rev<q>")]
c956e102 4487)
88f77cba
JB
4488
4489; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4490; allocation. For an intrinsic of form:
4491; rD = vbsl_* (rS, rN, rM)
4492; We can use any of:
4493; vbsl rS, rN, rM (if D = S)
4494; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4495; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4496
;; Bitwise select with three constraint alternatives, one per input that may
;; share a register with the destination:
;;   alt 0: operand 1 tied to operand 0 -> vbsl %0, %2, %3
;;   alt 1: operand 3 tied to operand 0 -> vbit %0, %2, %1
;;   alt 2: operand 2 tied to operand 0 -> vbif %0, %3, %1
;; Here all three inputs already have the same VDQX mode.
4497(define_insn "neon_vbsl<mode>_internal"
4498 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4499 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4500 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4501 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4502 UNSPEC_VBSL))]
4503 "TARGET_NEON"
4504 "@
4505 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4506 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
c956e102 4507 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
f7379e5e 4508 [(set_attr "type" "neon_bsl<q>")]
c956e102 4509)
88f77cba 4510
;; Expander for the vbsl intrinsics.  The selector (operand 1) arrives in
;; <V_cmp_result> mode while the data operands are in the VDQX mode; the
;; preparation code re-expresses the selector in <MODE>mode with gen_lowpart
;; so that the UNSPEC_VBSL template below is generated with three operands
;; of one mode.
c2978b34 4511(define_expand "@neon_vbsl<mode>"
cd65e265
DZ
4512 [(set (match_operand:VDQX 0 "s_register_operand")
4513 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4514 (match_operand:VDQX 2 "s_register_operand")
4515 (match_operand:VDQX 3 "s_register_operand")]
88f77cba
JB
4516 UNSPEC_VBSL))]
4517 "TARGET_NEON"
4518{
4519 /* We can't alias operands together if they have different modes. */
4520 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4521})
4522
94f0f2cc
JG
4523;; vshl, vrshl
;; Shift by a per-element count held in a vector register (operand 2) on the
;; VDQIX modes.  <shift_op> and <sup> are filled in from the VSHL iterator,
;; giving the mnemonic and the signedness letter of the data type.
;; NOTE(review): the shift count is a register operand, yet the "type"
;; attribute is neon_shift_imm<q>; confirm this is the intended scheduling
;; class.
4524(define_insn "neon_v<shift_op><sup><mode>"
88f77cba
JB
4525 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4526 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4527 (match_operand:VDQIX 2 "s_register_operand" "w")]
4528 VSHL))]
88f77cba 4529 "TARGET_NEON"
94f0f2cc 4530 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 4531 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4532)
88f77cba 4533
94f0f2cc
JG
4534;; vqshl, vqrshl
;; Saturating counterpart of the register-count shift pattern: same operand
;; layout (value in operand 1, per-element count vector in operand 2), with
;; <shift_op> and <sup> taken from the VQSHL iterator.
;; NOTE(review): the shift count is a register operand, yet the "type"
;; attribute is neon_sat_shift_imm<q>; confirm this is intended.
4535(define_insn "neon_v<shift_op><sup><mode>"
88f77cba
JB
4536 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4537 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4538 (match_operand:VDQIX 2 "s_register_operand" "w")]
4539 VQSHL))]
88f77cba 4540 "TARGET_NEON"
94f0f2cc 4541 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
f7379e5e 4542 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4543)
88f77cba 4544
94f0f2cc
JG
4545;; vshr_n, vrshr_n
;; Right shift by an immediate (operand 2) on the VDQIX modes.  Before the
;; template is returned, arm_const_bounds is called with a low value of 1
;; and a high value of element-bits + 1 -- presumably accepting counts from
;; 1 up to the element size; confirm the exact range semantics in the
;; backend C sources.
4546(define_insn "neon_v<shift_op><sup>_n<mode>"
88f77cba
JB
4547 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4548 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4549 (match_operand:SI 2 "immediate_operand" "i")]
4550 VSHR_N))]
88f77cba 4551 "TARGET_NEON"
b617fc71 4552{
d57daa0c 4553 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
94f0f2cc 4554 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4555}
f7379e5e 4556 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4557)
88f77cba 4558
94f0f2cc
JG
4559;; vshrn_n, vrshrn_n
;; Narrowing right shift by an immediate: the source (operand 1) is in a VN
;; mode and printed with %q, the result is in <V_narrow> mode and printed
;; with %P.  arm_const_bounds is called with a low value of 1 and a high
;; value of (source element bits / 2) + 1, i.e. the bound is derived from
;; half the source element width.
4560(define_insn "neon_v<shift_op>_n<mode>"
88f77cba
JB
4561 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4562 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4563 (match_operand:SI 2 "immediate_operand" "i")]
4564 VSHRN_N))]
88f77cba 4565 "TARGET_NEON"
b617fc71 4566{
d57daa0c 4567 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4568 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
b617fc71 4569}
f7379e5e 4570 [(set_attr "type" "neon_shift_imm_narrow_q")]
c956e102 4571)
88f77cba 4572
94f0f2cc
JG
4573;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift by an immediate.  Same shape as the
;; non-saturating narrowing pattern (VN-mode source, <V_narrow> result,
;; bound derived from half the source element width), but the data type
;; carries the <sup> signedness letter and the type attribute is the
;; saturating one.
4574(define_insn "neon_v<shift_op><sup>_n<mode>"
88f77cba
JB
4575 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4576 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4577 (match_operand:SI 2 "immediate_operand" "i")]
4578 VQSHRN_N))]
88f77cba 4579 "TARGET_NEON"
b617fc71 4580{
d57daa0c 4581 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4582 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
b617fc71 4583}
f7379e5e 4584 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4585)
88f77cba 4586
94f0f2cc
JG
4587;; vqshrun_n, vqrshrun_n
;; vqshrun_n / vqrshrun_n: narrowing right shift by an immediate, using the
;; <V_s_elem> data type in the emitted instruction.  Source in a VN mode
;; (printed with %q), result in <V_narrow> mode (printed with %P); the
;; immediate bound is again derived from half the source element width.
4588(define_insn "neon_v<shift_op>_n<mode>"
88f77cba
JB
4589 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4590 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
94f0f2cc
JG
4591 (match_operand:SI 2 "immediate_operand" "i")]
4592 VQSHRUN_N))]
88f77cba 4593 "TARGET_NEON"
b617fc71 4594{
d57daa0c 4595 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
94f0f2cc 4596 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
b617fc71 4597}
f7379e5e 4598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
c956e102 4599)
88f77cba
JB
4600
;; "vshl" by an immediate (operand 2) on the VDQIX modes.  Unlike the
;; right-shift patterns, arm_const_bounds is called here with a low value of
;; 0 and a high value equal to the element width (no "+ 1").
4601(define_insn "neon_vshl_n<mode>"
4602 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4603 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc 4604 (match_operand:SI 2 "immediate_operand" "i")]
88f77cba
JB
4605 UNSPEC_VSHL_N))]
4606 "TARGET_NEON"
b617fc71 4607{
d57daa0c 4608 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
b617fc71
JB
4609 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4610}
f7379e5e 4611 [(set_attr "type" "neon_shift_imm<q>")]
c956e102 4612)
88f77cba 4613
;; "vqshl" by an immediate on the VDQIX modes; <sup> (from the VQSHL_N
;; iterator) supplies the signedness letter in both the pattern name and the
;; emitted data type.  The immediate is checked with the same arguments as
;; neon_vshl_n: low value 0, high value equal to the element width.
94f0f2cc 4614(define_insn "neon_vqshl_<sup>_n<mode>"
88f77cba
JB
4615 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4616 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc
JG
4617 (match_operand:SI 2 "immediate_operand" "i")]
4618 VQSHL_N))]
88f77cba 4619 "TARGET_NEON"
b617fc71 4620{
d57daa0c 4621 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc 4622 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4623}
f7379e5e 4624 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4625)
88f77cba
JB
4626
;; "vqshlu" by an immediate on the VDQIX modes, emitted with the <V_s_elem>
;; data type.  The immediate is checked with a low value of 0 and a high
;; value equal to the element width.
4627(define_insn "neon_vqshlu_n<mode>"
4628 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4629 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
94f0f2cc 4630 (match_operand:SI 2 "immediate_operand" "i")]
88f77cba
JB
4631 UNSPEC_VQSHLU_N))]
4632 "TARGET_NEON"
b617fc71 4633{
d57daa0c 4634 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
94f0f2cc 4635 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
b617fc71 4636}
f7379e5e 4637 [(set_attr "type" "neon_sat_shift_imm<q>")]
c956e102 4638)
88f77cba 4639
;; Widening "vshll" by an immediate: the source (operand 1) is in a VW mode
;; and printed with %P, the result is in <V_widen> mode and printed with %q.
;; NOTE(review): the comment in the body states "0 < imm", but the low value
;; handed to arm_const_bounds is 0, whereas the right-shift patterns in this
;; file pass 1 when the minimum is 1.  Either the comment or the bound looks
;; off by one at the low end -- confirm against arm_const_bounds.
94f0f2cc 4640(define_insn "neon_vshll<sup>_n<mode>"
88f77cba
JB
4641 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4642 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
94f0f2cc
JG
4643 (match_operand:SI 2 "immediate_operand" "i")]
4644 VSHLL_N))]
88f77cba 4645 "TARGET_NEON"
b617fc71 4646{
8cb32ff2 4647 /* The boundaries are: 0 < imm <= size. */
d57daa0c 4648 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
94f0f2cc 4649 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
b617fc71 4650}
f7379e5e 4651 [(set_attr "type" "neon_shift_imm_long")]
c956e102 4652)
88f77cba 4653
94f0f2cc
JG
4654;; vsra_n, vrsra_n
;; Shift-right-and-accumulate by an immediate on the VDQIX modes.  Operand 1
;; is the accumulator, tied to the destination and therefore not printed;
;; operand 2 is the value being shifted and operand 3 the immediate count,
;; checked with a low value of 1 and a high value of element-bits + 1.
4655(define_insn "neon_v<shift_op><sup>_n<mode>"
88f77cba
JB
4656 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4657 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4658 (match_operand:VDQIX 2 "s_register_operand" "w")
94f0f2cc
JG
4659 (match_operand:SI 3 "immediate_operand" "i")]
4660 VSRA_N))]
88f77cba 4661 "TARGET_NEON"
b617fc71 4662{
d57daa0c 4663 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
94f0f2cc 4664 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
b617fc71 4665}
f7379e5e 4666 [(set_attr "type" "neon_shift_acc<q>")]
c956e102 4667)
88f77cba
JB
4668
;; "vsri" by an immediate on the VDQIX modes.  Operand 1 is tied to the
;; destination (its previous contents are an input); operand 2 is the value
;; being shifted and operand 3 the immediate count, checked with a low value
;; of 1 and a high value of element-bits + 1.
;; NOTE(review): the shift count is an immediate, yet the "type" attribute
;; is neon_shift_reg<q>; confirm this is the intended scheduling class.
4669(define_insn "neon_vsri_n<mode>"
4670 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4671 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4672 (match_operand:VDQIX 2 "s_register_operand" "w")
4673 (match_operand:SI 3 "immediate_operand" "i")]
4674 UNSPEC_VSRI))]
4675 "TARGET_NEON"
b617fc71 4676{
d57daa0c 4677 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
b617fc71
JB
4678 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4679}
f7379e5e 4680 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 4681)
88f77cba
JB
4682
;; "vsli" by an immediate on the VDQIX modes.  Operand 1 is tied to the
;; destination (its previous contents are an input); operand 2 is the value
;; being shifted and operand 3 the immediate count, checked with a low value
;; of 0 and a high value equal to the element width.
;; NOTE(review): the shift count is an immediate, yet the "type" attribute
;; is neon_shift_reg<q>; confirm this is the intended scheduling class.
4683(define_insn "neon_vsli_n<mode>"
4684 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4685 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4686 (match_operand:VDQIX 2 "s_register_operand" "w")
4687 (match_operand:SI 3 "immediate_operand" "i")]
4688 UNSPEC_VSLI))]
4689 "TARGET_NEON"
b617fc71 4690{
d57daa0c 4691 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
b617fc71
JB
4692 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4693}
f7379e5e 4694 [(set_attr "type" "neon_shift_reg<q>")]
c956e102 4695)
88f77cba
JB
4696
;; Table lookup with a one-register table: operand 1 (V8QI) is the table,
;; printed inside the braces, and operand 2 (V8QI) is the index vector.
4697(define_insn "neon_vtbl1v8qi"
4698 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4699 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4700 (match_operand:V8QI 2 "s_register_operand" "w")]
4701 UNSPEC_VTBL))]
4702 "TARGET_NEON"
c956e102 4703 "vtbl.8\t%P0, {%P1}, %P2"
f7379e5e 4704 [(set_attr "type" "neon_tbl1")]
c956e102 4705)
88f77cba
JB
4706
;; Table lookup with a two-register table held in one TImode operand.  The
;; output code rebuilds the register list by hand: it takes the hard register
;; number of operand 1 and forms V8QI registers at that number and at +2,
;; then prints them inside the braces followed by the index vector.
;; (The step of 2 presumably reflects how consecutive registers of this
;; class are numbered -- confirm against the register layout.)
;; The instruction is emitted via output_asm_insn, so the returned template
;; is empty.
4707(define_insn "neon_vtbl2v8qi"
4708 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4709 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4710 (match_operand:V8QI 2 "s_register_operand" "w")]
4711 UNSPEC_VTBL))]
4712 "TARGET_NEON"
4713{
4714 rtx ops[4];
4715 int tabbase = REGNO (operands[1]);
4716
4717 ops[0] = operands[0];
4718 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4719 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4720 ops[3] = operands[2];
4721 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4722
4723 return "";
c956e102 4724}
f7379e5e 4725 [(set_attr "type" "neon_tbl2")]
c956e102 4726)
88f77cba
JB
4727
;; Table lookup with a three-register table held in one EImode operand.  As
;; in neon_vtbl2v8qi, the register list is rebuilt from the hard register
;; number of operand 1: V8QI registers at +0, +2 and +4 are printed inside
;; the braces, followed by the index vector (operand 2).  The instruction is
;; emitted via output_asm_insn, so the returned template is empty.
4728(define_insn "neon_vtbl3v8qi"
4729 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4730 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4731 (match_operand:V8QI 2 "s_register_operand" "w")]
4732 UNSPEC_VTBL))]
4733 "TARGET_NEON"
4734{
4735 rtx ops[5];
4736 int tabbase = REGNO (operands[1]);
4737
4738 ops[0] = operands[0];
4739 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4740 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4741 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4742 ops[4] = operands[2];
4743 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4744
4745 return "";
c956e102 4746}
f7379e5e 4747 [(set_attr "type" "neon_tbl3")]
c956e102 4748)
88f77cba
JB
4749
;; Table lookup with a four-register table held in one OImode operand.  The
;; register list is rebuilt from the hard register number of operand 1:
;; V8QI registers at +0, +2, +4 and +6 are printed inside the braces,
;; followed by the index vector (operand 2).  The instruction is emitted via
;; output_asm_insn, so the returned template is empty.
4750(define_insn "neon_vtbl4v8qi"
4751 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4752 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4753 (match_operand:V8QI 2 "s_register_operand" "w")]
4754 UNSPEC_VTBL))]
4755 "TARGET_NEON"
4756{
4757 rtx ops[6];
4758 int tabbase = REGNO (operands[1]);
4759
4760 ops[0] = operands[0];
4761 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4762 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4763 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4764 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4765 ops[5] = operands[2];
4766 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4767
4768 return "";
c956e102 4769}
f7379e5e 4770 [(set_attr "type" "neon_tbl4")]
c956e102 4771)
88f77cba 4772
b440f324
RH
4773;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  The insn has no assembler form of
;; its own ("#") and is split once reload has completed into two
;; neon_vtbl2v8qi insns: the whole table is re-viewed as TImode and used by
;; both, while the low and the high V8QI halves of the index vector
;; (operand 2) produce the low and high halves of the result.
;; The destination is earlyclobber ("=&w"), presumably because the first
;; half-result is written while the table and indices are still needed.
4774(define_insn_and_split "neon_vtbl1v16qi"
4775 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4776 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4777 (match_operand:V16QI 2 "s_register_operand" "w")]
4778 UNSPEC_VTBL))]
4779 "TARGET_NEON"
4780 "#"
4781 "&& reload_completed"
4782 [(const_int 0)]
4783{
4784 rtx op0, op1, op2, part0, part2;
4785 unsigned ofs;
4786
4787 op0 = operands[0];
4788 op1 = gen_lowpart (TImode, operands[1]);
4789 op2 = operands[2];
4790
4791 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4792 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4793 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4794 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4795
4796 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4797 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4798 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4799 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4800 DONE;
f7379e5e
JG
4801}
4802 [(set_attr "type" "multiple")]
4803)
b440f324
RH
4804
;; V16QI table lookup with an OImode table.  Like neon_vtbl1v16qi it is
;; emitted as "#" and split after reload into one lookup per V8QI half of
;; the index vector / result.
;; NOTE(review): operand 1 is passed in OImode, unchanged, to
;; gen_neon_vtbl2v8qi, whose table operand is declared TImode in this file
;; while the OImode table belongs to neon_vtbl4v8qi -- confirm that
;; neon_vtbl2v8qi is really the intended generator here.
4805(define_insn_and_split "neon_vtbl2v16qi"
4806 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4807 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4808 (match_operand:V16QI 2 "s_register_operand" "w")]
4809 UNSPEC_VTBL))]
4810 "TARGET_NEON"
4811 "#"
4812 "&& reload_completed"
4813 [(const_int 0)]
4814{
4815 rtx op0, op1, op2, part0, part2;
4816 unsigned ofs;
4817
4818 op0 = operands[0];
4819 op1 = operands[1];
4820 op2 = operands[2];
4821
4822 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4823 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4824 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4825 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4826
4827 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4828 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4829 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4830 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4831 DONE;
f7379e5e
JG
4832}
4833 [(set_attr "type" "multiple")]
4834)
b440f324
RH
4835
4836;; ??? Logically we should extend the regular neon_vcombine pattern to
4837;; handle quad-word input modes, producing octa-word output modes. But
4838;; that requires us to add support for octa-word vector modes in moves.
4839;; That seems overkill for this one use in vec_perm.
;; Concatenates two V16QI registers into one OImode value
;; (UNSPEC_VCONCAT).  There is no assembler form ("#"); after reload the
;; insn is split by calling neon_split_vcombine on the operands, which is
;; responsible for emitting whatever replaces it.
4840(define_insn_and_split "neon_vcombinev16qi"
4841 [(set (match_operand:OI 0 "s_register_operand" "=w")
4842 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4843 (match_operand:V16QI 2 "s_register_operand" "w")]
4844 UNSPEC_VCONCAT))]
4845 "TARGET_NEON"
4846 "#"
4847 "&& reload_completed"
4848 [(const_int 0)]
4849{
4850 neon_split_vcombine (operands);
4851 DONE;
f7379e5e
JG
4852}
4853[(set_attr "type" "multiple")]
4854)
b440f324 4855
88f77cba
JB
;; VTBX with a one-register list.  Operand 1 is tied to the destination
;; (constraint "0"), operand 2 supplies the register list and operand 3 is
;; the final source register of the instruction.
(define_insn "neon_vtbx1v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:V8QI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
  "vtbx.8\t%P0, {%P2}, %P3"
  [(set_attr "type" "neon_tbl1")]
)
88f77cba
JB
4866
;; VTBX with a two-register list held in TI operand 2.  Operand 1 is tied to
;; the destination.  The list registers are rebuilt as V8QI hard registers
;; starting at REGNO (operands[2]) and stepping by 2.
(define_insn "neon_vtbx2v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:TI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx ops[4];
  int tabbase = REGNO (operands[2]);

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (V8QImode, tabbase);
  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
  ops[3] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);

  /* The instruction has already been printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl2")]
)
88f77cba
JB
4888
;; VTBX with a three-register list held in EI operand 2.  Operand 1 is tied
;; to the destination.  The list registers are rebuilt as V8QI hard
;; registers at REGNO (operands[2]) + 0, + 2 and + 4.
(define_insn "neon_vtbx3v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:EI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx ops[5];
  int tabbase = REGNO (operands[2]);

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (V8QImode, tabbase);
  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
  ops[4] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);

  /* The instruction has already been printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl3")]
)
88f77cba
JB
4911
;; VTBX with a four-register list held in OI operand 2.  Operand 1 is tied
;; to the destination.  The list registers are rebuilt as V8QI hard
;; registers at REGNO (operands[2]) + 0, + 2, + 4 and + 6.
(define_insn "neon_vtbx4v8qi"
  [(set (match_operand:V8QI 0 "s_register_operand" "=w")
        (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
                      (match_operand:OI 2 "s_register_operand" "w")
                      (match_operand:V8QI 3 "s_register_operand" "w")]
                     UNSPEC_VTBX))]
  "TARGET_NEON"
{
  rtx ops[6];
  int tabbase = REGNO (operands[2]);

  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (V8QImode, tabbase);
  ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
  ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
  ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
  ops[5] = operands[3];
  output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);

  /* The instruction has already been printed above.  */
  return "";
}
  [(set_attr "type" "neon_tbl4")]
)
88f77cba 4935
;; Expander for VTRN over the VDQWH modes.  It builds a PARALLEL of two
;; sets: operand 0 receives UNSPEC_VTRN1 and operand 3 receives
;; UNSPEC_VTRN2, both computed from inputs 1 and 2.  There is no
;; preparation code, so the RTL template is emitted as written.
(define_expand "@neon_vtrn<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VTRN1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
  "TARGET_NEON"
  ""
)
4947
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input 1 is tied to output 0
;; and input 3 is tied to output 2, so the template names only %0 and %2.
;; Both outputs are early-clobber ("=&w").
(define_insn "*neon_vtrn<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VTRN1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VTRN2))]
  "TARGET_NEON"
  "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_permute<q>")]
)
88f77cba 4961
;; Expander for VZIP over the VDQWH modes.  It builds a PARALLEL of two
;; sets: operand 0 receives UNSPEC_VZIP1 and operand 3 receives
;; UNSPEC_VZIP2, both computed from inputs 1 and 2.  There is no
;; preparation code, so the RTL template is emitted as written.
(define_expand "@neon_vzip<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VZIP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
  "TARGET_NEON"
  ""
)
4973
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input 1 is tied to output 0
;; and input 3 is tied to output 2, so the template names only %0 and %2.
;; Both outputs are early-clobber ("=&w").
(define_insn "*neon_vzip<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VZIP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VZIP2))]
  "TARGET_NEON"
  "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
88f77cba 4987
;; Expander for VUZP over the VDQWH modes.  It builds a PARALLEL of two
;; sets: operand 0 receives UNSPEC_VUZP1 and operand 3 receives
;; UNSPEC_VUZP2, both computed from inputs 1 and 2.  There is no
;; preparation code, so the RTL template is emitted as written.
(define_expand "@neon_vuzp<mode>_internal"
  [(parallel
    [(set (match_operand:VDQWH 0 "s_register_operand")
          (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
                         (match_operand:VDQWH 2 "s_register_operand")]
           UNSPEC_VUZP1))
     (set (match_operand:VDQWH 3 "s_register_operand")
          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
  "TARGET_NEON"
  ""
)
4999
;; Note: Different operand numbering to handle tied registers correctly.
;; Here the two outputs are operands 0 and 2; input 1 is tied to output 0
;; and input 3 is tied to output 2, so the template names only %0 and %2.
;; Both outputs are early-clobber ("=&w").
(define_insn "*neon_vuzp<mode>_insn"
  [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
                       (match_operand:VDQWH 3 "s_register_operand" "2")]
                      UNSPEC_VUZP1))
   (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
        (unspec:VDQWH [(match_dup 1) (match_dup 3)]
                      UNSPEC_VUZP2))]
  "TARGET_NEON"
  "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
  [(set_attr "type" "neon_zip<q>")]
)
88f77cba 5013
3188ed59
RS
;; Named expander vec_load_lanes for a single VDQX vector: a register is set
;; from a neon_struct_operand memory through UNSPEC_VLD1 (the same RTL shape
;; as neon_vld1<mode>).
(define_expand "vec_load_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "s_register_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
                     UNSPEC_VLD1))]
  "TARGET_NEON")
5019
88f77cba
JB
;; VLD1 of one whole VDQX vector from a "Um" memory operand.
(define_insn "neon_vld1<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "=w")
        (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
                     UNSPEC_VLD1))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)
88f77cba 5028
22f9db64
CB
;; The lane numbers in the RTL are in GCC lane order, having been flipped
;; in arm_expand_neon_args.  The lane numbers are restored to architectural
;; lane order here.
;;
;; VLD1 of a single element (operand 1) into lane operand 3 of a VDX
;; vector; operand 2 is the previous vector value, tied to the destination.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VDX 0 "s_register_operand" "=w")
        (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VDX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  operands[3] = GEN_INT (lane);
  /* A one-element vector has no lane to select: load the whole register.  */
  if (max == 1)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  else
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
88f77cba 5050
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VLD1 of a single element into lane operand 3 of a VQX vector.  The
;; instruction is emitted on one <V_HALF> half of the destination: lanes in
;; the upper half are rebased by max / 2 and use hard register REGNO + 2.
(define_insn "neon_vld1_lane<mode>"
  [(set (match_operand:VQX 0 "s_register_operand" "=w")
        (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
                     (match_operand:VQX 2 "s_register_operand" "0")
                     (match_operand:SI 3 "immediate_operand" "i")]
                    UNSPEC_VLD1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  if (lane >= max / 2)
    {
      /* The lane lives in the upper half of the destination.  */
      lane -= max / 2;
      regno += 2;
    }
  /* Store the lane index once, after any rebasing to the chosen half.  */
  operands[3] = GEN_INT (lane);
  operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
  /* With two elements each half holds exactly one: load it whole.  */
  if (max == 2)
    return "vld1.<V_sz_elem>\t%P0, %A1";
  else
    return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
}
  [(set_attr "type" "neon_load1_one_lane<q>")]
)
88f77cba
JB
5079
;; VLD1 of one element replicated to all lanes of a VD_LANE vector,
;; expressed in RTL as a vec_duplicate of the memory operand.
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
        (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "vld1.<V_sz_elem>\t{%P0[]}, %A1"
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5087
;; Special case for DImode.  Treat it exactly like a simple load.
;; The expander emits a DI set through UNSPEC_VLD1 and has no preparation
;; code.
(define_expand "neon_vld1_dupdi"
  [(set (match_operand:DI 0 "s_register_operand")
        (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
                   UNSPEC_VLD1))]
  "TARGET_NEON"
  ""
)
88f77cba
JB
5096
;; VLD1 of one element replicated to all lanes of a VQ2 vector.  A single
;; instruction names both halves of the destination (%e0 and %f0).
(define_insn "neon_vld1_dup<mode>"
  [(set (match_operand:VQ2 0 "s_register_operand" "=w")
        (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
{
  return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
}
  [(set_attr "type" "neon_load1_all_lanes<q>")]
)
5106
;; Duplicate a DI memory value into both halves of a V2DI register.  Output
;; as "#" and split after reload into a load of the low DI half (via
;; neon_vld1_dupdi) followed by a move of that half into the high DI half;
;; hence the "length" of 8.
(define_insn_and_split "neon_vld1_dupv2di"
  [(set (match_operand:V2DI 0 "s_register_operand" "=w")
        (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx tmprtx = gen_lowpart (DImode, operands[0]);
  emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
  emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx);
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "neon_load1_all_lanes_q")]
)
88f77cba 5123
3188ed59
RS
;; Named expander vec_store_lanes for a single VDQX vector: a
;; neon_struct_operand memory is set from a register through UNSPEC_VST1
;; (the same RTL shape as neon_vst1<mode>).
(define_expand "vec_store_lanes<mode><mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
                     UNSPEC_VST1))]
  "TARGET_NEON")
5129
;; VST1 of one whole VDQX vector to a "Um" memory operand.
(define_insn "neon_vst1<mode>"
  [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
        (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
                     UNSPEC_VST1))]
  "TARGET_NEON"
  "vst1.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store1_1reg<q>")])
88f77cba 5137
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VST1 of lane operand 2 of a VDX vector (operand 1) to an element-sized
;; memory operand.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VDX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  operands[2] = GEN_INT (lane);
  /* A one-element vector has no lane to select: store the whole register.  */
  if (max == 1)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";
  else
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
88f77cba 5158
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VST1 of lane operand 2 of a VQX vector.  The instruction is emitted on
;; one <V_HALF> half of the source: lanes in the upper half are rebased by
;; max / 2 and use hard register REGNO + 2.
(define_insn "neon_vst1_lane<mode>"
  [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_elem>
          [(match_operand:VQX 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")]
          UNSPEC_VST1_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  if (lane >= max / 2)
    {
      /* The lane lives in the upper half of the source.  */
      lane -= max / 2;
      regno += 2;
    }
  operands[2] = GEN_INT (lane);
  operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
  /* With two elements each half holds exactly one: store it whole.  */
  if (max == 2)
    return "vst1.<V_sz_elem>\t{%P1}, %A0";
  else
    return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
}
  [(set_attr "type" "neon_store1_one_lane<q>")]
)
88f77cba 5186
3188ed59
RS
;; Named expander vec_load_lanes into a TI register group, iterating over
;; VDX.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker; the
;; RTL has the same shape as neon_vld2<mode>.
(define_expand "vec_load_lanesti<mode>"
  [(set (match_operand:TI 0 "s_register_operand")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
5193
88f77cba
JB
;; Two-register structure load into a TI register group, iterating over
;; VDXBF.  For 64-bit elements the template is vld1.64 instead of vld2, and
;; the "type" attribute switches in the same way.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld2.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_2reg<q>")
                    (const_string "neon_load2_2reg<q>")))]
)
88f77cba 5211
3188ed59
RS
;; Named expander vec_load_lanes into an OI register group, iterating over
;; VQ2.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "s_register_operand")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON")
5218
88f77cba
JB
;; VLD2 into an OI register group, iterating over VQ2BF.
(define_insn "neon_vld2<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2))]
  "TARGET_NEON"
  "vld2.<V_sz_elem>\t%h0, %A1"
  [(set_attr "type" "neon_load2_2reg_q")])
88f77cba 5227
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VLD2 of one two-element structure into lane operand 3 of a TI register
;; group (VD_LANE modes).  Operand 2 is the previous group value, tied to
;; the destination.  The two target registers are REGNO and REGNO + 2.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:TI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
88f77cba 5251
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VLD2 of one two-element structure into lane operand 3 of an OI register
;; group (VQ_HS modes).  Lanes in the upper half are rebased by max / 2 and
;; start at REGNO + 2; the two target registers are then 4 apart.
(define_insn "neon_vld2_lane<mode>"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:OI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[4];
  if (lane >= max / 2)
    {
      /* The lane lives in the upper halves of the group's vectors.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = operands[1];
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
  return "";
}
  [(set_attr "type" "neon_load2_one_lane<q>")]
)
88f77cba
JB
5281
;; VLD2 of one two-element structure replicated to all lanes of a TI
;; register group (VDXBF modes).  Single-element vector modes use a plain
;; vld1 of the group instead, and the "type" attribute switches to match.
(define_insn "neon_vld2_dup<mode>"
  [(set (match_operand:TI 0 "s_register_operand" "=w")
        (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load2_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))]
)
88f77cba 5299
eb637e76
DB
;; V8BF variant of the VLD2 "dup" load into an OI register group; enabled
;; only under TARGET_BF16_SIMD.  The four V4BF registers named in the
;; template are REGNO (operands[0]) + 0, + 2, + 4 and + 6.
;; NOTE(review): unlike neon_vld2_dup<mode>, the register list here carries
;; no "[]" all-lanes suffix -- confirm that this is intended.
(define_insn "neon_vld2_dupv8bf"
  [(set (match_operand:OI 0 "s_register_operand" "=w")
        (unspec:OI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD2_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[5];
  int tabbase = REGNO (operands[0]);

  ops[4] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
  output_asm_insn ("vld2.16\t{%P0, %P1, %P2, %P3}, %A4", ops);
  return "";
}
  [(set_attr "type" "neon_load2_all_lanes_q")]
)
5320
3188ed59
RS
;; Named expander vec_store_lanes from a TI register group, iterating over
;; VDX.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker; the
;; RTL has the same shape as neon_vst2<mode>.
(define_expand "vec_store_lanesti<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand")
        (unspec:TI [(match_operand:TI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
5327
;; Two-register structure store from a TI register group, iterating over
;; VDXBF.  For 64-bit elements the template is vst1.64 instead of vst2.
;; NOTE(review): the non-64-bit "type" is neon_store2_one_lane<q> although
;; the template has no lane index -- confirm the scheduling class.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
        (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst2.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_2reg<q>")
                    (const_string "neon_store2_one_lane<q>")))]
)
88f77cba 5345
3188ed59
RS
;; Named expander vec_store_lanes from an OI register group, iterating over
;; VQ2.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand")
        (unspec:OI [(match_operand:OI 1 "s_register_operand")
                    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON")
5352
;; VST2 from an OI register group, iterating over VQ2BF.
(define_insn "neon_vst2<mode>"
  [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
        (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST2))]
  "TARGET_NEON"
  "vst2.<V_sz_elem>\t%h1, %A0"
  [(set_attr "type" "neon_store2_4reg<q>")]
)
88f77cba 5362
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VST2 of lane operand 2 from a TI register group (VD_LANE modes) to a
;; two-element memory operand.  The two source registers are REGNO and
;; REGNO + 2.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:TI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  int regno = REGNO (operands[1]);
  rtx ops[4];
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 2);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
88f77cba 5386
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VST2 of lane operand 2 from an OI register group (VQ_HS modes).  Lanes in
;; the upper half are rebased by max / 2 and start at REGNO + 2; the two
;; source registers are then 4 apart.
(define_insn "neon_vst2_lane<mode>"
  [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
        (unspec:<V_two_elem>
          [(match_operand:OI 1 "s_register_operand" "w")
           (match_operand:SI 2 "immediate_operand" "i")
           (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
          UNSPEC_VST2_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[1]);
  rtx ops[4];
  if (lane >= max / 2)
    {
      /* The lane lives in the upper halves of the group's vectors.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = operands[0];
  ops[1] = gen_rtx_REG (DImode, regno);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = GEN_INT (lane);
  output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
  return "";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
88f77cba 5416
3188ed59
RS
;; Named expander vec_load_lanes into an EI register group, iterating over
;; VDX.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker; the
;; RTL has the same shape as neon_vld3<mode>.
(define_expand "vec_load_lanesei<mode>"
  [(set (match_operand:EI 0 "s_register_operand")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON")
5423
88f77cba
JB
;; Three-register structure load into an EI register group, iterating over
;; VDXBF.  For 64-bit elements the template is vld1.64 instead of vld3, and
;; the "type" attribute switches in the same way.
(define_insn "neon_vld3<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vld1.64\t%h0, %A1";
  else
    return "vld3.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_load1_3reg<q>")
                    (const_string "neon_load3_3reg<q>")))]
)
88f77cba 5441
3188ed59
RS
;; Named expander vec_load_lanes into a CI register group, iterating over
;; VQ2.  It only forwards both operands to the neon_vld3<mode> expander.
(define_expand "vec_load_lanesci<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
  DONE;
})
5451
;; VLD3 into a CI register group (VQ2BF modes), expanded as two insns: the
;; "qa" insn loads from the first EI-sized part of the memory operand, and
;; the "qb" insn loads from the following EI-sized part while taking the
;; result of the first as an input.
(define_expand "neon_vld3<mode>"
  [(match_operand:CI 0 "s_register_operand")
   (match_operand:CI 1 "neon_struct_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[1], EImode, 0);
  emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
  /* Step past the part just loaded.  */
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
  DONE;
})
5466
;; First half of the CI-mode VLD3 (UNSPEC_VLD3A): loads an EI-sized memory
;; operand into registers REGNO + 0, + 4 and + 8 of the destination group.
(define_insn "neon_vld3qa<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3A))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
88f77cba
JB
5485
;; Second half of the CI-mode VLD3 (UNSPEC_VLD3B): loads an EI-sized memory
;; operand into registers REGNO + 2, + 6 and + 10 of the destination group.
;; Operand 2, the previous group value, is tied to the destination.
(define_insn "neon_vld3qb<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3B))]
  "TARGET_NEON"
{
  int regno = REGNO (operands[0]);
  rtx ops[4];
  ops[0] = gen_rtx_REG (DImode, regno + 2);
  ops[1] = gen_rtx_REG (DImode, regno + 6);
  ops[2] = gen_rtx_REG (DImode, regno + 10);
  ops[3] = operands[1];
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_3reg<q>")]
)
88f77cba 5505
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VLD3 of one three-element structure into lane operand 3 of an EI
;; register group (VD_LANE modes).  Operand 2 is the previous group value,
;; tied to the destination.  The target registers are REGNO + 0, + 2, + 4.
;; The memory operand is printed with plain %3 here, not %A3 as in the
;; vld2 lane patterns.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:EI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  int regno = REGNO (operands[0]);
  rtx ops[5];
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 2);
  ops[2] = gen_rtx_REG (DImode, regno + 4);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
88f77cba 5531
22f9db64
CB
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
;;
;; VLD3 of one three-element structure into lane operand 3 of a CI register
;; group (VQ_HS modes).  Lanes in the upper half are rebased by max / 2 and
;; start at REGNO + 2; the three target registers are then 4 apart.  The
;; memory operand is printed with plain %3 here, not %A3.
(define_insn "neon_vld3_lane<mode>"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (match_operand:CI 2 "s_register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_LANE))]
  "TARGET_NEON"
{
  HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
  HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
  int regno = REGNO (operands[0]);
  rtx ops[5];
  if (lane >= max / 2)
    {
      /* The lane lives in the upper halves of the group's vectors.  */
      lane -= max / 2;
      regno += 2;
    }
  ops[0] = gen_rtx_REG (DImode, regno);
  ops[1] = gen_rtx_REG (DImode, regno + 4);
  ops[2] = gen_rtx_REG (DImode, regno + 8);
  ops[3] = operands[1];
  ops[4] = GEN_INT (lane);
  output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
                   ops);
  return "";
}
  [(set_attr "type" "neon_load3_one_lane<q>")]
)
88f77cba
JB
5563
;; VLD3 of one three-element structure replicated to all lanes of an EI
;; register group (VDXBF modes); the target registers are REGNO + 0, + 2 and
;; + 4.  Single-element vector modes use a plain vld1 of the group instead,
;; and the "type" attribute switches to match.
(define_insn "neon_vld3_dup<mode>"
  [(set (match_operand:EI 0 "s_register_operand" "=w")
        (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_NEON"
{
  if (GET_MODE_NUNITS (<MODE>mode) > 1)
    {
      int regno = REGNO (operands[0]);
      rtx ops[4];
      ops[0] = gen_rtx_REG (DImode, regno);
      ops[1] = gen_rtx_REG (DImode, regno + 2);
      ops[2] = gen_rtx_REG (DImode, regno + 4);
      ops[3] = operands[1];
      output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
      return "";
    }
  else
    return "vld1.<V_sz_elem>\t%h0, %A1";
}
  [(set (attr "type")
      (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
                    (const_string "neon_load3_all_lanes<q>")
                    (const_string "neon_load1_1reg<q>")))])
88f77cba 5589
eb637e76
DB
;; V8BF variant of the VLD3 all-lanes load into a CI register group; enabled
;; only under TARGET_BF16_SIMD.  The three V4BF registers named in the
;; template are REGNO (operands[0]) + 0, + 2 and + 4.
;; The unspec is UNSPEC_VLD3_DUP, matching neon_vld3_dup<mode>; it was
;; previously tagged UNSPEC_VLD2_DUP although the template is a vld3.
;; NOTE(review): the memory operand is V2BF although three elements are
;; loaded -- confirm the intended memory mode.
(define_insn "neon_vld3_dupv8bf"
  [(set (match_operand:CI 0 "s_register_operand" "=w")
        (unspec:CI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
                    (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VLD3_DUP))]
  "TARGET_BF16_SIMD"
{
  rtx ops[4];
  int tabbase = REGNO (operands[0]);

  ops[3] = operands[1];
  ops[0] = gen_rtx_REG (V4BFmode, tabbase);
  ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
  ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
  output_asm_insn ("vld3.16\t{%P0[], %P1[], %P2[]}, %A3", ops);
  return "";
}
  [(set_attr "type" "neon_load3_all_lanes_q")]
)
5609
3188ed59
RS
;; Named expander vec_store_lanes from an EI register group, iterating over
;; VDX.  The vector mode appears only in the UNSPEC_VSTRUCTDUMMY marker; the
;; RTL has the same shape as neon_vst3<mode>.
(define_expand "vec_store_lanesei<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand")
        (unspec:EI [(match_operand:EI 1 "s_register_operand")
                    (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON")
5616
;; Three-register structure store from an EI register group, iterating over
;; VDXBF.  For 64-bit elements the template is vst1.64 instead of vst3.
;; NOTE(review): the non-64-bit "type" is neon_store3_one_lane<q> although
;; the template has no lane index -- confirm the scheduling class.
(define_insn "neon_vst3<mode>"
  [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
        (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
                    (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_VST3))]
  "TARGET_NEON"
{
  if (<V_sz_elem> == 64)
    return "vst1.64\t%h1, %A0";
  else
    return "vst3.<V_sz_elem>\t%h1, %A0";
}
  [(set (attr "type")
      (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
                    (const_string "neon_store1_3reg<q>")
                    (const_string "neon_store3_one_lane<q>")))])
88f77cba 5633
3188ed59
RS
;; Named expander vec_store_lanes from a CI register group, iterating over
;; VQ2.  It only forwards both operands to the neon_vst3<mode> expander.
(define_expand "vec_store_lanesci<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
  DONE;
})
5643
;; VST3 from a CI register group (VQ2BF modes), expanded as two insns: the
;; "qa" insn stores to the first EI-sized part of the memory operand and the
;; "qb" insn stores to the following EI-sized part.  Both read operand 1.
(define_expand "neon_vst3<mode>"
  [(match_operand:CI 0 "neon_struct_operand")
   (match_operand:CI 1 "s_register_operand")
   (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_NEON"
{
  rtx mem;

  mem = adjust_address (operands[0], EImode, 0);
  emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
  /* Step past the part just stored.  */
  mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
  emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
  DONE;
})
5658
;; First half of the CI-mode vst3 expansion.  Emits one vst3 whose register
;; list is built from hard register numbers regno, regno + 4 and regno + 8
;; of operand 1, each viewed as DImode.
;; NOTE(review): presumably these are the low D halves of the three Q
;; registers; the register numbering is defined outside this chunk.
5659(define_insn "neon_vst3qa<mode>"
6308e208
RS
5660 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5661 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
ff229375 5662 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5663 UNSPEC_VST3A))]
88f77cba
JB
5664 "TARGET_NEON"
5665{
6308e208 5666 int regno = REGNO (operands[1]);
88f77cba
JB
5667 rtx ops[4];
5668 ops[0] = operands[0];
5669 ops[1] = gen_rtx_REG (DImode, regno);
5670 ops[2] = gen_rtx_REG (DImode, regno + 4);
5671 ops[3] = gen_rtx_REG (DImode, regno + 8);
6308e208 5672 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
88f77cba 5673 return "";
c956e102 5674}
f7379e5e 5675 [(set_attr "type" "neon_store3_3reg<q>")]
c956e102 5676)
88f77cba
JB
5677
;; Second half of the CI-mode vst3 expansion.  Identical to neon_vst3qa
;; except that the register list starts two hard registers later:
;; regno + 2, regno + 6 and regno + 10, each viewed as DImode.
5678(define_insn "neon_vst3qb<mode>"
6308e208
RS
5679 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5680 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
ff229375 5681 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5682 UNSPEC_VST3B))]
88f77cba
JB
5683 "TARGET_NEON"
5684{
6308e208 5685 int regno = REGNO (operands[1]);
88f77cba
JB
5686 rtx ops[4];
5687 ops[0] = operands[0];
5688 ops[1] = gen_rtx_REG (DImode, regno + 2);
5689 ops[2] = gen_rtx_REG (DImode, regno + 6);
5690 ops[3] = gen_rtx_REG (DImode, regno + 10);
6308e208 5691 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
88f77cba 5692 return "";
c956e102 5693}
f7379e5e 5694 [(set_attr "type" "neon_store3_3reg<q>")]
c956e102 5695)
88f77cba 5696
22f9db64
CB
5697;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5698;; here on big endian targets.
;; Single-lane vst3 from an EI-mode register group (VD_LANE modes).
;; Operand 2 is the lane number; it is passed through NEON_ENDIAN_LANE_N
;; before being printed.  The register list uses regno, regno + 2 and
;; regno + 4 of operand 1.  The address is printed with plain %0 here,
;; unlike the whole-structure stores in this file, which use %A0.
88f77cba 5699(define_insn "neon_vst3_lane<mode>"
6308e208 5700 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
5701 (unspec:<V_three_elem>
5702 [(match_operand:EI 1 "s_register_operand" "w")
5703 (match_operand:SI 2 "immediate_operand" "i")
4b644867 5704 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5705 UNSPEC_VST3_LANE))]
5706 "TARGET_NEON"
5707{
22f9db64 5708 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
5709 int regno = REGNO (operands[1]);
5710 rtx ops[5];
88f77cba
JB
5711 ops[0] = operands[0];
5712 ops[1] = gen_rtx_REG (DImode, regno);
5713 ops[2] = gen_rtx_REG (DImode, regno + 2);
5714 ops[3] = gen_rtx_REG (DImode, regno + 4);
22f9db64 5715 ops[4] = GEN_INT (lane);
9594fe5e 5716 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
88f77cba
JB
5717 ops);
5718 return "";
c956e102 5719}
f7379e5e 5720 [(set_attr "type" "neon_store3_one_lane<q>")]
c956e102 5721)
88f77cba 5722
22f9db64
CB
5723;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5724;; here on big endian targets.
;; Single-lane vst3 from a CI-mode register group (VQ_HS modes).
;; The endian-adjusted lane is compared with half the number of units in
;; <MODE>mode: a lane in the upper half is rebased by max / 2 and the base
;; register number is advanced by 2.  The register list then uses regno,
;; regno + 4 and regno + 8.
88f77cba 5725(define_insn "neon_vst3_lane<mode>"
6308e208 5726 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
5727 (unspec:<V_three_elem>
5728 [(match_operand:CI 1 "s_register_operand" "w")
5729 (match_operand:SI 2 "immediate_operand" "i")
4b644867 5730 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5731 UNSPEC_VST3_LANE))]
5732 "TARGET_NEON"
5733{
22f9db64 5734 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
5735 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5736 int regno = REGNO (operands[1]);
5737 rtx ops[5];
e68ffe57 5738 if (lane >= max / 2)
88f77cba
JB
5739 {
5740 lane -= max / 2;
5741 regno += 2;
5742 }
5743 ops[0] = operands[0];
5744 ops[1] = gen_rtx_REG (DImode, regno);
5745 ops[2] = gen_rtx_REG (DImode, regno + 4);
5746 ops[3] = gen_rtx_REG (DImode, regno + 8);
5747 ops[4] = GEN_INT (lane);
9594fe5e 5748 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
88f77cba
JB
5749 ops);
5750 return "";
c956e102 5751}
f7379e5e
JG
5752 [(set_attr "type" "neon_store3_one_lane<q>")]
5753)
88f77cba 5754
3188ed59
RS
;; Expander for the vec_load_lanes pattern producing an OI-mode register
;; group, iterated over VDX.  It has no preparation statements, so the
;; template (operand 0 set to UNSPEC_VLD4 of operand 1) is emitted as
;; written.
5755(define_expand "vec_load_lanesoi<mode>"
5756 [(set (match_operand:OI 0 "s_register_operand")
5757 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5758 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759 UNSPEC_VLD4))]
5760 "TARGET_NEON")
5761
88f77cba
JB
;; Load an OI-mode register group (operand 0) from the structure memory in
;; operand 1.  When the element size is 64 the output is a vld1.64 of the
;; register list; every other element size is emitted as vld4.  The "type"
;; attribute follows the same split (load1_4reg versus load4_4reg).
5762(define_insn "neon_vld4<mode>"
5763 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 5764 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
eb637e76 5765 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5766 UNSPEC_VLD4))]
5767 "TARGET_NEON"
5768{
5769 if (<V_sz_elem> == 64)
6308e208 5770 return "vld1.64\t%h0, %A1";
88f77cba 5771 else
6308e208 5772 return "vld4.<V_sz_elem>\t%h0, %A1";
c956e102 5773}
003bb7f3 5774 [(set (attr "type")
c956e102 5775 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
f7379e5e
JG
5776 (const_string "neon_load1_4reg<q>")
5777 (const_string "neon_load4_4reg<q>")))]
c956e102 5778)
88f77cba 5779
3188ed59
RS
;; Expander for the vec_load_lanes pattern producing an XI-mode register
;; group, iterated over VQ2.  It simply forwards operands 0 and 1 to the
;; neon_vld4<mode> generator.
5780(define_expand "vec_load_lanesxi<mode>"
5781 [(match_operand:XI 0 "s_register_operand")
5782 (match_operand:XI 1 "neon_struct_operand")
4b644867 5783 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3188ed59
RS
5784 "TARGET_NEON"
5785{
5786 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5787 DONE;
5788})
5789
;; Expand an XI-mode vld4 into two OI-mode loads.  neon_vld4qa reads from
;; the start of operand 1.  neon_vld4qb reads from the address
;; GET_MODE_SIZE (OImode) bytes further on and additionally receives
;; operand 0 as an input, so the result of the first load is carried
;; through the second.
88f77cba 5790(define_expand "neon_vld4<mode>"
6308e208
RS
5791 [(match_operand:XI 0 "s_register_operand")
5792 (match_operand:XI 1 "neon_struct_operand")
eb637e76 5793 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5794 "TARGET_NEON"
5795{
6308e208
RS
5796 rtx mem;
5797
5798 mem = adjust_address (operands[1], OImode, 0);
5799 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5800 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5801 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
88f77cba
JB
5802 DONE;
5803})
5804
;; First half of the XI-mode vld4 expansion.  Emits one vld4 whose register
;; list is built from hard register numbers regno, regno + 4, regno + 8 and
;; regno + 12 of operand 0, each viewed as DImode.
5805(define_insn "neon_vld4qa<mode>"
5806 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208 5807 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
eb637e76 5808 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5809 UNSPEC_VLD4A))]
88f77cba
JB
5810 "TARGET_NEON"
5811{
5812 int regno = REGNO (operands[0]);
5813 rtx ops[5];
5814 ops[0] = gen_rtx_REG (DImode, regno);
5815 ops[1] = gen_rtx_REG (DImode, regno + 4);
5816 ops[2] = gen_rtx_REG (DImode, regno + 8);
5817 ops[3] = gen_rtx_REG (DImode, regno + 12);
a6217191 5818 ops[4] = operands[1];
6308e208 5819 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
88f77cba 5820 return "";
c956e102 5821}
f7379e5e 5822 [(set_attr "type" "neon_load4_4reg<q>")]
c956e102 5823)
88f77cba
JB
5824
;; Second half of the XI-mode vld4 expansion.  Operand 2 is tied to the
;; output by the "0" constraint, so the registers written by neon_vld4qa
;; are kept.  The register list uses regno + 2, regno + 6, regno + 10 and
;; regno + 14 of operand 0, each viewed as DImode.
5825(define_insn "neon_vld4qb<mode>"
5826 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208
RS
5827 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5828 (match_operand:XI 2 "s_register_operand" "0")
eb637e76 5829 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 5830 UNSPEC_VLD4B))]
88f77cba
JB
5831 "TARGET_NEON"
5832{
5833 int regno = REGNO (operands[0]);
5834 rtx ops[5];
5835 ops[0] = gen_rtx_REG (DImode, regno + 2);
5836 ops[1] = gen_rtx_REG (DImode, regno + 6);
5837 ops[2] = gen_rtx_REG (DImode, regno + 10);
5838 ops[3] = gen_rtx_REG (DImode, regno + 14);
6308e208
RS
5839 ops[4] = operands[1];
5840 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
88f77cba 5841 return "";
c956e102 5842}
f7379e5e 5843 [(set_attr "type" "neon_load4_4reg<q>")]
c956e102 5844)
88f77cba 5845
22f9db64
CB
5846;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5847;; here on big endian targets.
88f77cba
JB
;; Single-lane vld4 into an OI-mode register group (VD_LANE modes).
;; Operand 2 is tied to the output, so the lanes not loaded keep their
;; previous contents.  Operand 3 is the lane number, passed through
;; NEON_ENDIAN_LANE_N before printing.  The register list uses regno,
;; regno + 2, regno + 4 and regno + 6 of operand 0.
5848(define_insn "neon_vld4_lane<mode>"
5849 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 5850 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
5851 (match_operand:OI 2 "s_register_operand" "0")
5852 (match_operand:SI 3 "immediate_operand" "i")
4b644867 5853 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5854 UNSPEC_VLD4_LANE))]
5855 "TARGET_NEON"
5856{
22f9db64 5857 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
5858 int regno = REGNO (operands[0]);
5859 rtx ops[6];
88f77cba
JB
5860 ops[0] = gen_rtx_REG (DImode, regno);
5861 ops[1] = gen_rtx_REG (DImode, regno + 2);
5862 ops[2] = gen_rtx_REG (DImode, regno + 4);
5863 ops[3] = gen_rtx_REG (DImode, regno + 6);
5864 ops[4] = operands[1];
22f9db64 5865 ops[5] = GEN_INT (lane);
6308e208 5866 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
88f77cba
JB
5867 ops);
5868 return "";
c956e102 5869}
f7379e5e 5870 [(set_attr "type" "neon_load4_one_lane<q>")]
c956e102 5871)
88f77cba 5872
22f9db64
CB
5873;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5874;; here on big endian targets.
88f77cba
JB
;; Single-lane vld4 into an XI-mode register group (VQ_HS modes).
;; Operand 2 is tied to the output.  The endian-adjusted lane is compared
;; with half the number of units in <MODE>mode: a lane in the upper half is
;; rebased by max / 2 and the base register number is advanced by 2.  The
;; register list then uses regno, regno + 4, regno + 8 and regno + 12.
5875(define_insn "neon_vld4_lane<mode>"
5876 [(set (match_operand:XI 0 "s_register_operand" "=w")
6308e208 5877 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
88f77cba
JB
5878 (match_operand:XI 2 "s_register_operand" "0")
5879 (match_operand:SI 3 "immediate_operand" "i")
4b644867 5880 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5881 UNSPEC_VLD4_LANE))]
5882 "TARGET_NEON"
5883{
22f9db64 5884 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
88f77cba
JB
5885 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5886 int regno = REGNO (operands[0]);
5887 rtx ops[6];
e68ffe57 5888 if (lane >= max / 2)
88f77cba
JB
5889 {
5890 lane -= max / 2;
5891 regno += 2;
5892 }
5893 ops[0] = gen_rtx_REG (DImode, regno);
5894 ops[1] = gen_rtx_REG (DImode, regno + 4);
5895 ops[2] = gen_rtx_REG (DImode, regno + 8);
5896 ops[3] = gen_rtx_REG (DImode, regno + 12);
5897 ops[4] = operands[1];
5898 ops[5] = GEN_INT (lane);
6308e208 5899 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
88f77cba
JB
5900 ops);
5901 return "";
c956e102 5902}
f7379e5e 5903 [(set_attr "type" "neon_load4_one_lane<q>")]
c956e102 5904)
88f77cba
JB
5905
;; vld4 "all lanes" load into an OI-mode register group.  When <MODE>mode
;; has more than one unit, a vld4 with the "[]" all-lanes register syntax
;; is emitted for regno, regno + 2, regno + 4 and regno + 6.  A single-unit
;; mode falls back to a vld1 of the register list.  The "type" attribute
;; makes the same distinction using <V_mode_nunits>.
5906(define_insn "neon_vld4_dup<mode>"
5907 [(set (match_operand:OI 0 "s_register_operand" "=w")
6308e208 5908 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
eb637e76 5909 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5910 UNSPEC_VLD4_DUP))]
5911 "TARGET_NEON"
5912{
5913 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5914 {
5915 int regno = REGNO (operands[0]);
5916 rtx ops[5];
5917 ops[0] = gen_rtx_REG (DImode, regno);
5918 ops[1] = gen_rtx_REG (DImode, regno + 2);
5919 ops[2] = gen_rtx_REG (DImode, regno + 4);
5920 ops[3] = gen_rtx_REG (DImode, regno + 6);
5921 ops[4] = operands[1];
6308e208 5922 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
88f77cba
JB
5923 ops);
5924 return "";
5925 }
5926 else
6308e208 5927 return "vld1.<V_sz_elem>\t%h0, %A1";
c956e102 5928}
003bb7f3 5929 [(set (attr "type")
c956e102 5930 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
f7379e5e
JG
5931 (const_string "neon_load4_all_lanes<q>")
5932 (const_string "neon_load1_1reg<q>")))]
c956e102 5933)
88f77cba 5934
eb637e76
DB
;; vld4 "all lanes" load for V8BF into an XI-mode register group.  Emits a
;; single vld4.16 with the "[]" all-lanes syntax whose register list is
;; tabbase, tabbase + 2, tabbase + 4 and tabbase + 6, each viewed as
;; V4BFmode.  The operation is tagged UNSPEC_VLD4_DUP, the same unspec the
;; generic neon_vld4_dup<mode> pattern uses, so the RTL names the
;; four-element load it actually performs.
;; NOTE(review): the memory operand is V2BF although four elements are
;; read; confirm against the builtin that generates this pattern.
5935(define_insn "neon_vld4_dupv8bf"
5936 [(set (match_operand:XI 0 "s_register_operand" "=w")
5937 (unspec:XI [(match_operand:V2BF 1 "neon_struct_operand" "Um")
5938 (unspec:V8BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5939 UNSPEC_VLD4_DUP))]
5940 "TARGET_BF16_SIMD"
5941 {
5942 rtx ops[5];
5943 int tabbase = REGNO (operands[0]);
5944
5945 ops[4] = operands[1];
5946 ops[0] = gen_rtx_REG (V4BFmode, tabbase);
5947 ops[1] = gen_rtx_REG (V4BFmode, tabbase + 2);
5948 ops[2] = gen_rtx_REG (V4BFmode, tabbase + 4);
5949 ops[3] = gen_rtx_REG (V4BFmode, tabbase + 6);
5950 output_asm_insn ("vld4.16\t{%P0[], %P1[], %P2[], %P3[]}, %A4", ops);
5951 return "";
5952 }
5953 [(set_attr "type" "neon_load4_all_lanes_q")]
5954)
5955
3188ed59
RS
;; Expander for the vec_store_lanes pattern on an OI-mode register group,
;; iterated over VDX.  It has no preparation statements, so the template
;; (operand 0 set to UNSPEC_VST4 of operand 1) is emitted as written.
5956(define_expand "vec_store_lanesoi<mode>"
5957 [(set (match_operand:OI 0 "neon_struct_operand")
5958 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5959 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5960 UNSPEC_VST4))]
5961 "TARGET_NEON")
5962
;; Store the OI-mode register group in operand 1 to the structure memory
;; in operand 0.  When the element size is 64 the output is a vst1.64 of
;; the register list; every other element size is emitted as vst4.  The
;; "type" attribute follows the same split (store1_4reg versus
;; store4_4reg).
88f77cba 5963(define_insn "neon_vst4<mode>"
6308e208 5964 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
88f77cba 5965 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
ff229375 5966 (unspec:VDXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5967 UNSPEC_VST4))]
5968 "TARGET_NEON"
5969{
5970 if (<V_sz_elem> == 64)
6308e208 5971 return "vst1.64\t%h1, %A0";
88f77cba 5972 else
6308e208 5973 return "vst4.<V_sz_elem>\t%h1, %A0";
c956e102 5974}
003bb7f3 5975 [(set (attr "type")
c956e102 5976 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
f7379e5e
JG
5977 (const_string "neon_store1_4reg<q>")
5978 (const_string "neon_store4_4reg<q>")))]
c956e102 5979)
88f77cba 5980
3188ed59
RS
;; Expander for the vec_store_lanes pattern on an XI-mode register group,
;; iterated over VQ2.  It simply forwards operands 0 and 1 to the
;; neon_vst4<mode> generator.
5981(define_expand "vec_store_lanesxi<mode>"
5982 [(match_operand:XI 0 "neon_struct_operand")
5983 (match_operand:XI 1 "s_register_operand")
4b644867 5984 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3188ed59
RS
5985 "TARGET_NEON"
5986{
5987 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5988 DONE;
5989})
5990
;; Expand an XI-mode vst4 into two OI-mode stores.  The first
;; (neon_vst4qa) targets the start of operand 0; the second (neon_vst4qb)
;; targets the address GET_MODE_SIZE (OImode) bytes further on.  Both read
;; the same XI register group, operand 1.
88f77cba 5991(define_expand "neon_vst4<mode>"
6308e208
RS
5992 [(match_operand:XI 0 "neon_struct_operand")
5993 (match_operand:XI 1 "s_register_operand")
ff229375 5994 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
5995 "TARGET_NEON"
5996{
6308e208
RS
5997 rtx mem;
5998
5999 mem = adjust_address (operands[0], OImode, 0);
6000 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6001 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6002 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
88f77cba
JB
6003 DONE;
6004})
6005
;; First half of the XI-mode vst4 expansion.  Emits one vst4 whose register
;; list is built from hard register numbers regno, regno + 4, regno + 8 and
;; regno + 12 of operand 1, each viewed as DImode.
6006(define_insn "neon_vst4qa<mode>"
6308e208
RS
6007 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6008 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
ff229375 6009 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6010 UNSPEC_VST4A))]
88f77cba
JB
6011 "TARGET_NEON"
6012{
6308e208 6013 int regno = REGNO (operands[1]);
88f77cba
JB
6014 rtx ops[5];
6015 ops[0] = operands[0];
6016 ops[1] = gen_rtx_REG (DImode, regno);
6017 ops[2] = gen_rtx_REG (DImode, regno + 4);
6018 ops[3] = gen_rtx_REG (DImode, regno + 8);
6019 ops[4] = gen_rtx_REG (DImode, regno + 12);
6308e208 6020 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
88f77cba 6021 return "";
c956e102 6022}
f7379e5e 6023 [(set_attr "type" "neon_store4_4reg<q>")]
c956e102 6024)
88f77cba
JB
6025
;; Second half of the XI-mode vst4 expansion.  Identical to neon_vst4qa
;; except that the register list starts two hard registers later:
;; regno + 2, regno + 6, regno + 10 and regno + 14, each viewed as DImode.
6026(define_insn "neon_vst4qb<mode>"
6308e208
RS
6027 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6028 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
ff229375 6029 (unspec:VQ2BF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6308e208 6030 UNSPEC_VST4B))]
88f77cba
JB
6031 "TARGET_NEON"
6032{
6308e208 6033 int regno = REGNO (operands[1]);
88f77cba
JB
6034 rtx ops[5];
6035 ops[0] = operands[0];
6036 ops[1] = gen_rtx_REG (DImode, regno + 2);
6037 ops[2] = gen_rtx_REG (DImode, regno + 6);
6038 ops[3] = gen_rtx_REG (DImode, regno + 10);
6039 ops[4] = gen_rtx_REG (DImode, regno + 14);
6308e208 6040 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
88f77cba 6041 return "";
c956e102 6042}
f7379e5e 6043 [(set_attr "type" "neon_store4_4reg<q>")]
c956e102 6044)
88f77cba 6045
22f9db64
CB
6046;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6047;; here on big endian targets.
;; Single-lane vst4 from an OI-mode register group (VD_LANE modes).
;; Operand 2 is the lane number, passed through NEON_ENDIAN_LANE_N before
;; printing.  The register list uses regno, regno + 2, regno + 4 and
;; regno + 6 of operand 1.
88f77cba 6048(define_insn "neon_vst4_lane<mode>"
6308e208 6049 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
6050 (unspec:<V_four_elem>
6051 [(match_operand:OI 1 "s_register_operand" "w")
6052 (match_operand:SI 2 "immediate_operand" "i")
4b644867 6053 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6054 UNSPEC_VST4_LANE))]
6055 "TARGET_NEON"
6056{
22f9db64 6057 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
6058 int regno = REGNO (operands[1]);
6059 rtx ops[6];
88f77cba
JB
6060 ops[0] = operands[0];
6061 ops[1] = gen_rtx_REG (DImode, regno);
6062 ops[2] = gen_rtx_REG (DImode, regno + 2);
6063 ops[3] = gen_rtx_REG (DImode, regno + 4);
6064 ops[4] = gen_rtx_REG (DImode, regno + 6);
22f9db64 6065 ops[5] = GEN_INT (lane);
6308e208 6066 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
88f77cba
JB
6067 ops);
6068 return "";
c956e102 6069}
f7379e5e 6070 [(set_attr "type" "neon_store4_one_lane<q>")]
c956e102 6071)
88f77cba 6072
22f9db64
CB
6073;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6074;; here on big endian targets.
;; Single-lane vst4 from an XI-mode register group (VQ_HS modes).
;; The endian-adjusted lane is compared with half the number of units in
;; <MODE>mode: a lane in the upper half is rebased by max / 2 and the base
;; register number is advanced by 2.  The register list then uses regno,
;; regno + 4, regno + 8 and regno + 12.
;; Only one lane of each register is written, so the insn is classified as
;; neon_store4_one_lane<q>, matching the VD_LANE variant of this pattern
;; and the vst3/vld4 lane patterns.
88f77cba 6075(define_insn "neon_vst4_lane<mode>"
6308e208 6076 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
88f77cba
JB
6077 (unspec:<V_four_elem>
6078 [(match_operand:XI 1 "s_register_operand" "w")
6079 (match_operand:SI 2 "immediate_operand" "i")
4b644867 6080 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
88f77cba
JB
6081 UNSPEC_VST4_LANE))]
6082 "TARGET_NEON"
6083{
22f9db64 6084 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
88f77cba
JB
6085 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6086 int regno = REGNO (operands[1]);
6087 rtx ops[6];
e68ffe57 6088 if (lane >= max / 2)
88f77cba
JB
6089 {
6090 lane -= max / 2;
6091 regno += 2;
6092 }
6093 ops[0] = operands[0];
6094 ops[1] = gen_rtx_REG (DImode, regno);
6095 ops[2] = gen_rtx_REG (DImode, regno + 4);
6096 ops[3] = gen_rtx_REG (DImode, regno + 8);
6097 ops[4] = gen_rtx_REG (DImode, regno + 12);
6098 ops[5] = GEN_INT (lane);
6308e208 6099 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
88f77cba
JB
6100 ops);
6101 return "";
c956e102 6102}
f7379e5e 6103 [(set_attr "type" "neon_store4_one_lane<q>")]
c956e102 6104)
88f77cba 6105
46b57af1
TB
;; Sign/zero-extend (SE iterator) the elements of operand 1 selected by
;; operand 2, which must satisfy vect_par_constant_low.  Emits vmovl with
;; the %e1 operand form.  Disabled on big-endian targets by the insn
;; condition.
;; NOTE(review): %e presumably prints the low D half of the Q register.
6106(define_insn "neon_vec_unpack<US>_lo_<mode>"
6107 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6108 (SE:<V_unpack> (vec_select:<V_HALF>
6109 (match_operand:VU 1 "register_operand" "w")
6110 (match_operand:VU 2 "vect_par_constant_low" ""))))]
0094f21b 6111 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6112 "vmovl.<US><V_sz_elem> %q0, %e1"
f7379e5e 6113 [(set_attr "type" "neon_shift_imm_long")]
46b57af1
TB
6114)
6115
;; Sign/zero-extend (SE iterator) the elements of operand 1 selected by
;; operand 2, which must satisfy vect_par_constant_high.  Emits vmovl with
;; the %f1 operand form.  Disabled on big-endian targets by the insn
;; condition.
;; NOTE(review): %f presumably prints the high D half of the Q register.
6116(define_insn "neon_vec_unpack<US>_hi_<mode>"
6117 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6118 (SE:<V_unpack> (vec_select:<V_HALF>
6119 (match_operand:VU 1 "register_operand" "w")
6120 (match_operand:VU 2 "vect_par_constant_high" ""))))]
0094f21b 6121 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6122 "vmovl.<US><V_sz_elem> %q0, %f1"
f7379e5e 6123 [(set_attr "type" "neon_shift_imm_long")]
46b57af1
TB
6124)
6125
;; Expander for vec_unpack<US>_hi on VU modes (little-endian only).
;; Builds a PARALLEL holding the indices <V_mode_nunits>/2 ..
;; <V_mode_nunits>-1 and passes it as the selector operand of
;; neon_vec_unpack<US>_hi_<mode>.
6126(define_expand "vec_unpack<US>_hi_<mode>"
cd65e265 6127 [(match_operand:<V_unpack> 0 "register_operand")
46b57af1 6128 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
0094f21b 6129 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6130 {
6131 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6132 rtx t1;
6133 int i;
6134 for (i = 0; i < (<V_mode_nunits>/2); i++)
6135 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6136
6137 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6138 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6139 operands[1],
6140 t1));
6141 DONE;
6142 }
6143)
6144
;; Expander for vec_unpack<US>_lo on VU modes (little-endian only).
;; Builds a PARALLEL holding the indices 0 .. <V_mode_nunits>/2 - 1 and
;; passes it as the selector operand of neon_vec_unpack<US>_lo_<mode>.
6145(define_expand "vec_unpack<US>_lo_<mode>"
cd65e265
DZ
6146 [(match_operand:<V_unpack> 0 "register_operand")
6147 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
0094f21b 6148 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6149 {
6150 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6151 rtx t1;
6152 int i;
6153 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6154 RTVEC_ELT (v, i) = GEN_INT (i);
6155 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6156 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6157 operands[1],
6158 t1));
6159 DONE;
6160 }
6161)
6162
;; Widening multiply of the elements of operands 1 and 3 selected by
;; operand 2 (a vect_par_constant_low selector; the second vec_select
;; reuses it through match_dup).  Emits vmull on the %e forms of both
;; inputs.  Disabled on big-endian targets by the insn condition.
6163(define_insn "neon_vec_<US>mult_lo_<mode>"
6164 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6165 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6166 (match_operand:VU 1 "register_operand" "w")
6167 (match_operand:VU 2 "vect_par_constant_low" "")))
6168 (SE:<V_unpack> (vec_select:<V_HALF>
6169 (match_operand:VU 3 "register_operand" "w")
6170 (match_dup 2)))))]
0094f21b 6171 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6172 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
f7379e5e 6173 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6174)
6175
;; Expander for vec_widen_<US>mult_lo on VU modes (little-endian only).
;; Builds a PARALLEL of the indices 0 .. <V_mode_nunits>/2 - 1 and emits
;; neon_vec_<US>mult_lo_<mode> with it as the selector.  Note the argument
;; order of the generator: (dest, input 1, selector, input 2).
6176(define_expand "vec_widen_<US>mult_lo_<mode>"
cd65e265
DZ
6177 [(match_operand:<V_unpack> 0 "register_operand")
6178 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6179 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
0094f21b 6180 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6181 {
6182 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6183 rtx t1;
6184 int i;
6185 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6186 RTVEC_ELT (v, i) = GEN_INT (i);
6187 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6188
6189 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6190 operands[1],
6191 t1,
6192 operands[2]));
6193 DONE;
6194 }
6195)
6196
;; Widening multiply of the elements of operands 1 and 3 selected by
;; operand 2 (a vect_par_constant_high selector; the second vec_select
;; reuses it through match_dup).  Emits vmull on the %f forms of both
;; inputs.  Disabled on big-endian targets by the insn condition.
6197(define_insn "neon_vec_<US>mult_hi_<mode>"
6198 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6199 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6200 (match_operand:VU 1 "register_operand" "w")
6201 (match_operand:VU 2 "vect_par_constant_high" "")))
6202 (SE:<V_unpack> (vec_select:<V_HALF>
6203 (match_operand:VU 3 "register_operand" "w")
6204 (match_dup 2)))))]
0094f21b 6205 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1 6206 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
f7379e5e 6207 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6208)
6209
;; Expander for vec_widen_<US>mult_hi on VU modes (little-endian only).
;; Builds a PARALLEL of the indices <V_mode_nunits>/2 ..
;; <V_mode_nunits>-1 and emits neon_vec_<US>mult_hi_<mode> with it as the
;; selector.  Generator argument order: (dest, input 1, selector, input 2).
6210(define_expand "vec_widen_<US>mult_hi_<mode>"
cd65e265
DZ
6211 [(match_operand:<V_unpack> 0 "register_operand")
6212 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6213 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
0094f21b 6214 "TARGET_NEON && !BYTES_BIG_ENDIAN"
46b57af1
TB
6215 {
6216 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6217 rtx t1;
6218 int i;
6219 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6220 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6221 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6222
6223 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6224 operands[1],
6225 t1,
6226 operands[2]));
6227 DONE;
6228
6229 }
6230)
6231
36ba4aae
IR
;; Widening left shift: operand 1 (a VW-mode vector) is shifted left by the
;; constant in operand 2 and the result is sign/zero-extended to
;; <V_widen>.  Emitted as a single vshll.
6232(define_insn "neon_vec_<US>shiftl_<mode>"
6233 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6234 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6235 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6236 "TARGET_NEON"
6237{
6238 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6239}
f7379e5e 6240 [(set_attr "type" "neon_shift_imm_long")]
36ba4aae
IR
6241)
6242
;; Expander for vec_widen_<US>shiftl_lo on VU modes (little-endian only).
;; Takes the <V_HALF>mode subreg of operand 1 at byte offset 0 and feeds it
;; to neon_vec_<US>shiftl_<V_half> together with the shift amount.
6243(define_expand "vec_widen_<US>shiftl_lo_<mode>"
cd65e265
DZ
6244 [(match_operand:<V_unpack> 0 "register_operand")
6245 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6246 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6247 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6248 {
6249 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6250 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6251 operands[2]));
6252 DONE;
6253 }
6254)
6255
;; Expander for vec_widen_<US>shiftl_hi on VU modes (little-endian only).
;; Takes the <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the second half, and feeds it to
;; neon_vec_<US>shiftl_<V_half> together with the shift amount.
6256(define_expand "vec_widen_<US>shiftl_hi_<mode>"
cd65e265
DZ
6257 [(match_operand:<V_unpack> 0 "register_operand")
6258 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6259 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6260 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6261 {
6262 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6263 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6264 GET_MODE_SIZE (<V_HALF>mode)),
6265 operands[2]));
6266 DONE;
6267 }
6268)
6269
46b57af1
TB
6270;; Vectorize for non-neon-quad case
;; Sign/zero-extend every element of the VDI-mode operand 1 into the
;; <V_widen>-mode operand 0 with a single vmovl.
6271(define_insn "neon_unpack<US>_<mode>"
6272 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
7f27ec08 6273 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
46b57af1 6274 "TARGET_NEON"
7f27ec08 6275 "vmovl.<US><V_sz_elem> %q0, %P1"
f7379e5e 6276 [(set_attr "type" "neon_move")]
46b57af1
TB
6277)
6278
;; Expander for vec_unpack<US>_lo on VDI modes.  The whole input is widened
;; into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, then the
;; low half of that temporary is extracted into operand 0 with
;; neon_vget_low<V_widen_l>.
6279(define_expand "vec_unpack<US>_lo_<mode>"
cd65e265 6280 [(match_operand:<V_double_width> 0 "register_operand")
46b57af1
TB
6281 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6282 "TARGET_NEON"
6283{
6284 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6285 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6286 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6287
6288 DONE;
6289}
6290)
6291
;; Expander for vec_unpack<US>_hi on VDI modes.  The whole input is widened
;; into a fresh <V_widen>mode pseudo with neon_unpack<US>_<mode>, then the
;; high half of that temporary is extracted into operand 0 with
;; neon_vget_high<V_widen_l>.
6292(define_expand "vec_unpack<US>_hi_<mode>"
cd65e265 6293 [(match_operand:<V_double_width> 0 "register_operand")
46b57af1
TB
6294 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6295 "TARGET_NEON"
6296{
6297 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6298 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6299 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6300
6301 DONE;
6302}
6303)
6304
;; Widening multiply of two VDI-mode vectors: both inputs are sign/zero-
;; extended to <V_widen> and multiplied.  Emitted as a single vmull.
6305(define_insn "neon_vec_<US>mult_<mode>"
6306 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6307 (mult:<V_widen> (SE:<V_widen>
6308 (match_operand:VDI 1 "register_operand" "w"))
6309 (SE:<V_widen>
6310 (match_operand:VDI 2 "register_operand" "w"))))]
6311 "TARGET_NEON"
7f27ec08 6312 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
f7379e5e 6313 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
46b57af1
TB
6314)
6315
;; Expander for vec_widen_<US>mult_hi on VDI modes.  Computes the full
;; widening product into a <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then extracts its high half into operand 0.
6316(define_expand "vec_widen_<US>mult_hi_<mode>"
cd65e265
DZ
6317 [(match_operand:<V_double_width> 0 "register_operand")
6318 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6319 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
46b57af1
TB
6320 "TARGET_NEON"
6321 {
6322 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6323 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6324 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6325
6326 DONE;
6327
6328 }
6329)
6330
;; Expander for vec_widen_<US>mult_lo on VDI modes.  Computes the full
;; widening product into a <V_widen>mode temporary with
;; neon_vec_<US>mult_<mode>, then extracts its low half into operand 0.
6331(define_expand "vec_widen_<US>mult_lo_<mode>"
cd65e265
DZ
6332 [(match_operand:<V_double_width> 0 "register_operand")
6333 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6334 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
46b57af1
TB
6335 "TARGET_NEON"
6336 {
6337 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6338 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6339 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6340
6341 DONE;
6342
6343 }
6344)
0f38f229 6345
;; Expander for vec_widen_<US>shiftl_hi on VDI modes.  Computes the full
;; widening shift into a <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then extracts its high half into operand 0.
36ba4aae 6346(define_expand "vec_widen_<US>shiftl_hi_<mode>"
cd65e265
DZ
6347 [(match_operand:<V_double_width> 0 "register_operand")
6348 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6349 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6350 "TARGET_NEON"
6351 {
6352 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6353 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6354 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6355
6356 DONE;
6357 }
6358)
6359
;; Expander for vec_widen_<US>shiftl_lo on VDI modes.  Computes the full
;; widening shift into a <V_widen>mode temporary with
;; neon_vec_<US>shiftl_<mode>, then extracts its low half into operand 0.
6360(define_expand "vec_widen_<US>shiftl_lo_<mode>"
cd65e265
DZ
6361 [(match_operand:<V_double_width> 0 "register_operand")
6362 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6363 (match_operand:SI 2 "immediate_operand")]
36ba4aae
IR
6364 "TARGET_NEON"
6365 {
6366 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6367 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6368 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6369
6370 DONE;
6371 }
6372)
6373
0094f21b
JB
6374; FIXME: These instruction patterns can't be used safely in big-endian mode
6375; because the ordering of vector elements in Q registers is different from what
6376; the semantics of the instructions require.
6377
0f38f229
TB
;; Pack two VN-mode vectors into one <V_narrow_pack> vector by truncating
;; each element.  Two vmovn instructions are emitted: operand 1 into the
;; %e0 half and operand 2 into the %f0 half.  The output is earlyclobber
;; ("=&w") because the first vmovn writes it before operand 2 is read.
;; The "length" attribute is 8 to account for the two instructions.
6378(define_insn "vec_pack_trunc_<mode>"
6379 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6380 (vec_concat:<V_narrow_pack>
6381 (truncate:<V_narrow>
6382 (match_operand:VN 1 "register_operand" "w"))
6383 (truncate:<V_narrow>
6384 (match_operand:VN 2 "register_operand" "w"))))]
0094f21b 6385 "TARGET_NEON && !BYTES_BIG_ENDIAN"
30cecf17 6386 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
f7379e5e 6387 [(set_attr "type" "multiple")
30cecf17 6388 (set_attr "length" "8")]
0f38f229
TB
6389)
6390
6391;; For the non-quad case.
;; Truncate each element of the VN-mode operand 1 into the <V_narrow>-mode
;; operand 0 with a single vmovn.  Little-endian only, per the insn
;; condition.
6392(define_insn "neon_vec_pack_trunc_<mode>"
6393 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
7f27ec08 6394 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
0094f21b 6395 "TARGET_NEON && !BYTES_BIG_ENDIAN"
7f27ec08 6396 "vmovn.i<V_sz_elem>\t%P0, %q1"
f7379e5e 6397 [(set_attr "type" "neon_move_narrow_q")]
0f38f229
TB
6398)
6399
;; Expander for vec_pack_trunc on VSHFT modes (little-endian only).
;; Operands 1 and 2 are placed in the low and high halves of a
;; <V_DOUBLE>mode temporary with move_lo_quad/move_hi_quad, and that
;; temporary is then narrowed into operand 0 with
;; neon_vec_pack_trunc_<V_double>.
6400(define_expand "vec_pack_trunc_<mode>"
cd65e265
DZ
6401 [(match_operand:<V_narrow_pack> 0 "register_operand")
6402 (match_operand:VSHFT 1 "register_operand")
0f38f229 6403 (match_operand:VSHFT 2 "register_operand")]
0094f21b 6404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
0f38f229
TB
6405{
6406 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6407
6408 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6409 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6410 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6411 DONE;
6412})
bd1aa4f4
SS
6413
;; Matches abs (operand 1 - operand 2) on VF modes written with an explicit
;; minus rtx, and emits it as a single vabd.  Enabled by
;; ARM_HAVE_NEON_<MODE>_ARITH.
6414(define_insn "neon_vabd<mode>_2"
d0b6b5a7
KT
6415 [(set (match_operand:VF 0 "s_register_operand" "=w")
6416 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6417 (match_operand:VF 2 "s_register_operand" "w"))))]
bb78e587 6418 "ARM_HAVE_NEON_<MODE>_ARITH"
bd1aa4f4 6419 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
d0b6b5a7 6420 [(set_attr "type" "neon_fp_abd_s<q>")]
bd1aa4f4
SS
6421)
6422
;; Same as neon_vabd<mode>_2, but matches the form in which the subtraction
;; is represented as an UNSPEC_VSUB of operands 1 and 2 instead of a minus
;; rtx.  Emitted as a single vabd.
6423(define_insn "neon_vabd<mode>_3"
d0b6b5a7
KT
6424 [(set (match_operand:VF 0 "s_register_operand" "=w")
6425 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6426 (match_operand:VF 2 "s_register_operand" "w")]
6427 UNSPEC_VSUB)))]
bb78e587 6428 "ARM_HAVE_NEON_<MODE>_ARITH"
bd1aa4f4 6429 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
d0b6b5a7 6430 [(set_attr "type" "neon_fp_abd_s<q>")]
bd1aa4f4 6431)
436016f4
DZ
6432
;; Matrix multiply-accumulate (MATMUL iterator): the unspec of the two
;; V16QI inputs (operands 2 and 3) is added to the V4SI accumulator in
;; operand 1, which is tied to the output.  Requires TARGET_I8MM.
6433(define_insn "neon_<sup>mmlav16qi"
6434 [(set (match_operand:V4SI 0 "register_operand" "=w")
6435 (plus:V4SI
6436 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
6437 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
6438 (match_operand:V4SI 1 "register_operand" "0")))]
6439 "TARGET_I8MM"
6440 "v<sup>mmla.<mmla_sfx>\t%q0, %q2, %q3"
6441 [(set_attr "type" "neon_mla_s_q")]
6442)
eb7ba6c3
DZ
6443
;; BF16 dot product with accumulate: UNSPEC_DOT_S of the two <VSF2BF>
;; inputs (operands 2 and 3) is added to the VCVTF accumulator in
;; operand 1, which is tied to the output.  Emitted as vdot.bf16.
6444(define_insn "neon_vbfdot<VCVTF:mode>"
6445 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6446 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6447 (unspec:VCVTF [
6448 (match_operand:<VSF2BF> 2 "register_operand" "w")
6449 (match_operand:<VSF2BF> 3 "register_operand" "w")]
6450 UNSPEC_DOT_S)))]
6451 "TARGET_BF16_SIMD"
6452 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
6453 [(set_attr "type" "neon_dot<q>")]
6454)
6455
;; By-lane BF16 dot product with accumulate, where the lane source
;; (operand 3) is a V4BF register and operand 4 is the lane index, printed
;; directly as %P3[%c4].  Operand 3 uses the "x" register constraint rather
;; than "w".
6456(define_insn "neon_vbfdot_lanev4bf<VCVTF:mode>"
6457 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6458 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6459 (unspec:VCVTF [
6460 (match_operand:<VSF2BF> 2 "register_operand" "w")
6461 (match_operand:V4BF 3 "register_operand" "x")
6462 (match_operand:SI 4 "immediate_operand" "i")]
6463 UNSPEC_DOT_S)))]
6464 "TARGET_BF16_SIMD"
6465 "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"
6466 [(set_attr "type" "neon_dot<q>")]
6467)
6468
;; By-lane BF16 dot product with accumulate, where the lane source
;; (operand 3) is a V8BF register.  "half" is the unit count of operand 3's
;; mode divided by 4.  Lane indices below it are printed against the %e3
;; form; the others are rebased by subtracting "half" (operands[4] is
;; overwritten) and printed against the %f3 form.
6469(define_insn "neon_vbfdot_lanev8bf<VCVTF:mode>"
6470 [(set (match_operand:VCVTF 0 "register_operand" "=w")
6471 (plus:VCVTF (match_operand:VCVTF 1 "register_operand" "0")
6472 (unspec:VCVTF [
6473 (match_operand:<VSF2BF> 2 "register_operand" "w")
6474 (match_operand:V8BF 3 "register_operand" "x")
6475 (match_operand:SI 4 "immediate_operand" "i")]
6476 UNSPEC_DOT_S)))]
6477 "TARGET_BF16_SIMD"
6478 {
6479 int lane = INTVAL (operands[4]);
6480 int half = GET_MODE_NUNITS (GET_MODE (operands[3])) / 4;
6481 if (lane < half)
6482 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %e3[%c4]";
6483 else
6484 {
6485 operands[4] = GEN_INT (lane - half);
6486 return "vdot.bf16\\t%<V_reg>0, %<V_reg>2, %f3[%c4]";
6487 }
6488 }
6489 [(set_attr "type" "neon_dot<q>")]
6490)
8e6d0dba
DZ
6491
;; Convert a V4SF vector (operand 1) to BFloat16, producing a VBFCVT-mode
;; result (operand 0) via UNSPEC_BFCVT.  The destination is printed with
;; the <V_bf_low> modifier and the source as a quad register (%q1).
;; Only enabled when TARGET_BF16_SIMD holds.
6492(define_insn "neon_vbfcvtv4sf<VBFCVT:mode>"
6493 [(set (match_operand:VBFCVT 0 "register_operand" "=w")
6494 (unspec:VBFCVT [(match_operand:V4SF 1 "register_operand" "w")]
6495 UNSPEC_BFCVT))]
6496 "TARGET_BF16_SIMD"
6497 "vcvt.bf16.f32\\t%<V_bf_low>0, %q1"
6498 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6499)
6500
;; Convert a V4SF vector (operand 2) to BFloat16 and write it to the part of
;; the V8BF destination printed by "%f0" (UNSPEC_BFCVT_HIGH).
;; Operand 1 is the existing V8BF value and is tied to operand 0 by the "0"
;; constraint -- presumably so that the half not written by the instruction
;; is preserved; confirm.
;; Only enabled when TARGET_BF16_SIMD holds.
6501(define_insn "neon_vbfcvtv4sf_highv8bf"
6502 [(set (match_operand:V8BF 0 "register_operand" "=w")
6503 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
6504 (match_operand:V4SF 2 "register_operand" "w")]
6505 UNSPEC_BFCVT_HIGH))]
6506 "TARGET_BF16_SIMD"
6507 "vcvt.bf16.f32\\t%f0, %q2"
6508 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
6509)
6510
;; Scalar conversion from SF (operand 1) to BF (operand 0) via UNSPEC_BFCVT,
;; emitted as vcvtb.bf16.f32.  Both operands use the "t" constraint.
;; Only enabled when TARGET_BF16_FP holds.
6511(define_insn "neon_vbfcvtsf"
6512 [(set (match_operand:BF 0 "register_operand" "=t")
6513 (unspec:BF [(match_operand:SF 1 "register_operand" "t")]
6514 UNSPEC_BFCVT))]
6515 "TARGET_BF16_FP"
6516 "vcvtb.bf16.f32\\t%0, %1"
6517 [(set_attr "type" "f_cvt")]
6518)
6519
;; Convert a BFloat16 vector (operand 1, VBFCVT mode) to V4SF (operand 0)
;; via UNSPEC_BFCVT.  Implemented as a widening left shift by 16
;; (vshll.u32 ... #16) of the source printed with the <V_bf_low> modifier.
;; Only enabled when TARGET_BF16_SIMD holds.
6520(define_insn "neon_vbfcvt<VBFCVT:mode>"
6521 [(set (match_operand:V4SF 0 "register_operand" "=w")
6522 (unspec:V4SF [(match_operand:VBFCVT 1 "register_operand" "w")]
6523 UNSPEC_BFCVT))]
6524 "TARGET_BF16_SIMD"
6525 "vshll.u32\\t%q0, %<V_bf_low>1, #16"
6526 [(set_attr "type" "neon_shift_imm_q")]
6527)
6528
;; Convert part of a V8BF vector (operand 1) to V4SF (operand 0) via
;; UNSPEC_BFCVT_HIGH.  Implemented as a widening left shift by 16
;; (vshll.u32 ... #16) of the source register printed by "%f1".
;; Only enabled when TARGET_BF16_SIMD holds.
6529(define_insn "neon_vbfcvt_highv8bf"
6530 [(set (match_operand:V4SF 0 "register_operand" "=w")
6531 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
6532 UNSPEC_BFCVT_HIGH))]
6533 "TARGET_BF16_SIMD"
6534 "vshll.u32\\t%q0, %f1, #16"
6535 [(set_attr "type" "neon_shift_imm_q")]
6536)
6537
6538;; Convert a BF scalar operand to SF via VSHL.
6539;; VSHL doesn't accept 32-bit registers where the BF and SF scalar operands
6540;; would be allocated, therefore the operands must be converted to intermediate
6541;; vectors (i.e. V2SI) in order to use 64-bit registers.
6542(define_expand "neon_vbfcvtbf"
6543 [(match_operand:SF 0 "register_operand")
6544 (unspec:SF [(match_operand:BF 1 "register_operand")] UNSPEC_BFCVT)]
6545 "TARGET_BF16_FP"
6546{
 /* Two fresh V2SI pseudos: op1 receives the BF input, op0 receives the
    shifted value.  */
6547 rtx op0 = gen_reg_rtx (V2SImode);
6548 rtx op1 = gen_reg_rtx (V2SImode);
 /* Step 1: move the BF input (operands[1]) into a V2SI register.  */
6549 emit_insn (gen_neon_vbfcvtbf_cvtmodev2si (op1, operands[1]));
 /* Step 2: shift left by 16 bits.  */
6550 emit_insn (gen_neon_vshl_nv2si (op0, op1, gen_int_mode(16, SImode)));
 /* Step 3: move the V2SI result into the SF output (operands[0]).  */
6551 emit_insn (gen_neon_vbfcvtbf_cvtmodesf (operands[0], op0));
 /* All insns were emitted above; the RTL template is not used for
    generation.  */
6552 DONE;
6553})
6554
6555;; Convert BF mode to V2SI and V2SI to SF.
6556;; Implement this by allocating a 32-bit operand in the low half of a 64-bit
6557;; register indexed by a 32-bit sub-register number.
6558;; This will generate reloads, but the compiler can optimize out the moves.
6559;; Use the 'x' constraint to guarantee the 32-bit sub-registers are in an
6560;; indexable range, so as to avoid extra moves.
;; Mode-changing helper: operand 0 has a VBFCVTM mode and operand 1 the
;; corresponding <V_bf_cvt_m> mode.  Operand 1 is tied to operand 0 by the
;; "0" constraint and the output template is empty, so no assembly is
;; emitted by this pattern itself.
;; Only enabled when TARGET_BF16_FP holds.
6561(define_insn "neon_vbfcvtbf_cvtmode<mode>"
6562 [(set (match_operand:VBFCVTM 0 "register_operand" "=x")
6563 (unspec:VBFCVTM [(match_operand:<V_bf_cvt_m> 1 "register_operand" "0")]
6564 UNSPEC_BFCVT))]
6565 "TARGET_BF16_FP"
6566 ""
6567)
2d22ab64
KT
6568
;; BFloat16 matrix multiply-accumulate.
;; Operand 0 (V4SF) = operand 1 + UNSPEC_BFMMLA (operand 2, operand 3), with
;; both multiplicands in V8BF.  Operand 1 is the accumulator, tied to
;; operand 0 by the "0" constraint.
;; Only enabled when TARGET_BF16_SIMD holds.
6569(define_insn "neon_vmmlav8bf"
6570 [(set (match_operand:V4SF 0 "register_operand" "=w")
6571 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
6572 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6573 (match_operand:V8BF 3 "register_operand" "w")]
6574 UNSPEC_BFMMLA)))]
6575 "TARGET_BF16_SIMD"
6576 "vmmla.bf16\\t%q0, %q2, %q3"
6577 [(set_attr "type" "neon_fp_mla_s_q")]
6578)
6579
;; BFloat16 fused multiply-accumulate into V4SF (vector by vector form).
;; Operand 0 = operand 1 + BF_MA unspec (operand 2, operand 3), with both
;; multiplicands in V8BF.  The <bt> substitution, driven by the BF_MA
;; iterator, selects the instruction variant (vfma<bt>.bf16).
;; Operand 1 is the accumulator, tied to operand 0 by the "0" constraint.
;; Only enabled when TARGET_BF16_SIMD holds.
6580(define_insn "neon_vfma<bt>v8bf"
6581 [(set (match_operand:V4SF 0 "register_operand" "=w")
6582 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6583 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6584 (match_operand:V8BF 3 "register_operand" "w")]
6585 BF_MA)))]
6586 "TARGET_BF16_SIMD"
6587 "vfma<bt>.bf16\\t%q0, %q2, %q3"
6588 [(set_attr "type" "neon_fp_mla_s_q")]
6589)
6590
;; BFloat16 fused multiply-accumulate into V4SF, by-lane form.
;; Operand 2 is the V8BF vector multiplicand; operand 3 is the V4BF vector
;; holding the lane (constrained to "x"); operand 4 is the constant lane
;; index, printed as "%P3[%c4]".
;; Operand 1 is the accumulator, tied to operand 0 by the "0" constraint.
;; Only enabled when TARGET_BF16_SIMD holds.
6591(define_insn "neon_vfma<bt>_lanev8bf"
6592 [(set (match_operand:V4SF 0 "register_operand" "=w")
6593 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6594 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6595 (match_operand:V4BF 3 "register_operand" "x")
6596 (match_operand:SI 4 "const_int_operand" "n")]
6597 BF_MA)))]
6598 "TARGET_BF16_SIMD"
6599 "vfma<bt>.bf16\\t%q0, %q2, %P3[%c4]"
6600 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6601)
6602
;; Expander for the by-lane BFloat16 fused multiply-accumulate where the lane
;; source (operand 3) is a full V8BF vector and the lane index (operand 4)
;; is asserted to be in the range 0..7.
;; Indices 0..3 are forwarded unchanged to neon_vfma<bt>_lanev8bf.
;; For indices 4..7 the high part of operand 3 is first extracted into a new
;; V4BF register with neon_vget_highv8bf, the index is rebased by 4, and
;; neon_vfma<bt>_lanev8bf is emitted on that register.
;; Only enabled when TARGET_BF16_SIMD holds.
;; NOTE(review): in the lane < 4 path operands[3] (declared V8BF here) is
;; passed directly as operand 3 of neon_vfma<bt>_lanev8bf, which declares that
;; operand as V4BF -- confirm this mode mismatch is intended or handled.
6603(define_expand "neon_vfma<bt>_laneqv8bf"
6604 [(set (match_operand:V4SF 0 "register_operand" "=w")
6605 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
6606 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
6607 (match_operand:V8BF 3 "register_operand" "x")
6608 (match_operand:SI 4 "const_int_operand" "n")]
6609 BF_MA)))]
6610 "TARGET_BF16_SIMD"
6611 {
6612 int lane = INTVAL (operands[4]);
6613 gcc_assert (IN_RANGE(lane, 0, 7));
6614 if (lane < 4)
6615 {
 /* Lane index is used as given.  */
6616 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], operands[3], operands[4]));
6617 }
6618 else
6619 {
 /* Extract the high part of operand 3 into a V4BF register and make
    the lane index relative to it.  */
6620 rtx op_highpart = gen_reg_rtx (V4BFmode);
6621 emit_insn (gen_neon_vget_highv8bf (op_highpart, operands[3]));
6622 operands[4] = GEN_INT (lane - 4);
6623 emit_insn (gen_neon_vfma<bt>_lanev8bf (operands[0], operands[1], operands[2], op_highpart, operands[4]));
6624 }
6625 DONE;
6626 }
6627 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
6628)