]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-simd.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
CommitLineData
;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
20
;; Move expander for the VALL_F16 vector modes.  When the destination is
;; memory, the source is forced into a register unless it is a zero
;; immediate that can be stored directly (see the comment in the body).
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
40
;; Misaligned vector move expander for the VALL modes.  If neither operand
;; is a register, operand 1 is copied into a fresh register first so that
;; the resulting move always has at least one register operand.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
53
;; Broadcast the scalar operand 1 into every element of the integer vector
;; operand 0.  Alternative 0 duplicates element 0 of a vector register;
;; alternative 1 (typed neon_from_gp) duplicates from an "r" register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)
64
;; Broadcast the scalar operand 1 into every element of the floating-point
;; vector operand 0 (VDQF_F16 modes), using DUP from element 0 of operand 1.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
73
;; Broadcast lane operand 2 of vector operand 1 into every element of
;; operand 0.  The lane number is converted with aarch64_endian_lane_rtx
;; before it is printed in the DUP instruction.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
88
;; As aarch64_dup_lane<mode>, but the source vector (operand 1) has mode
;; <VSWAP_WIDTH> rather than the mode of the result, so the lane number is
;; converted with respect to the source mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
103
;; Move pattern for the VD vector modes.  It only matches when the
;; destination is a register or the source satisfies
;; aarch64_simd_reg_or_zero.  Alternatives, in order:
;;   0 load (ldr)             4 umov of %1.d[0]
;;   1 store of zero via xzr  5 fmov
;;   2 store (str)            6 mov
;;   3 vector register copy   7 immediate, printed by
;;                              aarch64_output_simd_mov_immediate (width 64)
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
                "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
                "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
        return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)
131
;; Move pattern for the VQ vector modes.  It only matches when the
;; destination is a register or the source satisfies
;; aarch64_simd_reg_or_zero.  Alternatives 0-3 are load, store of zero as
;; an xzr pair (stp), store and vector register copy.  Alternatives 4-6
;; emit "#" and have length 8: they produce no single instruction here and
;; rely on being split into two instructions.  Alternative 7 is an
;; immediate printed by aarch64_output_simd_mov_immediate (width 128).
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
                "=w, Umn, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
                "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
        return "ldr\t%q0, %1";
    case 1:
        return "stp\txzr, xzr, %0";
    case 2:
        return "str\t%q1, %0";
    case 3:
        return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
        return "#";
    case 7:
        return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
        gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
166
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
;; The pattern only matches when operand 2, after ENDIAN_LANE_N adjustment,
;; selects lane 0 of operand 1.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                        (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
179
;; Load two DREG-mode values with a single LDP.  The insn condition
;; requires the address of operand 3 to equal the address of operand 1
;; plus the size of the first mode, i.e. the two memory locations must be
;; adjacent.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
193
;; Store two DREG-mode registers with a single STP.  The insn condition
;; requires the address of operand 2 to equal the address of operand 0
;; plus the size of the first mode, i.e. the two memory locations must be
;; adjacent.
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
207
;; Load two VQ-mode vectors with a single LDP of Q registers.  The insn
;; condition requires the address of operand 3 to equal the address of
;; operand 1 plus the size of the first mode.
(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[3], 0),
                    plus_constant (Pmode,
                                   XEXP (operands[1], 0),
                                   GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)
221
;; Store two VQ-mode vectors with a single STP of Q registers.  The insn
;; condition requires the address of operand 2 to equal the address of
;; operand 0 plus the size of the first mode.
(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
                plus_constant (Pmode,
                               XEXP (operands[0], 0),
                               GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)
234
235
;; After reload, split a VQ register-to-register move whose source and
;; destination are both general-purpose registers.  The replacement
;; sequence is produced by aarch64_simd_emit_reg_reg_move, called with
;; DImode and a count of 2.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})
247
;; After reload, split a VQ register-to-register move that crosses register
;; files (FP destination with GP source, or GP destination with FP source).
;; The replacement sequence is produced by aarch64_split_simd_move.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
259
;; Expand a VQ move as two <VHALF>-sized moves.
;; If the source is a general-purpose register, its low and high halves are
;; moved into the destination with move_lo_quad / move_hi_quad.
;; Otherwise the low and high halves of the source vector are extracted
;; into the low and high halves of the destination with the
;; aarch64_simd_mov_from_<mode>low / high patterns.
(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        /* Lane-selection parallels for the low and high half of SRC.  */
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
294
;; Copy the low half of VQ vector operand 1 (selected by the
;; vect_par_cnst_lo_half parallel in operand 2) into "r" register
;; operand 0, using UMOV from %1.d[0].  Only available after reload.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
305
;; Copy the high half of VQ vector operand 1 (selected by the
;; vect_par_cnst_hi_half parallel in operand 2) into "r" register
;; operand 0, using UMOV from %1.d[1].  Only available after reload.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
316
;; Vector OR-NOT: operand 0 = (~operand 1) | operand 2.
;; Note the assembly operand order: operand 2 is printed before operand 1,
;; the one that is inverted in the RTL.
(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
325
;; Vector bit clear: operand 0 = (~operand 1) & operand 2.
;; Note the assembly operand order: operand 2 is printed before operand 1,
;; the one that is inverted in the RTL.
(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
334
;; Vector integer addition: operand 0 = operand 1 + operand 2.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)
343
;; Vector integer subtraction: operand 0 = operand 1 - operand 2.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)
352
;; Vector integer multiplication for the VDQ_BHSI modes:
;; operand 0 = operand 1 * operand 2.
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
361
;; Byte-swap each element of a VDQHSD vector, emitted as a REV<Vrevsuff>
;; instruction on the byte arrangement of the registers.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)
369
;; RBIT on a byte vector (VB modes), represented as UNSPEC_RBIT.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
378
;; Count trailing zeros for the VS modes, expanded as three steps that all
;; work in place on operand 0:
;;   1. byte-swap each element of operand 1 (bswap),
;;   2. apply RBIT to the result viewed as a <VSI2QI> byte vector,
;;   3. count leading zeros of each element (clz).
;; Steps 1 and 2 together are intended to reverse the bits of each element,
;; so that its trailing zeros become leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
392
;; xorsign for the VHSDF modes, computed in the equivalent integer vector
;; mode as  operand 0 = operand 1 ^ (operand 2 & mask),  where mask is
;; HOST_WIDE_INT_M1U shifted left by (element bit size - 1) and duplicated
;; across the vector, i.e. it keeps only the top bit of each element.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  /* Integer-mode views of the two floating-point inputs.  */
  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (imode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
421
;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator and is tied to the output (constraint "0");
;; the DOTPROD unspec of operands 2 and 3 is added to it.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)
433
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
;; The expansion accumulates in place into operand 3 and then copies
;; operand 3 to operand 0.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                 DOTPROD)
                (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
468
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; This variant takes the indexed operand (operand 3) as a V8QI vector; the
;; lane number in operand 4 is converted with aarch64_endian_lane_rtx with
;; respect to V8QImode before printing.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
485
;; Indexed Dot Product where the indexed operand (operand 3) is a V16QI
;; vector.  Operand 1 is the accumulator, tied to the output; the lane
;; number in operand 4 is converted with aarch64_endian_lane_rtx with
;; respect to V16QImode before printing.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
500
;; copysign for the VHSDF modes.  A mask with only the top bit of each
;; element set (HOST_WIDE_INT_M1U shifted left by element bit size - 1) is
;; built in the integer-equivalent mode and handed to aarch64_simd_bsl
;; together with operand 2 and operand 1.
;; NOTE(review): presumably BSL takes the masked (sign) bit from operand 2
;; and all other bits from operand 1 -- confirm against aarch64_simd_bsl.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
518
;; Multiply vector operand 3 by lane operand 2 of vector operand 1 (the
;; lane is broadcast in the RTL via vec_duplicate of vec_select).  The lane
;; number is converted with aarch64_endian_lane_rtx before printing.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
          (vec_select:<VEL>
            (match_operand:VMUL 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
534
;; As *aarch64_mul3_elt<mode>, but the vector providing the lane
;; (operand 1) has mode <VSWAP_WIDTH> rather than the mode of the result,
;; so the lane number is converted with respect to that mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
550
;; Multiply vector operand 2 by the scalar operand 1 broadcast to all
;; elements; emitted as a by-element multiply using element 0 of the
;; register holding operand 1.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
561
;; FRSQRTE on a VHSDF_HSDF operand, represented as UNSPEC_RSQRTE.
(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
a6fc00da 569
;; FRSQRTS on two VHSDF_HSDF operands, represented as UNSPEC_RSQRTS.
(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
         UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
a6fc00da 578
;; Reciprocal square root for the VALLF modes.  The whole expansion is
;; delegated to aarch64_emit_approx_sqrt, called with its third argument
;; set to true.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
588
;; Multiply DF operand 3 by lane operand 2 of V2DF operand 1, producing a
;; DF result.  The lane number is converted with aarch64_endian_lane_rtx
;; with respect to V2DFmode before printing.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
         (match_operand:V2DF 1 "register_operand" "w")
         (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
603
;; Vector integer negation: operand 0 = -operand 1.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)
611
;; Vector integer absolute value, expressed with the RTL abs code.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
619
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
;; It is therefore expressed as UNSPEC_ABS rather than with the RTL abs
;; code.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
          (unspec:VSDQ_I_DI
            [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
           UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
632
;; Signed absolute difference for the VDQ_BHSI modes:
;; operand 0 = abs (operand 1 - operand 2), emitted as SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                       (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
642
;; <sur>ABDL2: the ABDL2 unspec of operands 1 and 2 (VDQV_S modes), with a
;; result in the <VDBLW> mode.
(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                          (match_operand:VDQV_S 2 "register_operand" "w")]
        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
652
;; <sur>ABAL: the ABAL unspec of operands 1 and 2 with accumulator
;; operand 3, which is tied to the output (constraint "0").  The source
;; operands are printed with the <Vhalftype> arrangement.
(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                          (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
663
;; <sur>ADALP: the ADALP unspec of operand 1 with accumulator operand 2,
;; which is tied to the output (constraint "0").
(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                          (match_operand:<VDBLW> 2 "register_operand" "0")]
        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
673
;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2  tmp.8h, op1.16b, op2.16b
;; UABAL   tmp.8h, op1.8b, op2.8b
;; UADALP  op3.4s, tmp.8h
;; MOV     op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    /* V8HI temporary holding the widened absolute differences.  */
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                               operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                              operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                              operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)
703
;; Signed absolute difference and accumulate for the VDQ_BHSI modes:
;; operand 0 = abs (operand 1 - operand 2) + operand 3, with operand 3 tied
;; to the output (constraint "0").  Emitted as SABA.
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
714
;; Floating-point absolute difference for the VHSDF_HSDF modes:
;; operand 0 = abs (operand 1 - operand 2), emitted as FABD.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
725
;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 0 takes a register as operand 2.  Alternative 1 takes a "Db"
;; immediate with operand 1 tied to the output; its assembly is produced by
;; aarch64_output_simd_mov_immediate with AARCH64_CHECK_BIC.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
746
;; For ORR (vector, register) and ORR (vector, immediate)
;; Alternative 0 takes a register as operand 2.  Alternative 1 takes a "Do"
;; immediate with operand 1 tied to the output; its assembly is produced by
;; aarch64_output_simd_mov_immediate with AARCH64_CHECK_ORR.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
767
;; Vector exclusive OR: operand 0 = operand 1 ^ operand 2, emitted as EOR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
776
;; Vector bitwise complement: operand 0 = ~operand 1, emitted as NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
784
;; Insert scalar operand 1 into one lane of vector operand 3 (tied to the
;; output).  Operand 2 is the vec_merge mask: its exact_log2 is taken as
;; the lane number, adjusted with ENDIAN_LANE_N, and re-encoded as a
;; single-bit mask that is printed through the %p2 operand modifier.
;; Alternatives: 0 inserts from element 0 of a vector register (INS),
;; 1 inserts from an "r" register (INS), 2 loads the lane from memory (LD1).
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
                (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
            (match_operand:VALL_F16 3 "register_operand" "0,0,0")
            (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
   switch (which_alternative)
     {
     case 0:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
        return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
        gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
810
;; Copy lane operand 4 of vector operand 3 into one lane of vector
;; operand 1 (tied to the output).  Operand 2 is the vec_merge mask: its
;; exact_log2 is taken as the destination lane, adjusted with
;; ENDIAN_LANE_N and re-encoded as a single-bit mask printed through %p2.
;; The source lane is converted with aarch64_endian_lane_rtx.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
              (vec_select:<VEL>
                (match_operand:VALL_F16 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
831
;; As *aarch64_simd_vec_copy_lane<mode>, but the source vector (operand 3)
;; has mode <VSWAP_WIDTH> rather than the mode of the result, so the source
;; lane is converted with respect to that mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
            (vec_duplicate:VALL_F16_NO_V2Q
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
853
;; Vector logical shift right of operand 1 by the immediate vector in
;; operand 2 (predicate aarch64_simd_rshift_imm); emitted as USHR.
43e9d192 854(define_insn "aarch64_simd_lshr<mode>"
a844a695
AL
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
43e9d192
IB
858 "TARGET_SIMD"
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
78ec3036 860 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
861)
862
;; Vector arithmetic shift right of operand 1 by the immediate vector in
;; operand 2 (predicate aarch64_simd_rshift_imm); emitted as SSHR.
863(define_insn "aarch64_simd_ashr<mode>"
a844a695
AL
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
43e9d192
IB
867 "TARGET_SIMD"
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
78ec3036 869 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
870)
871
;; Vector shift left of operand 1 by the immediate vector in operand 2
;; (predicate aarch64_simd_lshift_imm); emitted as SHL.
872(define_insn "aarch64_simd_imm_shl<mode>"
a844a695
AL
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
43e9d192
IB
876 "TARGET_SIMD"
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
78ec3036 878 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
879)
880
;; Vector shift left of operand 1 by the per-lane amounts held in the
;; vector register operand 2; emitted as SSHL.
881(define_insn "aarch64_simd_reg_sshl<mode>"
a844a695
AL
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
885 "TARGET_SIMD"
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 887 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
888)
889
;; USHL with a vector-register shift count, modelled as
;; UNSPEC_ASHIFT_UNSIGNED.  The lshr<mode>3 and vlshr<mode>3 expanders in
;; this file pass a negated count to this pattern to obtain a logical
;; right shift.
890(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
a844a695
AL
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
43e9d192
IB
894 UNSPEC_ASHIFT_UNSIGNED))]
895 "TARGET_SIMD"
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 897 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
898)
899
;; SSHL with a vector-register shift count, modelled as
;; UNSPEC_ASHIFT_SIGNED.  The ashr<mode>3 and vashr<mode>3 expanders in
;; this file pass a negated count to this pattern to obtain an arithmetic
;; right shift.
900(define_insn "aarch64_simd_reg_shl<mode>_signed"
a844a695
AL
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
43e9d192
IB
904 UNSPEC_ASHIFT_SIGNED))]
905 "TARGET_SIMD"
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 907 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
908)
909
;; Expander for a vector shift left of operand 1 by the scalar SImode amount
;; in operand 2.  A constant in [0, element width) uses the immediate SHL
;; pattern; any other constant or a memory operand is first forced into a
;; register; a register amount is duplicated into a vector and shifted with
;; SSHL.  Anything else FAILs.
910 (define_expand "ashl<mode>3"
a844a695
AL
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
43e9d192
IB
913 (match_operand:SI 2 "general_operand" "")]
914 "TARGET_SIMD"
915{
/* Width in bits of one vector element.  */
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
917 int shift_amount;
918
919 if (CONST_INT_P (operands[2]))
920 {
921 shift_amount = INTVAL (operands[2]);
/* Left-shift immediates are valid for 0 .. bit_width - 1.  */
922 if (shift_amount >= 0 && shift_amount < bit_width)
923 {
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
925 shift_amount);
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
927 operands[1],
928 tmp));
929 DONE;
930 }
931 else
932 {
/* Out-of-range constant: fall through to the register path below.  */
933 operands[2] = force_reg (SImode, operands[2]);
934 }
935 }
936 else if (MEM_P (operands[2]))
937 {
938 operands[2] = force_reg (SImode, operands[2]);
939 }
940
941 if (REG_P (operands[2]))
942 {
/* Duplicate the scalar amount, converted to the element mode, into every
   lane and use the register-shift pattern.  */
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
946 operands[2],
947 0)));
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
949 tmp));
950 DONE;
951 }
952 else
953 FAIL;
954}
955)
956
;; Expander for a vector logical shift right of operand 1 by the scalar
;; SImode amount in operand 2.  A constant in (0, element width] uses the
;; immediate USHR pattern; otherwise the amount is forced into a register,
;; negated, duplicated into a vector and applied with the unsigned
;; register-shift pattern (USHL).  Anything else FAILs.
957(define_expand "lshr<mode>3"
a844a695
AL
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
43e9d192
IB
960 (match_operand:SI 2 "general_operand" "")]
961 "TARGET_SIMD"
962{
/* Width in bits of one vector element.  */
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
964 int shift_amount;
965
966 if (CONST_INT_P (operands[2]))
967 {
968 shift_amount = INTVAL (operands[2]);
/* Right-shift immediates are accepted for 1 .. bit_width here, unlike the
   0 .. bit_width - 1 range used by ashl<mode>3.  */
969 if (shift_amount > 0 && shift_amount <= bit_width)
970 {
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
972 shift_amount);
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
974 operands[1],
975 tmp));
976 DONE;
977 }
978 else
979 operands[2] = force_reg (SImode, operands[2]);
980 }
981 else if (MEM_P (operands[2]))
982 {
983 operands[2] = force_reg (SImode, operands[2]);
984 }
985
986 if (REG_P (operands[2]))
987 {
/* Negate the scalar amount, duplicate it into every lane, and shift with
   the unsigned register-shift pattern.  */
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]));
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
993 tmp, 0)));
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
995 operands[1],
996 tmp1));
997 DONE;
998 }
999 else
1000 FAIL;
1001}
1002)
1003
;; Expander for a vector arithmetic shift right of operand 1 by the scalar
;; SImode amount in operand 2.  A constant in (0, element width] uses the
;; immediate SSHR pattern; otherwise the amount is forced into a register,
;; negated, duplicated into a vector and applied with the signed
;; register-shift pattern (SSHL).  Anything else FAILs.
1004(define_expand "ashr<mode>3"
a844a695
AL
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
43e9d192
IB
1007 (match_operand:SI 2 "general_operand" "")]
1008 "TARGET_SIMD"
1009{
/* Width in bits of one vector element.  */
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011 int shift_amount;
1012
1013 if (CONST_INT_P (operands[2]))
1014 {
1015 shift_amount = INTVAL (operands[2]);
/* Right-shift immediates are accepted for 1 .. bit_width.  */
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1017 {
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019 shift_amount);
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021 operands[1],
1022 tmp));
1023 DONE;
1024 }
1025 else
1026 operands[2] = force_reg (SImode, operands[2]);
1027 }
1028 else if (MEM_P (operands[2]))
1029 {
1030 operands[2] = force_reg (SImode, operands[2]);
1031 }
1032
1033 if (REG_P (operands[2]))
1034 {
/* Negate the scalar amount, duplicate it into every lane, and shift with
   the signed register-shift pattern.  */
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1040 tmp, 0)));
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042 operands[1],
1043 tmp1));
1044 DONE;
1045 }
1046 else
1047 FAIL;
1048}
1049)
1050
;; Expander for a vector shift left where operand 2 is itself a vector of
;; per-lane amounts; forwards directly to the SSHL register-shift pattern.
1051(define_expand "vashl<mode>3"
a844a695
AL
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
43e9d192
IB
1055 "TARGET_SIMD"
1056{
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058 operands[2]));
1059 DONE;
1060})
1061
a844a695 1062;; Using mode VDQ_BHSI as there is no V2DImode neg!
43e9d192
IB
1063;; Negating individual lanes most certainly offsets the
1064;; gain from vectorization.
;; Expander for a vector arithmetic shift right by a vector of per-lane
;; amounts: negate operand 2 and apply the signed register-shift pattern.
1065(define_expand "vashr<mode>3"
a844a695
AL
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
43e9d192
IB
1069 "TARGET_SIMD"
1070{
/* Holds the negated per-lane shift amounts.  */
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074 neg));
1075 DONE;
1076})
1077
f9a4c9a6
AV
1078;; DI vector shift
;; Expander for a DImode arithmetic shift right by an immediate matching
;; aarch64_shift_imm64_di.  A shift by 64 is rewritten to a shift by 63
;; before forwarding to ashrdi3.
1079(define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
179d2941 1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
f9a4c9a6
AV
1083 "TARGET_SIMD"
1084 {
b5b34d37
AL
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1087 a shift by 64. */
f9a4c9a6 1088 if (INTVAL (operands[2]) == 64)
b5b34d37
AL
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
f9a4c9a6
AV
1091 DONE;
1092 }
1093)
1094
;; Expander for a vector logical shift right by a vector of per-lane
;; amounts: negate operand 2 and apply the unsigned register-shift pattern.
43e9d192 1095(define_expand "vlshr<mode>3"
a844a695
AL
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
43e9d192
IB
1099 "TARGET_SIMD"
1100{
/* Holds the negated per-lane shift amounts.  */
1101 rtx neg = gen_reg_rtx (<MODE>mode);
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104 neg));
1105 DONE;
1106})
1107
252c7556
AV
;; Expander for a DImode logical shift right by an immediate matching
;; aarch64_shift_imm64_di.  A shift by 64 is emitted as a move of zero;
;; any other amount is forwarded to lshrdi3.
1108(define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112 "TARGET_SIMD"
1113 {
/* Shifting all 64 bits out leaves zero, so no shift instruction is needed.  */
1114 if (INTVAL (operands[2]) == 64)
84488801 1115 emit_move_insn (operands[0], const0_rtx);
252c7556
AV
1116 else
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118 DONE;
1119 }
1120)
1121
0b4eefd5
AL
1122;; For 64-bit modes we use shl/ushr, as this does not require a SIMD zero.
;; Whole-vector shift of a 64-bit vector (UNSPEC_VEC_SHR) by the immediate
;; in operand 2, performed on the D register as a scalar shift.  The
;; direction depends on endianness: SHL on big-endian, USHR otherwise.
1123(define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
9c004c58
RL
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1127 UNSPEC_VEC_SHR))]
0b4eefd5
AL
1128 "TARGET_SIMD"
1129 {
1130 if (BYTES_BIG_ENDIAN)
9c004c58 1131 return "shl %d0, %d1, %2";
0b4eefd5
AL
1132 else
1133 return "ushr %d0, %d1, %2";
1134 }
1135 [(set_attr "type" "neon_shift_imm")]
1136)
1137
;; Expander that sets lane operand 2 of vector operand 0 to the scalar in
;; operand 1.  The lane index is converted to a single-bit mask and passed
;; to aarch64_simd_vec_set<mode>, with operand 0 supplied as both the
;; destination and the vector being updated.
43e9d192 1138(define_expand "vec_set<mode>"
8364e58b 1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
43e9d192
IB
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1142 "TARGET_SIMD"
1143 {
/* Single-bit mask with bit number equal to the lane index.  */
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]));
1147 DONE;
1148 }
1149)
1150
1151
;; Integer vector multiply-accumulate: operand 0 = operand 2 * operand 3 +
;; operand 1, with the accumulator (operand 1) tied to the destination.
;; Emitted as MLA.
1152(define_insn "aarch64_mla<mode>"
a844a695
AL
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
43e9d192
IB
1158 "TARGET_SIMD"
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
43e9d192
IB
1161)
1162
828e70c1
JG
;; By-element multiply-accumulate: operand 3 is multiplied by lane operand 2
;; of operand 1 (broadcast to all lanes) and added to the tied accumulator
;; operand 4.  Emitted as the indexed form of MLA.
1163(define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165 (plus:VDQHS
1166 (mult:VDQHS
1167 (vec_duplicate:VDQHS
1168 (vec_select:<VEL>
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1173 "TARGET_SIMD"
1dd055a2 1174 {
/* Remap the lane index for endianness before printing it.  */
7ac29c0f 1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1dd055a2
JG
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177 }
78ec3036 1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1179)
1180
;; As *aarch64_mla_elt<mode>, but the lane is selected from a vector of mode
;; <VSWAP_WIDTH> (operand 1), so the lane index is remapped in that mode.
1181(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183 (plus:VDQHS
1184 (mult:VDQHS
1185 (vec_duplicate:VDQHS
1186 (vec_select:<VEL>
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1191 "TARGET_SIMD"
1dd055a2 1192 {
/* Remap the lane index for endianness in the lane source's mode.  */
7ac29c0f 1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1dd055a2
JG
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195 }
78ec3036 1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1197)
1198
4b40986c
JW
;; Multiply-accumulate where one multiplicand is a scalar register
;; (operand 1) broadcast to all lanes: operand 2 * dup (operand 1) plus the
;; tied accumulator operand 3.  Emitted as indexed MLA using lane 0.
1199(define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201 (plus:VDQHS
1202 (mult:VDQHS (vec_duplicate:VDQHS
3ec5b5f0 1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
4b40986c
JW
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1206 "TARGET_SIMD"
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209)
1210
;; Integer vector multiply-subtract: operand 0 = operand 1 - operand 2 *
;; operand 3, with the accumulator (operand 1) tied to the destination.
;; Emitted as MLS.
43e9d192 1211(define_insn "aarch64_mls<mode>"
a844a695
AL
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
43e9d192
IB
1216 "TARGET_SIMD"
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
43e9d192
IB
1219)
1220
828e70c1
JG
;; By-element multiply-subtract: operand 3 multiplied by lane operand 2 of
;; operand 1 (broadcast to all lanes) is subtracted from the tied
;; accumulator operand 4.  Emitted as the indexed form of MLS.
1221(define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223 (minus:VDQHS
1224 (match_operand:VDQHS 4 "register_operand" "0")
1225 (mult:VDQHS
1226 (vec_duplicate:VDQHS
1227 (vec_select:<VEL>
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1231 "TARGET_SIMD"
1dd055a2 1232 {
/* Remap the lane index for endianness before printing it.  */
7ac29c0f 1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1dd055a2
JG
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235 }
78ec3036 1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1237)
1238
;; As *aarch64_mls_elt<mode>, but the lane is selected from a vector of mode
;; <VSWAP_WIDTH> (operand 1), so the lane index is remapped in that mode.
1239(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241 (minus:VDQHS
1242 (match_operand:VDQHS 4 "register_operand" "0")
1243 (mult:VDQHS
1244 (vec_duplicate:VDQHS
1245 (vec_select:<VEL>
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1249 "TARGET_SIMD"
1dd055a2 1250 {
/* Remap the lane index for endianness in the lane source's mode.  */
7ac29c0f 1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1dd055a2
JG
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253 }
78ec3036 1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1255)
1256
4b40986c
JW
;; Multiply-subtract where one multiplicand is a scalar register
;; (operand 2) broadcast to all lanes: operand 3 * dup (operand 2) is
;; subtracted from the tied accumulator operand 1.  Emitted as indexed MLS
;; using lane 0.
1257(define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259 (minus:VDQHS
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
3ec5b5f0 1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
4b40986c
JW
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264 "TARGET_SIMD"
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267)
1268
43e9d192 1269;; Max/Min operations.
;; Element-wise integer max/min of operands 1 and 2 for each code in the
;; MAXMIN iterator; the mnemonic is built from <su> and <maxmin>.
998eaf97 1270(define_insn "<su><maxmin><mode>3"
a844a695
AL
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
43e9d192 1274 "TARGET_SIMD"
998eaf97 1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1276 [(set_attr "type" "neon_minmax<q>")]
43e9d192
IB
1277)
1278
da9c88d8
RL
;; V2DI max/min expander.  Instead of a single max/min instruction it picks
;; the comparison code matching the operation and emits a vcond
;; (compare-and-select) over operands 1 and 2.
1279(define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1283 "TARGET_SIMD"
1284{
1285 enum rtx_code cmp_operator;
1286 rtx cmp_fmt;
1287
/* Map the max/min code to the comparison under which operand 1 is the
   desired result.  */
1288 switch (<CODE>)
1289 {
1290 case UMIN:
1291 cmp_operator = LTU;
1292 break;
1293 case SMIN:
1294 cmp_operator = LT;
1295 break;
1296 case UMAX:
1297 cmp_operator = GTU;
1298 break;
1299 case SMAX:
1300 cmp_operator = GT;
1301 break;
1302 default:
1303 gcc_unreachable ();
1304 }
1305
/* NOTE(review): presumably vcond yields operand 1 where the comparison
   holds and operand 2 elsewhere -- confirm against vcondv2div2di.  */
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
6c553b76 1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
da9c88d8
RL
1308 operands[2], cmp_fmt, operands[1], operands[2]));
1309 DONE;
1310})
1311
7abab3d1
FY
1312;; Pairwise Integer Max/Min operations.
;; Integer pairwise max/min of operands 1 and 2, modelled as an unspec over
;; the MAXMINV iterator; the mnemonic is <maxmin_uns_op> with a "p" suffix.
1313(define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317 MAXMINV))]
1318 "TARGET_SIMD"
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1321)
1322
1323;; Pairwise FP Max/Min operations.
;; Floating-point pairwise max/min of operands 1 and 2, modelled as an
;; unspec over the FMAXMINV iterator; the mnemonic is <maxmin_uns_op> with a
;; "p" suffix.
1324(define_insn "aarch64_<maxmin_uns>p<mode>"
33d72b63
JW
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1328 FMAXMINV))]
7abab3d1
FY
1329 "TARGET_SIMD"
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
1332)
1333
5a908485
JG
1334;; vec_concat gives a new vector with the low elements from operand 1, and
1335;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337;; What that means is that the RTL descriptions of the patterns below
1338;; need to change depending on endianness.
43e9d192 1339
5a908485
JG
1340;; Move to the low architectural bits of the register.
1341;; On little-endian this is { operand, zeroes }
1342;; On big-endian this is { zeroes, operand }
1343
;; Little-endian form for VQ_NO2E modes: the result is { operand 1, zeroes }.
;; Three alternatives: source in a SIMD register (DUP from lane 0), source
;; in a general register via FMOV, and source in a general register via DUP.
1344(define_insn "move_lo_quad_internal_<mode>"
51437269
GW
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346 (vec_concat:VQ_NO2E
c59b7e28 1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
43e9d192 1348 (vec_duplicate:<VHALF> (const_int 0))))]
5a908485
JG
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350 "@
1351 dup\\t%d0, %1.d[0]
1352 fmov\\t%d0, %1
1353 dup\\t%d0, %1"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
488461d8
MM
1355 (set_attr "length" "4")
1356 (set_attr "arch" "simd,fp,simd")]
5a908485
JG
1357)
1358
51437269
GW
;; Little-endian form for VQ_2E modes: the zero high half is written as a
;; plain (const_int 0) rather than a vec_duplicate.  Same three alternatives
;; as the VQ_NO2E pattern.
1359(define_insn "move_lo_quad_internal_<mode>"
1360 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1361 (vec_concat:VQ_2E
1362 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1363 (const_int 0)))]
1364 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1365 "@
1366 dup\\t%d0, %1.d[0]
1367 fmov\\t%d0, %1
1368 dup\\t%d0, %1"
1369 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
488461d8
MM
1370 (set_attr "length" "4")
1371 (set_attr "arch" "simd,fp,simd")]
51437269
GW
1372)
1373
;; Big-endian form for VQ_NO2E modes: the vec_concat operands are swapped to
;; { zeroes, operand 1 }, while the emitted instructions are the same as in
;; the little-endian pattern.
5a908485 1374(define_insn "move_lo_quad_internal_be_<mode>"
51437269
GW
1375 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1376 (vec_concat:VQ_NO2E
5a908485
JG
1377 (vec_duplicate:<VHALF> (const_int 0))
1378 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1379 "TARGET_SIMD && BYTES_BIG_ENDIAN"
c59b7e28 1380 "@
7af26def
SN
1381 dup\\t%d0, %1.d[0]
1382 fmov\\t%d0, %1
1383 dup\\t%d0, %1"
d7256bb8 1384 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
488461d8
MM
1385 (set_attr "length" "4")
1386 (set_attr "arch" "simd,fp,simd")]
43e9d192
IB
1387)
1388
51437269
GW
;; Big-endian form for VQ_2E modes: { (const_int 0), operand 1 }, with the
;; same emitted instructions as the little-endian pattern.
1389(define_insn "move_lo_quad_internal_be_<mode>"
1390 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1391 (vec_concat:VQ_2E
1392 (const_int 0)
1393 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1394 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1395 "@
1396 dup\\t%d0, %1.d[0]
1397 fmov\\t%d0, %1
1398 dup\\t%d0, %1"
1399 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
488461d8
MM
1400 (set_attr "length" "4")
1401 (set_attr "arch" "simd,fp,simd")]
51437269
GW
1402)
1403
5a908485
JG
;; Expander that dispatches to the big-endian or little-endian
;; move_lo_quad_internal pattern according to BYTES_BIG_ENDIAN.
1404(define_expand "move_lo_quad_<mode>"
1405 [(match_operand:VQ 0 "register_operand")
1406 (match_operand:VQ 1 "register_operand")]
1407 "TARGET_SIMD"
1408{
1409 if (BYTES_BIG_ENDIAN)
1410 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1411 else
1412 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1413 DONE;
1414}
1415)
1416
1417;; Move operand1 to the high architectural bits of the register, keeping
1418;; the low architectural bits of operand2.
1419;; For little-endian this is { operand2, operand1 }
1420;; For big-endian this is { operand1, operand2 }
43e9d192
IB
1421
;; Little-endian form: keep the low half of operand 0 (selected by the
;; lo-half parallel in operand 2) and concatenate operand 1 after it.
;; Emitted as INS into d[1], from either a SIMD or a general register.
1422(define_insn "aarch64_simd_move_hi_quad_<mode>"
c59b7e28 1423 [(set (match_operand:VQ 0 "register_operand" "+w,w")
43e9d192
IB
1424 (vec_concat:VQ
1425 (vec_select:<VHALF>
1426 (match_dup 0)
1427 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
c59b7e28 1428 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
5a908485 1429 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
c59b7e28
SN
1430 "@
1431 ins\\t%0.d[1], %1.d[0]
1432 ins\\t%0.d[1], %1"
5a908485
JG
1433 [(set_attr "type" "neon_ins")]
1434)
1435
;; Big-endian form: the vec_concat operands are swapped (operand 1 first,
;; then the selected half of operand 0); the emitted INS is unchanged.
1436(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1437 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1438 (vec_concat:VQ
1439 (match_operand:<VHALF> 1 "register_operand" "w,r")
1440 (vec_select:<VHALF>
1441 (match_dup 0)
988fa693 1442 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
5a908485
JG
1443 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1444 "@
1445 ins\\t%0.d[1], %1.d[0]
1446 ins\\t%0.d[1], %1"
1447 [(set_attr "type" "neon_ins")]
43e9d192
IB
1448)
1449
;; Expander that builds the half-selecting parallel and dispatches to the
;; big-endian or little-endian aarch64_simd_move_hi_quad pattern.
1450(define_expand "move_hi_quad_<mode>"
1451 [(match_operand:VQ 0 "register_operand" "")
1452 (match_operand:<VHALF> 1 "register_operand" "")]
1453 "TARGET_SIMD"
1454{
/* The final argument is false; both insns match this parallel with
   vect_par_cnst_lo_half.  */
f5cbabc1 1455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
5a908485
JG
1456 if (BYTES_BIG_ENDIAN)
1457 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1458 operands[1], p));
1459 else
1460 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1461 operands[1], p));
43e9d192
IB
1462 DONE;
1463})
1464
1465;; Narrowing operations.
1466
1467;; For doubles.
;; Truncate each element of the quad vector operand 1 to the narrower
;; element type, giving a <VNARROWQ> result; emitted as XTN.
1468(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1469 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1470 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1471 "TARGET_SIMD"
1472 "xtn\\t%0.<Vntype>, %1.<Vtype>"
78ec3036 1473 [(set_attr "type" "neon_shift_imm_narrow_q")]
43e9d192
IB
1474)
1475
;; Pack-and-truncate for two 64-bit vectors: combine operands 1 and 2 into a
;; temporary double-width vector, then truncate it with one XTN.  Which
;; operand goes into the low half depends on endianness.
1476(define_expand "vec_pack_trunc_<mode>"
1477 [(match_operand:<VNARROWD> 0 "register_operand" "")
1478 (match_operand:VDN 1 "register_operand" "")
1479 (match_operand:VDN 2 "register_operand" "")]
1480 "TARGET_SIMD"
1481{
1482 rtx tempreg = gen_reg_rtx (<VDBL>mode);
8fcc1c1f
TB
/* Operand numbers for the low and high halves, swapped on big-endian.  */
1483 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1484 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
43e9d192 1485
8fcc1c1f
TB
1486 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1487 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
43e9d192
IB
1488 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1489 DONE;
1490})
1491
1492;; For quads.
1493
;; Pack-and-truncate for two quad vectors, emitted as an XTN followed by an
;; XTN2 (two instructions, length 8).  The destination is earlyclobber
;; ("=&w"), so it is not allocated to an input register; the second
;; instruction still reads an input after the first has written the
;; destination.  The operand order is swapped on big-endian.
1494(define_insn "vec_pack_trunc_<mode>"
7973ca62 1495 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
43e9d192
IB
1496 (vec_concat:<VNARROWQ2>
1497 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1498 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1499 "TARGET_SIMD"
8fcc1c1f
TB
1500 {
1501 if (BYTES_BIG_ENDIAN)
1502 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1503 else
1504 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1505 }
78ec3036 1506 [(set_attr "type" "multiple")
43e9d192
IB
1507 (set_attr "length" "8")]
1508)
1509
1510;; Widening operations.
1511
;; Sign- or zero-extend (per ANY_EXTEND) the low half of operand 1, selected
;; by the lo-half parallel in operand 2, to the wide mode.  Emitted as
;; <su>xtl on the half-width register view.
1512(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515 (match_operand:VQW 1 "register_operand" "w")
1516 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1517 )))]
1518 "TARGET_SIMD"
77b8fb05 1519 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
78ec3036 1520 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
1521)
1522
;; Sign- or zero-extend (per ANY_EXTEND) the high half of operand 1,
;; selected by the hi-half parallel in operand 2, to the wide mode.  Emitted
;; as <su>xtl2 on the full-width register.
1523(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1524 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1525 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1526 (match_operand:VQW 1 "register_operand" "w")
1527 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1528 )))]
1529 "TARGET_SIMD"
77b8fb05 1530 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
78ec3036 1531 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
1532)
1533
;; Expander: build the high-half selecting parallel and emit the
;; corresponding aarch64_simd_vec_unpack<su>_hi pattern.
1534(define_expand "vec_unpack<su>_hi_<mode>"
1535 [(match_operand:<VWIDE> 0 "register_operand" "")
1536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1537 "TARGET_SIMD"
1538 {
/* Final argument true requests the high-half selector.  */
f5cbabc1 1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
1540 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1541 operands[1], p));
1542 DONE;
1543 }
1544)
1545
;; Expander: build the low-half selecting parallel and emit the
;; corresponding aarch64_simd_vec_unpack<su>_lo pattern.
1546(define_expand "vec_unpack<su>_lo_<mode>"
1547 [(match_operand:<VWIDE> 0 "register_operand" "")
1548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1549 "TARGET_SIMD"
1550 {
/* Final argument false requests the low-half selector.  */
f5cbabc1 1551 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
43e9d192
IB
1552 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1553 operands[1], p));
1554 DONE;
1555 }
1556)
1557
1558;; Widening arithmetic.
1559
24244e4d
VP
;; Widening multiply-accumulate on the low halves: extend the low halves of
;; operands 2 and 4 (both selected by the parallel in operand 3), multiply
;; them, and add the tied wide accumulator operand 1.  Emitted as <su>mlal.
1560(define_insn "*aarch64_<su>mlal_lo<mode>"
1561 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1562 (plus:<VWIDE>
1563 (mult:<VWIDE>
1564 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1565 (match_operand:VQW 2 "register_operand" "w")
1566 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1567 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1568 (match_operand:VQW 4 "register_operand" "w")
1569 (match_dup 3))))
1570 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571 "TARGET_SIMD"
1572 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
78ec3036 1573 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1574)
1575
;; Widening multiply-accumulate on the high halves: extend the high halves
;; of operands 2 and 4, multiply them, and add the tied wide accumulator
;; operand 1.  Emitted as <su>mlal2.
1576(define_insn "*aarch64_<su>mlal_hi<mode>"
1577 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578 (plus:<VWIDE>
1579 (mult:<VWIDE>
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1585 (match_dup 3))))
1586 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1587 "TARGET_SIMD"
1588 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
78ec3036 1589 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1590)
1591
;; Widening multiply-subtract on the low halves: the product of the extended
;; low halves of operands 2 and 4 is subtracted from the tied wide
;; accumulator operand 1.  Emitted as <su>mlsl.
1592(define_insn "*aarch64_<su>mlsl_lo<mode>"
1593 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594 (minus:<VWIDE>
1595 (match_operand:<VWIDE> 1 "register_operand" "0")
1596 (mult:<VWIDE>
1597 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1598 (match_operand:VQW 2 "register_operand" "w")
1599 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1600 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1601 (match_operand:VQW 4 "register_operand" "w")
1602 (match_dup 3))))))]
1603 "TARGET_SIMD"
1604 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
78ec3036 1605 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1606)
1607
;; Widening multiply-subtract on the high halves: the product of the
;; extended high halves of operands 2 and 4 is subtracted from the tied wide
;; accumulator operand 1.  Emitted as <su>mlsl2.
1608(define_insn "*aarch64_<su>mlsl_hi<mode>"
1609 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1610 (minus:<VWIDE>
1611 (match_operand:<VWIDE> 1 "register_operand" "0")
1612 (mult:<VWIDE>
1613 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1614 (match_operand:VQW 2 "register_operand" "w")
1615 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1616 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1617 (match_operand:VQW 4 "register_operand" "w")
1618 (match_dup 3))))))]
1619 "TARGET_SIMD"
1620 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
78ec3036 1621 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1622)
1623
;; Widening multiply-accumulate on whole 64-bit vectors: extend operands 1
;; and 2, multiply them, and add the tied wide accumulator operand 3.
;; Emitted as <su>mlal.
1624(define_insn "*aarch64_<su>mlal<mode>"
1625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1626 (plus:<VWIDE>
1627 (mult:<VWIDE>
1628 (ANY_EXTEND:<VWIDE>
a844a695 1629 (match_operand:VD_BHSI 1 "register_operand" "w"))
24244e4d 1630 (ANY_EXTEND:<VWIDE>
a844a695 1631 (match_operand:VD_BHSI 2 "register_operand" "w")))
24244e4d
VP
1632 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1633 "TARGET_SIMD"
1634 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1635 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1636)
1637
;; Widening multiply-subtract on whole 64-bit vectors: the product of the
;; extended operands 2 and 3 is subtracted from the tied wide accumulator
;; operand 1.  Emitted as <su>mlsl.
1638(define_insn "*aarch64_<su>mlsl<mode>"
1639 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1640 (minus:<VWIDE>
1641 (match_operand:<VWIDE> 1 "register_operand" "0")
1642 (mult:<VWIDE>
1643 (ANY_EXTEND:<VWIDE>
a844a695 1644 (match_operand:VD_BHSI 2 "register_operand" "w"))
24244e4d 1645 (ANY_EXTEND:<VWIDE>
a844a695 1646 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
24244e4d
VP
1647 "TARGET_SIMD"
1648 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 1649 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
1650)
1651
43e9d192
IB
;; Widening multiply of the low halves of operands 1 and 2 (both selected by
;; the parallel in operand 3).  Emitted as <su>mull on the half-width
;; register views.
1652(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1657 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 2 "register_operand" "w")
1659 (match_dup 3)))))]
1660 "TARGET_SIMD"
a02ad1aa 1661 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
78ec3036 1662 [(set_attr "type" "neon_mul_<Vetype>_long")]
43e9d192
IB
1663)
1664
;; Expander: build the low-half selecting parallel and emit the widening
;; low-half multiply pattern.
1665(define_expand "vec_widen_<su>mult_lo_<mode>"
1666 [(match_operand:<VWIDE> 0 "register_operand" "")
1667 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1668 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1669 "TARGET_SIMD"
1670 {
/* Final argument false requests the low-half selector.  */
f5cbabc1 1671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
43e9d192
IB
1672 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1673 operands[1],
1674 operands[2], p));
1675 DONE;
1676 }
1677)
1678
;; Widening multiply of the high halves of operands 1 and 2 (both selected
;; by the parallel in operand 3).  Emitted as <su>mull2 on the full-width
;; registers.
1679(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 1 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 2 "register_operand" "w")
1686 (match_dup 3)))))]
1687 "TARGET_SIMD"
a02ad1aa 1688 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1689 [(set_attr "type" "neon_mul_<Vetype>_long")]
43e9d192
IB
1690)
1691
;; Expander: build the high-half selecting parallel and emit the widening
;; high-half multiply pattern.
1692(define_expand "vec_widen_<su>mult_hi_<mode>"
1693 [(match_operand:<VWIDE> 0 "register_operand" "")
1694 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1695 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1696 "TARGET_SIMD"
1697 {
/* Final argument true requests the high-half selector.  */
f5cbabc1 1698 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
1699 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1700 operands[1],
1701 operands[2], p));
1702 DONE;
1703
1704 }
1705)
1706
1707;; FP vector operations.
1708;; AArch64 AdvSIMD supports single-precision (32-bit) and
1709;; double-precision (64-bit) floating-point data types and arithmetic as
1710;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1711;; without the need for -ffast-math or -funsafe-math-optimizations.
1712;;
1713;; Floating-point operations can raise an exception. Vectorizing such
1714;; operations is safe for the reasons explained below.
1715;;
1716;; ARMv8 permits an extension to enable trapped floating-point
1717;; exception handling, however this is an optional feature. In the
1718;; event of a floating-point exception being raised by vectorised
1719;; code then:
1720;; 1. If trapped floating-point exceptions are available, then a trap
1721;; will be taken when any lane raises an enabled exception. A trap
1722;; handler may determine which lane raised the exception.
1723;; 2. Alternatively a sticky exception flag is set in the
1724;; floating-point status register (FPSR). Software may explicitly
1725;; test the exception flags, in which case the tests will either
1726;; prevent vectorisation, allowing precise identification of the
1727;; failing operation, or if tested outside of vectorisable regions
1728;; then the specific operation and lane are not of interest.
1729
1730;; FP arithmetic operations.
1731
1732(define_insn "add<mode>3"
33d72b63
JW
1733 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
1736 "TARGET_SIMD"
1737 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 1738 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
43e9d192
IB
1739)
1740
1741(define_insn "sub<mode>3"
33d72b63
JW
1742 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
1745 "TARGET_SIMD"
1746 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 1747 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
43e9d192
IB
1748)
1749
1750(define_insn "mul<mode>3"
33d72b63
JW
1751 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1752 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1753 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
1754 "TARGET_SIMD"
1755 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 1756 [(set_attr "type" "neon_fp_mul_<stype><q>")]
43e9d192
IB
1757)
1758
;; Vector FP division expander.  The operation is first offered to
;; aarch64_emit_approx_div; if that returns true the expansion is finished
;; (DONE).  Otherwise operand 1 is forced into a register and control falls
;; off the end of the C body, so the (set (div ...)) template above is
;; emitted and left for the *div<mode>3 insn to match.
79a2bc2d 1759(define_expand "div<mode>3"
33d72b63
JW
1760 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1761 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1762 (match_operand:VHSDF 2 "register_operand" "w")))]
79a2bc2d
EM
1763 "TARGET_SIMD"
1764{
1765 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1766 DONE;
1767
1768 operands[1] = force_reg (<MODE>mode, operands[1]);
1769})
1770
1771(define_insn "*div<mode>3"
33d72b63
JW
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1774 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
1775 "TARGET_SIMD"
1776 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 1777 [(set_attr "type" "neon_fp_div_<stype><q>")]
43e9d192
IB
1778)
1779
1780(define_insn "neg<mode>2"
daef0a8c
JW
1781 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1782 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
1783 "TARGET_SIMD"
1784 "fneg\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 1785 [(set_attr "type" "neon_fp_neg_<stype><q>")]
43e9d192
IB
1786)
1787
1788(define_insn "abs<mode>2"
daef0a8c
JW
1789 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1790 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
1791 "TARGET_SIMD"
1792 "fabs\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 1793 [(set_attr "type" "neon_fp_abs_<stype><q>")]
43e9d192
IB
1794)
1795
1796(define_insn "fma<mode>4"
89ed6d5f
JW
1797 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1798 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1799 (match_operand:VHSDF 2 "register_operand" "w")
1800 (match_operand:VHSDF 3 "register_operand" "0")))]
43e9d192
IB
1801 "TARGET_SIMD"
1802 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
89ed6d5f 1803 [(set_attr "type" "neon_fp_mla_<stype><q>")]
43e9d192
IB
1804)
1805
828e70c1
JG
;; Fused multiply-add by element: one multiplicand is a single lane of
;; operand 1 (lane index in operand 2) duplicated across the vector, the
;; other is operand 3, and the addend (operand 4) is tied to the destination
;; by the "0" constraint.  Printed as FMLA with an indexed final operand.
1806(define_insn "*aarch64_fma4_elt<mode>"
1807 [(set (match_operand:VDQF 0 "register_operand" "=w")
1808 (fma:VDQF
1809 (vec_duplicate:VDQF
1810 (vec_select:<VEL>
1811 (match_operand:VDQF 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQF 3 "register_operand" "w")
1814 (match_operand:VDQF 4 "register_operand" "0")))]
1815 "TARGET_SIMD"
1dd055a2 1816 {
 /* Rewrite the lane index before printing.  NOTE(review): presumably this
    maps GCC's lane numbering onto the architectural lane for the current
    endianness -- confirm against aarch64_endian_lane_rtx.  */
7ac29c0f 1817 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1dd055a2
JG
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819 }
78ec3036 1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1821)
1822
1823(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1824 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1825 (fma:VDQSF
1826 (vec_duplicate:VDQSF
1827 (vec_select:<VEL>
1828 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1829 (parallel [(match_operand:SI 2 "immediate_operand")])))
1830 (match_operand:VDQSF 3 "register_operand" "w")
1831 (match_operand:VDQSF 4 "register_operand" "0")))]
1832 "TARGET_SIMD"
1dd055a2 1833 {
7ac29c0f 1834 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1dd055a2
JG
1835 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1836 }
78ec3036 1837 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1838)
1839
98b3a5f2
JW
1840(define_insn "*aarch64_fma4_elt_from_dup<mode>"
1841 [(set (match_operand:VMUL 0 "register_operand" "=w")
1842 (fma:VMUL
1843 (vec_duplicate:VMUL
6d06971d 1844 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
98b3a5f2
JW
1845 (match_operand:VMUL 2 "register_operand" "w")
1846 (match_operand:VMUL 3 "register_operand" "0")))]
828e70c1 1847 "TARGET_SIMD"
98b3a5f2 1848 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
ab2e8f01 1849 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
828e70c1
JG
1850)
1851
1852(define_insn "*aarch64_fma4_elt_to_64v2df"
1853 [(set (match_operand:DF 0 "register_operand" "=w")
1854 (fma:DF
1855 (vec_select:DF
1856 (match_operand:V2DF 1 "register_operand" "w")
1857 (parallel [(match_operand:SI 2 "immediate_operand")]))
1858 (match_operand:DF 3 "register_operand" "w")
1859 (match_operand:DF 4 "register_operand" "0")))]
1860 "TARGET_SIMD"
1dd055a2 1861 {
7ac29c0f 1862 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1dd055a2
JG
1863 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1864 }
78ec3036 1865 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
828e70c1
JG
1866)
1867
1868(define_insn "fnma<mode>4"
89ed6d5f
JW
1869 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1870 (fma:VHSDF
a7fef189
SE
1871 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1872 (match_operand:VHSDF 2 "register_operand" "w")
89ed6d5f 1873 (match_operand:VHSDF 3 "register_operand" "0")))]
828e70c1 1874 "TARGET_SIMD"
89ed6d5f
JW
1875 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_mla_<stype><q>")]
828e70c1
JG
1877)
1878
1879(define_insn "*aarch64_fnma4_elt<mode>"
1880 [(set (match_operand:VDQF 0 "register_operand" "=w")
1881 (fma:VDQF
1882 (neg:VDQF
1883 (match_operand:VDQF 3 "register_operand" "w"))
1884 (vec_duplicate:VDQF
1885 (vec_select:<VEL>
1886 (match_operand:VDQF 1 "register_operand" "<h_con>")
1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1888 (match_operand:VDQF 4 "register_operand" "0")))]
1889 "TARGET_SIMD"
1dd055a2 1890 {
7ac29c0f 1891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1dd055a2
JG
1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893 }
78ec3036 1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1895)
1896
1897(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1898 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1899 (fma:VDQSF
1900 (neg:VDQSF
1901 (match_operand:VDQSF 3 "register_operand" "w"))
1902 (vec_duplicate:VDQSF
1903 (vec_select:<VEL>
1904 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1905 (parallel [(match_operand:SI 2 "immediate_operand")])))
1906 (match_operand:VDQSF 4 "register_operand" "0")))]
1907 "TARGET_SIMD"
1dd055a2 1908 {
7ac29c0f 1909 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1dd055a2
JG
1910 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1911 }
78ec3036 1912 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1913)
1914
98b3a5f2
JW
1915(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1916 [(set (match_operand:VMUL 0 "register_operand" "=w")
1917 (fma:VMUL
1918 (neg:VMUL
1919 (match_operand:VMUL 2 "register_operand" "w"))
1920 (vec_duplicate:VMUL
6d06971d 1921 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
98b3a5f2 1922 (match_operand:VMUL 3 "register_operand" "0")))]
828e70c1 1923 "TARGET_SIMD"
98b3a5f2 1924 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
ab2e8f01 1925 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
828e70c1
JG
1926)
1927
1928(define_insn "*aarch64_fnma4_elt_to_64v2df"
1929 [(set (match_operand:DF 0 "register_operand" "=w")
1930 (fma:DF
1931 (vec_select:DF
1932 (match_operand:V2DF 1 "register_operand" "w")
1933 (parallel [(match_operand:SI 2 "immediate_operand")]))
1934 (neg:DF
1935 (match_operand:DF 3 "register_operand" "w"))
1936 (match_operand:DF 4 "register_operand" "0")))]
1937 "TARGET_SIMD"
1dd055a2 1938 {
7ac29c0f 1939 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1dd055a2
JG
1940 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1941 }
78ec3036 1942 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
828e70c1
JG
1943)
1944
0659ce6f 1945;; Vector versions of the floating-point frint patterns.
74dc11ed 1946;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
0659ce6f 1947(define_insn "<frint_pattern><mode>2"
daef0a8c
JW
1948 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1949 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1950 FRINT))]
42fc9a7f
JG
1951 "TARGET_SIMD"
1952 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 1953 [(set_attr "type" "neon_fp_round_<stype><q>")]
42fc9a7f
JG
1954)
1955
ce966824
JG
1956;; Vector versions of the fcvt standard patterns.
1957;; Expands to lbtrunc, lround, lceil, lfloor
daef0a8c 1958(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
42fc9a7f
JG
1959 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1960 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c 1961 [(match_operand:VHSDF 1 "register_operand" "w")]
42fc9a7f
JG
1962 FCVT)))]
1963 "TARGET_SIMD"
1964 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 1965 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
42fc9a7f
JG
1966)
1967
d7f33f07
JW
1968;; HF Scalar variants of related SIMD instructions.
1969(define_insn "l<fcvt_pattern><su_optab>hfhi2"
1970 [(set (match_operand:HI 0 "register_operand" "=w")
1971 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1972 FCVT)))]
1973 "TARGET_SIMD_F16INST"
1974 "fcvt<frint_suffix><su>\t%h0, %h1"
1975 [(set_attr "type" "neon_fp_to_int_s")]
1976)
1977
1978(define_insn "<optab>_trunchfhi2"
1979 [(set (match_operand:HI 0 "register_operand" "=w")
1980 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1981 "TARGET_SIMD_F16INST"
1982 "fcvtz<su>\t%h0, %h1"
1983 [(set_attr "type" "neon_fp_to_int_s")]
1984)
1985
1986(define_insn "<optab>hihf2"
1987 [(set (match_operand:HF 0 "register_operand" "=w")
1988 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1989 "TARGET_SIMD_F16INST"
1990 "<su_optab>cvtf\t%h0, %h1"
1991 [(set_attr "type" "neon_int_to_fp_s")]
1992)
1993
39252973
KT
;; Float-to-integer conversion (UNSPEC_FRINTZ wrapped in FIXUORS) of a vector
;; that has first been multiplied by a constant vector accepted by
;; aarch64_fp_vec_pow2.  The insn condition only allows the match when the
;; value returned by aarch64_vec_fpconst_pow_of_2 for that constant lies
;; between 1 and the element width in bits; that value is then printed as
;; the immediate "#fbits" operand of a single fcvtz<su>.
1994(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1995 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1996 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1997 [(mult:VDQF
1998 (match_operand:VDQF 1 "register_operand" "w")
1999 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2000 UNSPEC_FRINTZ)))]
2001 "TARGET_SIMD
2002 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2003 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2004 {
 /* The immediate is not an RTL operand, so the assembly line is formatted
    by hand and emitted directly; the empty return string tells the caller
    there is nothing further to print.  */
2005 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2006 char buf[64];
2007 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2008 output_asm_insn (buf, operands);
2009 return "";
2010 }
2011 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2012)
2013
daef0a8c 2014(define_expand "<optab><VHSDF:mode><fcvt_target>2"
384be29f
JG
2015 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2016 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c
JW
2017 [(match_operand:VHSDF 1 "register_operand")]
2018 UNSPEC_FRINTZ)))]
384be29f
JG
2019 "TARGET_SIMD"
2020 {})
2021
daef0a8c 2022(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
384be29f
JG
2023 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2024 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c
JW
2025 [(match_operand:VHSDF 1 "register_operand")]
2026 UNSPEC_FRINTZ)))]
384be29f
JG
2027 "TARGET_SIMD"
2028 {})
2029
daef0a8c
JW
2030(define_expand "ftrunc<VHSDF:mode>2"
2031 [(set (match_operand:VHSDF 0 "register_operand")
2032 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2033 UNSPEC_FRINTZ))]
384be29f
JG
2034 "TARGET_SIMD"
2035 {})
2036
daef0a8c
JW
2037(define_insn "<optab><fcvt_target><VHSDF:mode>2"
2038 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2039 (FLOATUORS:VHSDF
1709ff9b
JG
2040 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2041 "TARGET_SIMD"
2042 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 2043 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1709ff9b
JG
2044)
2045
4c871069
JG
2046;; Conversions between vectors of floats and doubles.
2047;; Contains a mix of patterns to match standard pattern names
2048;; and those for intrinsics.
2049
2050;; Float widening operations.
2051
03873eb9
AL
2052(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2053 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2054 (float_extend:<VWIDE> (vec_select:<VHALF>
2055 (match_operand:VQ_HSF 1 "register_operand" "w")
2056 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2057 )))]
4c871069 2058 "TARGET_SIMD"
03873eb9 2059 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
78ec3036 2060 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
2061)
2062
2644d4d9
JW
2063;; Convert between fixed-point and floating-point (vector modes)
2064
33d72b63
JW
2065(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2066 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2067 (unspec:<VHSDF:FCVT_TARGET>
2068 [(match_operand:VHSDF 1 "register_operand" "w")
2069 (match_operand:SI 2 "immediate_operand" "i")]
2644d4d9
JW
2070 FCVT_F2FIXED))]
2071 "TARGET_SIMD"
2072 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
33d72b63 2073 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2644d4d9
JW
2074)
2075
33d72b63
JW
2076(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2077 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2078 (unspec:<VDQ_HSDI:FCVT_TARGET>
2079 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2080 (match_operand:SI 2 "immediate_operand" "i")]
2644d4d9
JW
2081 FCVT_FIXED2F))]
2082 "TARGET_SIMD"
2083 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
33d72b63 2084 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2644d4d9
JW
2085)
2086
d5d27976
JG
2087;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2088;; is inconsistent with vector ordering elsewhere in the compiler, in that
2089;; the meaning of HI and LO changes depending on the target endianness.
2090;; While elsewhere we map the higher numbered elements of a vector to
2091;; the lower architectural lanes of the vector, for these patterns we want
2092;; to always treat "hi" as referring to the higher architectural lanes.
2093;; Consequently, while the patterns below look inconsistent with our
9c582551 2094;; other big-endian patterns their behavior is as required.
d5d27976 2095
03873eb9
AL
;; Expander for the vec_unpacks_lo_<mode> pattern name: float-extend one half
;; of a VQ_HSF vector into <VWIDE>.  It builds the lane-selection parallel
;; and emits aarch64_simd_vec_unpacks_lo_<mode> with it as operand 2; the
;; template above only declares the operands because the body ends in DONE.
2096(define_expand "vec_unpacks_lo_<mode>"
2097 [(match_operand:<VWIDE> 0 "register_operand" "")
2098 (match_operand:VQ_HSF 1 "register_operand" "")]
4c871069 2099 "TARGET_SIMD"
03873eb9 2100 {
 /* NOTE(review): the final 'false' argument presumably requests the low
    half, matching the vect_par_cnst_lo_half predicate on the insn --
    confirm against aarch64_simd_vect_par_cnst_half.  */
f5cbabc1 2101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
03873eb9
AL
2102 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2103 operands[1], p));
2104 DONE;
2105 }
2106)
2107
2108(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2109 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2110 (float_extend:<VWIDE> (vec_select:<VHALF>
2111 (match_operand:VQ_HSF 1 "register_operand" "w")
2112 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2113 )))]
2114 "TARGET_SIMD"
2115 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
78ec3036 2116 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
2117)
2118
03873eb9
AL
;; Expander for the vec_unpacks_hi_<mode> pattern name: float-extend the
;; other half of a VQ_HSF vector into <VWIDE>.  It builds the lane-selection
;; parallel with the 'true' flag and emits aarch64_simd_vec_unpacks_hi_<mode>
;; (the fcvtl2 insn, whose operand 2 predicate is vect_par_cnst_hi_half).
;; The template above only declares the operands because the body ends in
;; DONE.
2119(define_expand "vec_unpacks_hi_<mode>"
2120 [(match_operand:<VWIDE> 0 "register_operand" "")
2121 (match_operand:VQ_HSF 1 "register_operand" "")]
2122 "TARGET_SIMD"
2123 {
f5cbabc1 2124 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
03873eb9
AL
 /* Use the _hi_ generator so the selector built above is paired with the
    pattern whose predicate accepts it.  The _lo_ generator builds RTL of
    the same shape but documents (and checks, at recog time) the opposite
    half.  */
2125 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2126 operands[1], p));
2127 DONE;
2128 }
2129)
2130(define_insn "aarch64_float_extend_lo_<Vwide>"
2131 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132 (float_extend:<VWIDE>
2133 (match_operand:VDF 1 "register_operand" "w")))]
4c871069 2134 "TARGET_SIMD"
03873eb9 2135 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
78ec3036 2136 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
2137)
2138
2139;; Float narrowing operations.
2140
922f9c25
AL
2141(define_insn "aarch64_float_truncate_lo_<mode>"
2142 [(set (match_operand:VDF 0 "register_operand" "=w")
2143 (float_truncate:VDF
2144 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4c871069 2145 "TARGET_SIMD"
922f9c25 2146 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
78ec3036 2147 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
4c871069
JG
2148)
2149
d5d27976 2150(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
922f9c25
AL
2151 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2152 (vec_concat:<VDBL>
2153 (match_operand:VDF 1 "register_operand" "0")
2154 (float_truncate:VDF
2155 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
d5d27976 2156 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
922f9c25 2157 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
78ec3036 2158 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
4c871069
JG
2159)
2160
d5d27976
JG
2161(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2162 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2163 (vec_concat:<VDBL>
2164 (float_truncate:VDF
2165 (match_operand:<VWIDE> 2 "register_operand" "w"))
2166 (match_operand:VDF 1 "register_operand" "0")))]
2167 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2168 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2169 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2170)
2171
;; Endianness dispatcher for the narrowing "hi" conversion.  The _le and _be
;; insns differ only in the order of the two vec_concat arms and in their
;; BYTES_BIG_ENDIAN condition; this expander picks the generator that is
;; valid for the current target and forwards the three operands unchanged.
;; The constraint strings on the operands below have no effect in a
;; define_expand.
2172(define_expand "aarch64_float_truncate_hi_<Vdbl>"
2173 [(match_operand:<VDBL> 0 "register_operand" "=w")
2174 (match_operand:VDF 1 "register_operand" "0")
2175 (match_operand:<VWIDE> 2 "register_operand" "w")]
2176 "TARGET_SIMD"
2177{
2178 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2179 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2180 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2181 emit_insn (gen (operands[0], operands[1], operands[2]));
2182 DONE;
2183}
2184)
2185
4c871069
JG
;; Pack two V2DF vectors into one V4SF by narrowing each to V2SF.  One input
;; is narrowed into a V2SF temporary with the float_truncate_lo insn, then
;; the float_truncate_hi expander narrows the other input and combines it
;; with that temporary into the V4SF result.
2186(define_expand "vec_pack_trunc_v2df"
2187 [(set (match_operand:V4SF 0 "register_operand")
2188 (vec_concat:V4SF
2189 (float_truncate:V2SF
2190 (match_operand:V2DF 1 "register_operand"))
2191 (float_truncate:V2SF
2192 (match_operand:V2DF 2 "register_operand"))
2193 ))]
2194 "TARGET_SIMD"
2195 {
2196 rtx tmp = gen_reg_rtx (V2SFmode);
8fcc1c1f
TB
 /* Which input operand feeds the "lo" step and which feeds the "hi" step
    is swapped on big-endian targets.  */
2197 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2198 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2199
2200 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
4c871069 2201 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
8fcc1c1f 2202 tmp, operands[hi]));
4c871069
JG
2203 DONE;
2204 }
2205)
2206
;; Pack two scalar DF values into one V2SF.  The two scalars are first
;; assembled into a V2DF temporary (one per 64-bit half, with the halves
;; swapped on big-endian targets), and that temporary is then narrowed to
;; V2SF with a single float_truncate_lo.
2207(define_expand "vec_pack_trunc_df"
2208 [(set (match_operand:V2SF 0 "register_operand")
2209 (vec_concat:V2SF
2210 (float_truncate:SF
2211 (match_operand:DF 1 "register_operand"))
2212 (float_truncate:SF
2213 (match_operand:DF 2 "register_operand"))
2214 ))]
2215 "TARGET_SIMD"
2216 {
 /* The temporary is written by the move_{lo,hi}_quad_v2df expanders and
    read as the <VWIDE> (V2DF) input of aarch64_float_truncate_lo_v2sf, so
    it has to be a V2DF register; a V2SF one would produce RTL with
    mismatched modes.  */
2217 rtx tmp = gen_reg_rtx (V2DFmode);
8fcc1c1f
TB
2218 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2219 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2220
2221 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2222 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
4c871069
JG
2223 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2224 DONE;
2225 }
2226)
2227
43e9d192
IB
2228;; FP Max/Min
2229;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2230;; expression like:
2231;; a = (b < c) ? b : c;
7a335530
IB
2232;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2233;; -fno-signed-zeros are enabled either explicitly or indirectly via
2234;; -ffast-math.
43e9d192
IB
2235;;
2236;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2237;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2238;; operand will be returned when both operands are zero (i.e. they may not
2239;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2240;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2241;; NaNs.
2242
998eaf97 2243(define_insn "<su><maxmin><mode>3"
33d72b63
JW
2244 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2245 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2246 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192 2247 "TARGET_SIMD"
998eaf97 2248 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2249 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
43e9d192
IB
2250)
2251
1efafef3
TC
2252;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2253;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2254;; which implement the IEEE fmax ()/fmin () functions.
998eaf97 2255(define_insn "<maxmin_uns><mode>3"
33d72b63
JW
2256 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2257 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2258 (match_operand:VHSDF 2 "register_operand" "w")]
2259 FMAXMIN_UNS))]
43e9d192 2260 "TARGET_SIMD"
998eaf97 2261 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2262 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
43e9d192
IB
2263)
2264
36054fab 2265;; 'across lanes' add.
43e9d192 2266
f5156c3e
AL
;; Integer add-across-lanes reduction producing a scalar <VEL> result.  The
;; reduction insn (aarch64_reduc_plus_internal<mode>) writes a full vector
;; register, so the result is computed into a vector scratch and the scalar
;; is then extracted with aarch64_get_lane<mode>.  The constraint strings on
;; the operands have no effect in a define_expand.
2267(define_expand "reduc_plus_scal_<mode>"
2268 [(match_operand:<VEL> 0 "register_operand" "=w")
a844a695 2269 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
f5156c3e
AL
2270 UNSPEC_ADDV)]
2271 "TARGET_SIMD"
2272 {
 /* Lane 0, passed through aarch64_endian_lane_rtx.  NOTE(review):
    presumably this yields the GCC lane number that corresponds to
    architectural lane 0 on either endianness -- confirm.  */
7ac29c0f 2273 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
f5156c3e
AL
2274 rtx scratch = gen_reg_rtx (<MODE>mode);
2275 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2276 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2277 DONE;
2278 }
2279)
2280
3629030e 2281(define_insn "aarch64_faddp<mode>"
33d72b63
JW
2282 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2283 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2284 (match_operand:VHSDF 2 "register_operand" "w")]
2285 UNSPEC_FADDV))]
3629030e
JW
2286 "TARGET_SIMD"
2287 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2288 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3629030e
JW
2289)
2290
f5156c3e 2291(define_insn "aarch64_reduc_plus_internal<mode>"
43e9d192
IB
2292 [(set (match_operand:VDQV 0 "register_operand" "=w")
2293 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
f5156c3e 2294 UNSPEC_ADDV))]
43e9d192 2295 "TARGET_SIMD"
92835317 2296 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
78ec3036 2297 [(set_attr "type" "neon_reduc_add<q>")]
43e9d192
IB
2298)
2299
f5156c3e 2300(define_insn "aarch64_reduc_plus_internalv2si"
43e9d192
IB
2301 [(set (match_operand:V2SI 0 "register_operand" "=w")
2302 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
f5156c3e 2303 UNSPEC_ADDV))]
43e9d192
IB
2304 "TARGET_SIMD"
2305 "addp\\t%0.2s, %1.2s, %1.2s"
78ec3036 2306 [(set_attr "type" "neon_reduc_add")]
43e9d192
IB
2307)
2308
628d13d9
JG
2309(define_insn "reduc_plus_scal_<mode>"
2310 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2311 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
92835317 2312 UNSPEC_FADDV))]
43e9d192 2313 "TARGET_SIMD"
36054fab 2314 "faddp\\t%<Vetype>0, %1.<Vtype>"
78ec3036 2315 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
43e9d192
IB
2316)
2317
f5156c3e
AL
;; V4SF add-across-lanes reduction producing an SF scalar.  It is built from
;; two rounds of the pairwise-add insn (aarch64_faddpv4sf): the first round
;; pairs the input with itself, the second pairs the intermediate result
;; with itself, after which one lane of the scratch vector is extracted as
;; the scalar result.
2318(define_expand "reduc_plus_scal_v4sf"
2319 [(set (match_operand:SF 0 "register_operand")
36054fab 2320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
92835317 2321 UNSPEC_FADDV))]
36054fab
JG
2322 "TARGET_SIMD"
2323{
7ac29c0f 2324 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
f5156c3e 2325 rtx scratch = gen_reg_rtx (V4SFmode);
3629030e
JW
2326 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2327 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
f5156c3e 2328 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
36054fab
JG
2329 DONE;
2330})
2331
a5e69cad
FY
2332(define_insn "clrsb<mode>2"
2333 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2334 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2335 "TARGET_SIMD"
2336 "cls\\t%0.<Vtype>, %1.<Vtype>"
2337 [(set_attr "type" "neon_cls<q>")]
2338)
2339
b5574232
VP
2340(define_insn "clz<mode>2"
2341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2342 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2343 "TARGET_SIMD"
2344 "clz\\t%0.<Vtype>, %1.<Vtype>"
78ec3036 2345 [(set_attr "type" "neon_cls<q>")]
b5574232
VP
2346)
2347
a5e69cad
FY
2348(define_insn "popcount<mode>2"
2349 [(set (match_operand:VB 0 "register_operand" "=w")
2350 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2351 "TARGET_SIMD"
2352 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2353 [(set_attr "type" "neon_cnt<q>")]
2354)
2355
998eaf97
JG
2356;; 'across lanes' max and min ops.
2357
64b0f928 2358;; Template for outputting a scalar, so we can create __builtins which can be
16d24520 2359;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
64b0f928
AL
2360(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2361 [(match_operand:<VEL> 0 "register_operand")
703bbcdf
JW
2362 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2363 FMAXMINV)]
64b0f928
AL
2364 "TARGET_SIMD"
2365 {
7ac29c0f 2366 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
64b0f928
AL
2367 rtx scratch = gen_reg_rtx (<MODE>mode);
2368 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2369 operands[1]));
2370 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2371 DONE;
2372 }
2373)
2374
2375;; Likewise for integer cases, signed and unsigned.
2376(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2377 [(match_operand:<VEL> 0 "register_operand")
2378 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2379 MAXMINV)]
2380 "TARGET_SIMD"
2381 {
7ac29c0f 2382 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
64b0f928
AL
2383 rtx scratch = gen_reg_rtx (<MODE>mode);
2384 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2385 operands[1]));
2386 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2387 DONE;
2388 }
2389)
2390
2391(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
92835317
TB
2392 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2393 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
43e9d192
IB
2394 MAXMINV))]
2395 "TARGET_SIMD"
998eaf97 2396 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
78ec3036 2397 [(set_attr "type" "neon_reduc_minmax<q>")]
43e9d192
IB
2398)
2399
64b0f928 2400(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
43e9d192
IB
2401 [(set (match_operand:V2SI 0 "register_operand" "=w")
2402 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2403 MAXMINV))]
2404 "TARGET_SIMD"
998eaf97 2405 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
78ec3036 2406 [(set_attr "type" "neon_reduc_minmax")]
43e9d192
IB
2407)
2408
64b0f928 2409(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
703bbcdf
JW
2410 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2411 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2412 FMAXMINV))]
998eaf97 2413 "TARGET_SIMD"
64b0f928 2414 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
703bbcdf 2415 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
998eaf97
JG
2416)
2417
09962a4a
JG
2418;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2419;; allocation.
2420;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2421;; to select.
2422;;
2423;; Thus our BSL is of the form:
2424;; op0 = bsl (mask, op2, op3)
4fda1ad1 2425;; We can use any of:
09962a4a
JG
2426;;
2427;; if (op0 = mask)
2428;; bsl mask, op2, op3
2429;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2430;; bit op0, op2, mask
2431;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2432;; bif op0, op3, mask
3297949e
KT
2433;;
2434;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2435;; Some forms of straight-line code may generate the equivalent form
2436;; in *aarch64_simd_bsl<mode>_alt.
4fda1ad1
JG
2437
;; Bitwise select written in its xor/and form:
;;   op0 = ((op3 ^ op2) & op1) ^ op3
;; i.e. bits of op2 where the mask (op1) is 1, bits of op3 where it is 0.
;; The three alternatives differ only in which input is tied to the
;; destination, and each maps to a different instruction:
;;   alternative 0: mask (op1) tied to op0 -> bsl
;;   alternative 1: op3 tied to op0        -> bit
;;   alternative 2: op2 tied to op0        -> bif
2438(define_insn "aarch64_simd_bsl<mode>_internal"
aea4b54a
JG
2439 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2440 (xor:VDQ_I
2441 (and:VDQ_I
2442 (xor:VDQ_I
5f565314 2443 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
aea4b54a
JG
2444 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2445 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
5f565314 2446 (match_dup:<V_INT_EQUIV> 3)
09962a4a 2447 ))]
4fda1ad1
JG
2448 "TARGET_SIMD"
2449 "@
2450 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2451 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2452 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
78ec3036 2453 [(set_attr "type" "neon_bsl<q>")]
4fda1ad1
JG
2454)
2455
3297949e
KT
2456;; We need this form in addition to the above pattern to match the case
2457;; when combine tries merging three insns such that the second operand of
2458;; the outer XOR matches the second operand of the inner XOR rather than
2459;; the first. The two are equivalent but since recog doesn't try all
2460;; permutations of commutative operations, we have to have a separate pattern.
2461
2462(define_insn "*aarch64_simd_bsl<mode>_alt"
aea4b54a
JG
2463 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2464 (xor:VDQ_I
2465 (and:VDQ_I
2466 (xor:VDQ_I
2467 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2468 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2469 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2470 (match_dup:<V_INT_EQUIV> 2)))]
3297949e
KT
2471 "TARGET_SIMD"
2472 "@
2473 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2474 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2475 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2476 [(set_attr "type" "neon_bsl<q>")]
2477)
2478
aea4b54a
JG
2479;; DImode is special, we want to avoid computing operations which are
2480;; more naturally computed in general purpose registers in the vector
2481;; registers. If we do that, we need to move all three operands from general
2482;; purpose registers to vector registers, then back again. However, we
2483;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484;; optimizations based on the component operations of a BSL.
2485;;
2486;; That means we need a splitter back to the individual operations, if they
2487;; would be better calculated on the integer side.
2488
;; DImode bitwise select, op0 = ((op3 ^ op2) & op1) ^ op3.  The first three
;; alternatives keep everything in vector registers and emit bsl/bit/bif
;; according to which input is tied to the destination.  The fourth
;; alternative ("#") uses general-purpose registers with an early-clobber
;; destination and is always split into xor/and/xor (hence length 12).
;; The split only fires when operand 0 is a general-purpose register.
2489(define_insn_and_split "aarch64_simd_bsldi_internal"
2490 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2491 (xor:DI
2492 (and:DI
2493 (xor:DI
2494 (match_operand:DI 3 "register_operand" "w,0,w,r")
2495 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2497 (match_dup:DI 3)
2498 ))]
2499 "TARGET_SIMD"
2500 "@
2501 bsl\\t%0.8b, %2.8b, %3.8b
2502 bit\\t%0.8b, %2.8b, %1.8b
2503 bif\\t%0.8b, %3.8b, %1.8b
2504 #"
6acc5948 2505 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
aea4b54a
JG
;; Placeholder replacement template: it is never emitted because the C body
;; below always ends in DONE or FAIL.  It lists operands 0-3, matching the
;; sibling aarch64_simd_bsldi_alt pattern.
2506 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2507{
2508 /* Split back to individual operations. If we're before reload, and
2509 able to create a temporary register, do so. If we're after reload,
2510 we've got an early-clobber destination register, so use that.
2511 Otherwise, we can't create pseudos and we can't yet guarantee that
2512 operands[0] is safe to write, so FAIL to split. */
2513
2514 rtx scratch;
2515 if (reload_completed)
2516 scratch = operands[0];
2517 else if (can_create_pseudo_p ())
2518 scratch = gen_reg_rtx (DImode);
2519 else
2520 FAIL;
2521
2522 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2525 DONE;
2526}
2527 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528 (set_attr "length" "4,4,4,12")]
2529)
2530
;; Variant in which the outer XOR repeats operand 2 instead of operand 3.
;; The RTL computes ((op3 ^ op2) & op1) ^ op2, so each result bit is taken
;; from operand 3 where operand 1 is set and from operand 2 where it is
;; clear.  Alternatives 0-2 emit a single BSL, BIT or BIF on "w" registers;
;; alternative 3 ("r" registers) is output as "#" and split by the code
;; below into XOR, AND, XOR.
2531(define_insn_and_split "aarch64_simd_bsldi_alt"
2532 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2533 (xor:DI
2534 (and:DI
2535 (xor:DI
2536 (match_operand:DI 3 "register_operand" "w,w,0,r")
2537 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2539 (match_dup:DI 2)
2540 ))]
2541 "TARGET_SIMD"
2542 "@
2543 bsl\\t%0.8b, %3.8b, %2.8b
2544 bit\\t%0.8b, %3.8b, %1.8b
2545 bif\\t%0.8b, %2.8b, %1.8b
2546 #"
6acc5948 2547 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
aea4b54a
JG
;; The replacement template below is only a placeholder: the preparation
;; code always finishes with DONE or FAIL.
2548 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2549{
2550 /* Split back to individual operations. If we're before reload, and
2551 able to create a temporary register, do so. If we're after reload,
2552 we've got an early-clobber destination register, so use that.
2553 Otherwise, we can't create pseudos and we can't yet guarantee that
2554 operands[0] is safe to write, so FAIL to split. */
2555
2556 rtx scratch;
2557 if (reload_completed)
2558 scratch = operands[0];
2559 else if (can_create_pseudo_p ())
2560 scratch = gen_reg_rtx (DImode);
2561 else
2562 FAIL;
2563
 /* scratch = op2 ^ op3; scratch &= op1; op0 = scratch ^ op2.  */
2564 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2567 DONE;
2568}
2569 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570 (set_attr "length" "4,4,4,12")]
2571)
2572
;; Expander for a bitwise select.  Operand 1 is the selector and is always
;; used in <V_INT_EQUIV>mode; operands 2 and 3 are the two data inputs.
;; For floating-point modes the data inputs are viewed in <V_INT_EQUIV>mode
;; and the result is built in a fresh integer-mode register, which is then
;; copied back to operand 0 through gen_lowpart.
4fda1ad1 2573(define_expand "aarch64_simd_bsl<mode>"
46e778c4 2574 [(match_operand:VALLDIF 0 "register_operand")
5f565314 2575 (match_operand:<V_INT_EQUIV> 1 "register_operand")
46e778c4
JG
2576 (match_operand:VALLDIF 2 "register_operand")
2577 (match_operand:VALLDIF 3 "register_operand")]
09962a4a 2578 "TARGET_SIMD"
4fda1ad1
JG
2579{
2580 /* We can't alias operands together if they have different modes. */
c8824f2c
JG
2581 rtx tmp = operands[0];
2582 if (FLOAT_MODE_P (<MODE>mode))
2583 {
5f565314
RS
2584 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2585 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2586 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
c8824f2c 2587 }
5f565314
RS
2588 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
 /* Emit the select in the integer-equivalent mode.  */
2589 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2590 operands[1],
2591 operands[2],
2592 operands[3]));
c8824f2c
JG
 /* Only the floating-point path used a separate temporary.  */
2593 if (tmp != operands[0])
2594 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2595
09962a4a 2596 DONE;
4fda1ad1
JG
2597})
2598
;; Select between operands 1 and 2 under the control of mask operand 3.
;; An all-ones/zero data pair reduces to a move of the mask and a
;; zero/all-ones pair to a NOT of the mask.  Everything else goes through
;; aarch64_simd_bsl<mode> with the mask as its selector.
5f565314 2599(define_expand "vcond_mask_<mode><v_int_equiv>"
45d569f3
AL
2600 [(match_operand:VALLDI 0 "register_operand")
2601 (match_operand:VALLDI 1 "nonmemory_operand")
2602 (match_operand:VALLDI 2 "nonmemory_operand")
5f565314 2603 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
45d569f3
AL
2604 "TARGET_SIMD"
2605{
2606 /* If we have (a = (P) ? -1 : 0);
2607 Then we can simply move the generated mask (result must be int). */
2608 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2609 && operands[2] == CONST0_RTX (<MODE>mode))
2610 emit_move_insn (operands[0], operands[3]);
2611 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2612 else if (operands[1] == CONST0_RTX (<MODE>mode)
2613 && operands[2] == CONSTM1_RTX (<MODE>mode))
5f565314 2614 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
45d569f3
AL
2615 else
2616 {
 /* General case: force any constant data operand into a register, then
 emit a bitwise select with the mask as the selector.  */
2617 if (!REG_P (operands[1]))
2618 operands[1] = force_reg (<MODE>mode, operands[1]);
2619 if (!REG_P (operands[2]))
2620 operands[2] = force_reg (<MODE>mode, operands[2]);
2621 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2622 operands[1], operands[2]));
2623 }
2624
2625 DONE;
2626})
2627
2628;; Patterns comparing two vectors to produce a mask.
2629
2630(define_expand "vec_cmp<mode><mode>"
2631 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2632 (match_operator 1 "comparison_operator"
2633 [(match_operand:VSDQ_I_DI 2 "register_operand")
2634 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2635 "TARGET_SIMD"
2636{
2637 rtx mask = operands[0];
2638 enum rtx_code code = GET_CODE (operands[1]);
2639
2640 switch (code)
2641 {
2642 case NE:
2643 case LE:
2644 case LT:
2645 case GE:
2646 case GT:
2647 case EQ:
2648 if (operands[3] == CONST0_RTX (<MODE>mode))
2649 break;
2650
2651 /* Fall through. */
2652 default:
2653 if (!REG_P (operands[3]))
2654 operands[3] = force_reg (<MODE>mode, operands[3]);
2655
2656 break;
2657 }
2658
2659 switch (code)
2660 {
2661 case LT:
2662 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2663 break;
2664
2665 case GE:
2666 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2667 break;
2668
2669 case LE:
2670 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2671 break;
2672
2673 case GT:
2674 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2675 break;
2676
2677 case LTU:
2678 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2679 break;
2680
2681 case GEU:
2682 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2683 break;
2684
2685 case LEU:
2686 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2687 break;
2688
2689 case GTU:
2690 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2691 break;
2692
2693 case NE:
2694 /* Handle NE as !EQ. */
2695 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
5f565314 2696 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
45d569f3
AL
2697 break;
2698
2699 case EQ:
2700 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2701 break;
2702
2703 default:
2704 gcc_unreachable ();
2705 }
2706
2707 DONE;
2708})
2709
5f565314
RS
2710(define_expand "vec_cmp<mode><v_int_equiv>"
2711 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
45d569f3
AL
2712 (match_operator 1 "comparison_operator"
2713 [(match_operand:VDQF 2 "register_operand")
2714 (match_operand:VDQF 3 "nonmemory_operand")]))]
2715 "TARGET_SIMD"
2716{
2717 int use_zero_form = 0;
2718 enum rtx_code code = GET_CODE (operands[1]);
5f565314 2719 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
45d569f3 2720
bb276776 2721 rtx (*comparison) (rtx, rtx, rtx) = NULL;
45d569f3
AL
2722
2723 switch (code)
2724 {
2725 case LE:
2726 case LT:
2727 case GE:
2728 case GT:
2729 case EQ:
2730 if (operands[3] == CONST0_RTX (<MODE>mode))
2731 {
2732 use_zero_form = 1;
2733 break;
2734 }
2735 /* Fall through. */
2736 default:
2737 if (!REG_P (operands[3]))
2738 operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740 break;
2741 }
2742
2743 switch (code)
2744 {
2745 case LT:
2746 if (use_zero_form)
2747 {
2748 comparison = gen_aarch64_cmlt<mode>;
2749 break;
2750 }
0be56227 2751 /* Fall through. */
f7d884d4 2752 case UNLT:
45d569f3
AL
2753 std::swap (operands[2], operands[3]);
2754 /* Fall through. */
f7d884d4 2755 case UNGT:
45d569f3
AL
2756 case GT:
2757 comparison = gen_aarch64_cmgt<mode>;
2758 break;
2759 case LE:
2760 if (use_zero_form)
2761 {
2762 comparison = gen_aarch64_cmle<mode>;
2763 break;
2764 }
0be56227 2765 /* Fall through. */
f7d884d4 2766 case UNLE:
45d569f3
AL
2767 std::swap (operands[2], operands[3]);
2768 /* Fall through. */
f7d884d4 2769 case UNGE:
45d569f3
AL
2770 case GE:
2771 comparison = gen_aarch64_cmge<mode>;
2772 break;
2773 case NE:
2774 case EQ:
2775 comparison = gen_aarch64_cmeq<mode>;
2776 break;
2777 case UNEQ:
2778 case ORDERED:
2779 case UNORDERED:
8332c5ee 2780 case LTGT:
45d569f3
AL
2781 break;
2782 default:
2783 gcc_unreachable ();
2784 }
2785
2786 switch (code)
2787 {
2788 case UNGE:
2789 case UNGT:
2790 case UNLE:
2791 case UNLT:
f7d884d4
SD
2792 {
2793 /* All of the above must not raise any FP exceptions. Thus we first
2794 check each operand for NaNs and force any elements containing NaN to
2795 zero before using them in the compare.
2796 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2797 (cm<cc> (isnan (a) ? 0.0 : a,
2798 isnan (b) ? 0.0 : b))
2799 We use the following transformations for doing the comparisons:
2800 a UNGE b -> a GE b
2801 a UNGT b -> a GT b
2802 a UNLE b -> b GE a
2803 a UNLT b -> b GT a. */
2804
2805 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2806 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812 lowpart_subreg (<V_INT_EQUIV>mode,
2813 operands[2],
2814 <MODE>mode)));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2817 operands[3],
2818 <MODE>mode)));
2819 gcc_assert (comparison != NULL);
2820 emit_insn (comparison (operands[0],
2821 lowpart_subreg (<MODE>mode,
2822 tmp0, <V_INT_EQUIV>mode),
2823 lowpart_subreg (<MODE>mode,
2824 tmp1, <V_INT_EQUIV>mode)));
2825 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2826 }
45d569f3
AL
2827 break;
2828
2829 case LT:
2830 case LE:
2831 case GT:
2832 case GE:
2833 case EQ:
f7d884d4 2834 case NE:
45d569f3
AL
2835 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2836 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2837 a GE b -> a GE b
2838 a GT b -> a GT b
2839 a LE b -> b GE a
2840 a LT b -> b GT a
f7d884d4
SD
2841 a EQ b -> a EQ b
2842 a NE b -> ~(a EQ b) */
bb276776 2843 gcc_assert (comparison != NULL);
45d569f3 2844 emit_insn (comparison (operands[0], operands[2], operands[3]));
f7d884d4
SD
2845 if (code == NE)
2846 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
45d569f3
AL
2847 break;
2848
8332c5ee
SD
2849 case LTGT:
2850 /* LTGT is not guaranteed to not generate a FP exception. So let's
2851 go the faster way : ((a > b) || (b > a)). */
2852 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2853 operands[2], operands[3]));
2854 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2855 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2856 break;
2857
45d569f3 2858 case ORDERED:
f7d884d4
SD
2859 case UNORDERED:
2860 case UNEQ:
2861 /* cmeq (a, a) & cmeq (b, b). */
2862 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2863 operands[2], operands[2]));
2864 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2865 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2866
2867 if (code == UNORDERED)
2868 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2869 else if (code == UNEQ)
2870 {
2871 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2872 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2873 }
45d569f3
AL
2874 break;
2875
2876 default:
2877 gcc_unreachable ();
2878 }
2879
2880 DONE;
2881})
2882
;; Unsigned integer vector comparison.  This simply forwards all four
;; operands to vec_cmp<mode><mode>, whose switch also handles the unsigned
;; codes (LTU, GEU, LEU, GTU).
2883(define_expand "vec_cmpu<mode><mode>"
2884 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2885 (match_operator 1 "comparison_operator"
2886 [(match_operand:VSDQ_I_DI 2 "register_operand")
2887 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2888 "TARGET_SIMD"
2889{
2890 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2891 operands[2], operands[3]));
2892 DONE;
2893})
2894
;; Vector conditional select.  Operands 4 and 5 are compared with operator 3
;; to build a mask in <V_INT_EQUIV>mode, and the mask then chooses between
;; operands 1 and 2 via vcond_mask_<mode><v_int_equiv>.
4fda1ad1 2895(define_expand "vcond<mode><mode>"
8b5190ab
AL
2896 [(set (match_operand:VALLDI 0 "register_operand")
2897 (if_then_else:VALLDI
4fda1ad1 2898 (match_operator 3 "comparison_operator"
8b5190ab
AL
2899 [(match_operand:VALLDI 4 "register_operand")
2900 (match_operand:VALLDI 5 "nonmemory_operand")])
2901 (match_operand:VALLDI 1 "nonmemory_operand")
2902 (match_operand:VALLDI 2 "nonmemory_operand")))]
4fda1ad1
JG
2903 "TARGET_SIMD"
2904{
5f565314 2905 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3c556bc4 2906 enum rtx_code code = GET_CODE (operands[3]);
6c553b76 2907
3c556bc4
BC
2908 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2909 it as well as switch operands 1/2 in order to avoid the additional
2910 NOT instruction. */
2911 if (code == NE)
2912 {
2913 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2914 operands[4], operands[5]);
2915 std::swap (operands[1], operands[2]);
2916 }
5f565314
RS
 /* Step 1: build the comparison mask.  */
2917 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2918 operands[4], operands[5]));
 /* Step 2: select between the two data operands using that mask.  */
2919 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2920 operands[2], mask));
6c553b76 2921
4fda1ad1
JG
2922 DONE;
2923})
2924
6c553b76
BC
2925(define_expand "vcond<v_cmp_mixed><mode>"
2926 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2927 (if_then_else:<V_cmp_mixed>
7c19979f 2928 (match_operator 3 "comparison_operator"
6c553b76
BC
2929 [(match_operand:VDQF_COND 4 "register_operand")
2930 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2931 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2932 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
7c19979f
JG
2933 "TARGET_SIMD"
2934{
5f565314 2935 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3c556bc4 2936 enum rtx_code code = GET_CODE (operands[3]);
6c553b76 2937
3c556bc4
BC
2938 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2939 it as well as switch operands 1/2 in order to avoid the additional
2940 NOT instruction. */
2941 if (code == NE)
2942 {
2943 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2944 operands[4], operands[5]);
2945 std::swap (operands[1], operands[2]);
2946 }
5f565314
RS
2947 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2948 operands[4], operands[5]));
2949 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
7c19979f 2950 operands[0], operands[1],
6c553b76
BC
2951 operands[2], mask));
2952
7c19979f
JG
2953 DONE;
2954})
4fda1ad1
JG
2955
2956(define_expand "vcondu<mode><mode>"
8b5190ab
AL
2957 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2958 (if_then_else:VSDQ_I_DI
4fda1ad1 2959 (match_operator 3 "comparison_operator"
8b5190ab
AL
2960 [(match_operand:VSDQ_I_DI 4 "register_operand")
2961 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2962 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2963 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4fda1ad1
JG
2964 "TARGET_SIMD"
2965{
6c553b76 2966 rtx mask = gen_reg_rtx (<MODE>mode);
3c556bc4 2967 enum rtx_code code = GET_CODE (operands[3]);
6c553b76 2968
3c556bc4
BC
2969 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2970 it as well as switch operands 1/2 in order to avoid the additional
2971 NOT instruction. */
2972 if (code == NE)
2973 {
2974 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2975 operands[4], operands[5]);
2976 std::swap (operands[1], operands[2]);
2977 }
6c553b76
BC
2978 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2979 operands[4], operands[5]));
5f565314
RS
2980 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2981 operands[2], mask));
6c553b76
BC
2982 DONE;
2983})
2984
2985(define_expand "vcondu<mode><v_cmp_mixed>"
2986 [(set (match_operand:VDQF 0 "register_operand")
2987 (if_then_else:VDQF
2988 (match_operator 3 "comparison_operator"
2989 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2990 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2991 (match_operand:VDQF 1 "nonmemory_operand")
2992 (match_operand:VDQF 2 "nonmemory_operand")))]
2993 "TARGET_SIMD"
2994{
5f565314 2995 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3c556bc4 2996 enum rtx_code code = GET_CODE (operands[3]);
6c553b76 2997
3c556bc4
BC
2998 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2999 it as well as switch operands 1/2 in order to avoid the additional
3000 NOT instruction. */
3001 if (code == NE)
3002 {
3003 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3004 operands[4], operands[5]);
3005 std::swap (operands[1], operands[2]);
3006 }
6c553b76
BC
3007 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3008 mask, operands[3],
3009 operands[4], operands[5]));
5f565314
RS
3010 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3011 operands[2], mask));
4fda1ad1
JG
3012 DONE;
3013})
3014
43e9d192
IB
3015;; Patterns for AArch64 SIMD Intrinsics.
3016
66adb8eb
JG
3017;; Lane extraction with sign extension to general purpose register.
;; This pattern uses two mode iterators (GPI for the result, VDQQH for the
;; source vector), so every mode attribute carries an explicit VDQQH: prefix.
;; An unprefixed <MODE>, <VEL> or <q> would be ambiguous between the scalar
;; result mode and the vector mode.
3018(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3019 [(set (match_operand:GPI 0 "register_operand" "=r")
3020 (sign_extend:GPI
43e9d192 3021 (vec_select:<VDQQH:VEL>
66adb8eb 3022 (match_operand:VDQQH 1 "register_operand" "w")
43e9d192
IB
3023 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3024 "TARGET_SIMD"
e58bf20a 3025 {
 /* Remap the lane index for assembly output using the VECTOR mode; the
 scalar GPI mode must not be used here.  */
7ac29c0f 3026 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
e58bf20a
TB
3027 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3028 }
00d29b97
ST
3029 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3030)
3031\r
;; Lane extraction with zero extension to general purpose register.
;; This pattern uses two mode iterators (GPI for the result, VDQQH for the
;; source vector), so every mode attribute carries an explicit VDQQH: prefix.
;; An unprefixed <VEL>, <Vetype> or <q> would be ambiguous between the
;; scalar result mode and the vector mode.
3032(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3033 [(set (match_operand:GPI 0 "register_operand" "=r")
3034 (zero_extend:GPI
3035 (vec_select:<VDQQH:VEL>
3036 (match_operand:VDQQH 1 "register_operand" "w")
3037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3038 "TARGET_SIMD"
3039 {
 /* Remap the lane index for assembly output using the vector mode.  */
3040 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041 INTVAL (operands[2]));
 /* The element-size suffix must come from the vector mode as well.  */
3042 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3043 }
3044 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
43e9d192
IB
3045)
3046
66adb8eb
JG
3047;; Lane extraction of a value, neither sign nor zero extension
3048;; is guaranteed so upper bits should be considered undefined.
ed225d0c 3049;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives, chosen by the destination constraint: "r" (emits
;; UMOV), "w" (emits DUP) and "Utv" (emits a single-lane ST1).
43e9d192 3050(define_insn "aarch64_get_lane<mode>"
2eb2847e 3051 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
43e9d192 3052 (vec_select:<VEL>
71a11456 3053 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
e58bf20a 3054 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
43e9d192 3055 "TARGET_SIMD"
e58bf20a 3056 {
 /* Convert the lane index to the numbering used in the assembly output.  */
7ac29c0f 3057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
e58bf20a
TB
3058 switch (which_alternative)
3059 {
3060 case 0:
 /* Destination constraint "r".  */
3061 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3062 case 1:
 /* Destination constraint "w".  */
3063 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3064 case 2:
 /* Destination constraint "Utv": store the lane straight to memory.  */
3065 return "st1\\t{%1.<Vetype>}[%2], %0";
3066 default:
3067 gcc_unreachable ();
3068 }
3069 }
3070 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
43e9d192
IB
3071)
3072
40757a25
KT
3073(define_insn "load_pair_lanes<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3075 (vec_concat:<VDBL>
3076 (match_operand:VDC 1 "memory_operand" "Utq")
3077 (match_operand:VDC 2 "memory_operand" "m")))]
3078 "TARGET_SIMD && !STRICT_ALIGNMENT
3079 && rtx_equal_p (XEXP (operands[2], 0),
3080 plus_constant (Pmode,
3081 XEXP (operands[1], 0),
3082 GET_MODE_SIZE (<MODE>mode)))"
3083 "ldr\\t%q0, %1"
3084 [(set_attr "type" "neon_load1_1reg_q")]
3085)
3086
7692ce17 3087(define_insn "store_pair_lanes<mode>"
a25831ac 3088 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
7692ce17
KT
3089 (vec_concat:<VDBL>
3090 (match_operand:VDC 1 "register_operand" "w, r")
3091 (match_operand:VDC 2 "register_operand" "w, r")))]
3092 "TARGET_SIMD"
3093 "@
e69a816d
WD
3094 stp\\t%d1, %d2, %y0
3095 stp\\t%x1, %x2, %y0"
7692ce17
KT
3096 [(set_attr "type" "neon_stp, store_16")]
3097)
3098
43e9d192
IB
3099;; In this insn, operand 1 should be low, and operand 2 the high part of the
3100;; dest vector.
3101
3102(define_insn "*aarch64_combinez<mode>"
c0233c78 3103 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
6432f025
KT
3104 (vec_concat:<VDBL>
3105 (match_operand:VDC 1 "general_operand" "w,?r,m")
3106 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
5a908485 3107 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
c0233c78
JG
3108 "@
3109 mov\\t%0.8b, %1.8b
3110 fmov\t%d0, %1
3111 ldr\\t%d0, %1"
3112 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
488461d8 3113 (set_attr "arch" "simd,fp,simd")]
5a908485
JG
3114)
3115
3116(define_insn "*aarch64_combinez_be<mode>"
c0233c78 3117 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
5a908485 3118 (vec_concat:<VDBL>
6432f025
KT
3119 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3120 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
5a908485 3121 "TARGET_SIMD && BYTES_BIG_ENDIAN"
c0233c78
JG
3122 "@
3123 mov\\t%0.8b, %1.8b
3124 fmov\t%d0, %1
3125 ldr\\t%d0, %1"
3126 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
488461d8 3127 (set_attr "arch" "simd,fp,simd")]
43e9d192
IB
3128)
3129
5a908485
JG
3130(define_expand "aarch64_combine<mode>"
3131 [(match_operand:<VDBL> 0 "register_operand")
3132 (match_operand:VDC 1 "register_operand")
3133 (match_operand:VDC 2 "register_operand")]
3134 "TARGET_SIMD"
3135{
a977dc0c 3136 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
5a908485 3137
8b033a8a 3138 DONE;
0f686aa9 3139}
0f686aa9 3140)
8b033a8a 3141
0016d8d9 3142(define_expand "@aarch64_simd_combine<mode>"
5a908485
JG
3143 [(match_operand:<VDBL> 0 "register_operand")
3144 (match_operand:VDC 1 "register_operand")
3145 (match_operand:VDC 2 "register_operand")]
8b033a8a
SN
3146 "TARGET_SIMD"
3147 {
3148 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3149 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3150 DONE;
0f686aa9
JG
3151 }
3152[(set_attr "type" "multiple")]
3153)
43e9d192
IB
3154
3155;; <su><addsub>l<q>.
3156
8da00d65 3157(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
43e9d192
IB
3158 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3160 (match_operand:VQW 1 "register_operand" "w")
3161 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3162 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3163 (match_operand:VQW 2 "register_operand" "w")
3164 (match_dup 3)))))]
3165 "TARGET_SIMD"
8da00d65 3166 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 3167 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
43e9d192
IB
3168)
3169
8da00d65
VP
3170(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3173 (match_operand:VQW 1 "register_operand" "w")
3174 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3175 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3176 (match_operand:VQW 2 "register_operand" "w")
3177 (match_dup 3)))))]
3178 "TARGET_SIMD"
3179 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
78ec3036 3180 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
8da00d65
VP
3181)
3182
3183
43e9d192
IB
3184(define_expand "aarch64_saddl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3188 "TARGET_SIMD"
3189{
f5cbabc1 3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65
VP
3191 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3192 operands[2], p));
43e9d192
IB
3193 DONE;
3194})
3195
3196(define_expand "aarch64_uaddl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3200 "TARGET_SIMD"
3201{
f5cbabc1 3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65
VP
3203 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3204 operands[2], p));
43e9d192
IB
3205 DONE;
3206})
3207
3208(define_expand "aarch64_ssubl2<mode>"
3209 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (match_operand:VQW 1 "register_operand" "w")
3211 (match_operand:VQW 2 "register_operand" "w")]
3212 "TARGET_SIMD"
3213{
f5cbabc1 3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65 3215 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
43e9d192
IB
3216 operands[2], p));
3217 DONE;
3218})
3219
3220(define_expand "aarch64_usubl2<mode>"
3221 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3222 (match_operand:VQW 1 "register_operand" "w")
3223 (match_operand:VQW 2 "register_operand" "w")]
3224 "TARGET_SIMD"
3225{
f5cbabc1 3226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65 3227 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
43e9d192
IB
3228 operands[2], p));
3229 DONE;
3230})
3231
3232(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3234 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
a844a695 3235 (match_operand:VD_BHSI 1 "register_operand" "w"))
43e9d192 3236 (ANY_EXTEND:<VWIDE>
a844a695 3237 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
43e9d192 3238 "TARGET_SIMD"
130ee2eb 3239 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 3240 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
43e9d192
IB
3241)
3242
3243;; <su><addsub>w<q>.
3244
b1b49824
MC
3245(define_expand "widen_ssum<mode>3"
3246 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3247 (plus:<VDBLW> (sign_extend:<VDBLW>
3248 (match_operand:VQW 1 "register_operand" ""))
3249 (match_operand:<VDBLW> 2 "register_operand" "")))]
3250 "TARGET_SIMD"
3251 {
f5cbabc1 3252 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
b1b49824
MC
3253 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3254
3255 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3256 operands[1], p));
3257 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3258 DONE;
3259 }
3260)
3261
3262(define_expand "widen_ssum<mode>3"
3263 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3264 (plus:<VWIDE> (sign_extend:<VWIDE>
3265 (match_operand:VD_BHSI 1 "register_operand" ""))
3266 (match_operand:<VWIDE> 2 "register_operand" "")))]
3267 "TARGET_SIMD"
3268{
3269 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3270 DONE;
3271})
3272
3273(define_expand "widen_usum<mode>3"
3274 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3275 (plus:<VDBLW> (zero_extend:<VDBLW>
3276 (match_operand:VQW 1 "register_operand" ""))
3277 (match_operand:<VDBLW> 2 "register_operand" "")))]
3278 "TARGET_SIMD"
3279 {
f5cbabc1 3280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
b1b49824
MC
3281 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3282
3283 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3284 operands[1], p));
3285 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3286 DONE;
3287 }
3288)
3289
3290(define_expand "widen_usum<mode>3"
3291 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3292 (plus:<VWIDE> (zero_extend:<VWIDE>
3293 (match_operand:VD_BHSI 1 "register_operand" ""))
3294 (match_operand:<VWIDE> 2 "register_operand" "")))]
3295 "TARGET_SIMD"
3296{
3297 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3298 DONE;
3299})
3300
8da03df5 3301(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
43e9d192 3302 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
3303 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3304 (ANY_EXTEND:<VWIDE>
3305 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
43e9d192 3306 "TARGET_SIMD"
8da03df5
MM
3307 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_sub_widen")]
43e9d192
IB
3309)
3310
8da03df5 3311(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
b1b49824 3312 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
3313 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3314 (ANY_EXTEND:<VWIDE>
3315 (vec_select:<VHALF>
3316 (match_operand:VQW 2 "register_operand" "w")
3317 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
b1b49824 3318 "TARGET_SIMD"
8da03df5
MM
3319 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3320 [(set_attr "type" "neon_sub_widen")]
b1b49824
MC
3321)
3322
8da03df5 3323(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
43e9d192 3324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
3325 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3326 (ANY_EXTEND:<VWIDE>
3327 (vec_select:<VHALF>
3328 (match_operand:VQW 2 "register_operand" "w")
3329 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
43e9d192 3330 "TARGET_SIMD"
8da03df5
MM
3331 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3332 [(set_attr "type" "neon_sub_widen")]
3333)
3334
3335(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3337 (plus:<VWIDE>
3338 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3340 "TARGET_SIMD"
3341 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342 [(set_attr "type" "neon_add_widen")]
3343)
3344
3345(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3347 (plus:<VWIDE>
3348 (ANY_EXTEND:<VWIDE>
3349 (vec_select:<VHALF>
3350 (match_operand:VQW 2 "register_operand" "w")
3351 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3353 "TARGET_SIMD"
3354 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355 [(set_attr "type" "neon_add_widen")]
3356)
3357
3358(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3360 (plus:<VWIDE>
3361 (ANY_EXTEND:<VWIDE>
3362 (vec_select:<VHALF>
3363 (match_operand:VQW 2 "register_operand" "w")
3364 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3366 "TARGET_SIMD"
3367 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368 [(set_attr "type" "neon_add_widen")]
43e9d192
IB
3369)
3370
3371(define_expand "aarch64_saddw2<mode>"
3372 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373 (match_operand:<VWIDE> 1 "register_operand" "w")
3374 (match_operand:VQW 2 "register_operand" "w")]
3375 "TARGET_SIMD"
3376{
f5cbabc1 3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3378 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3379 operands[2], p));
3380 DONE;
3381})
3382
3383(define_expand "aarch64_uaddw2<mode>"
3384 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3385 (match_operand:<VWIDE> 1 "register_operand" "w")
3386 (match_operand:VQW 2 "register_operand" "w")]
3387 "TARGET_SIMD"
3388{
f5cbabc1 3389 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3390 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3391 operands[2], p));
3392 DONE;
3393})
3394
3395
;; Expander for ssubw2.  It only lists its three operands; the body builds a
;; lane-selection parallel with aarch64_simd_vect_par_cnst_half and passes it
;; as the fourth operand of gen_aarch64_ssubw2<mode>_internal (that insn is
;; defined outside this part of the file), then finishes with DONE.
3396(define_expand "aarch64_ssubw2<mode>"
3397 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3398 (match_operand:<VWIDE> 1 "register_operand" "w")
3399 (match_operand:VQW 2 "register_operand" "w")]
3400 "TARGET_SIMD"
3401{
f5cbabc1 3402 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3403 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3404 operands[2], p));
3405 DONE;
3406})
3407
;; Expander for usubw2.  It only lists its three operands; the body builds a
;; lane-selection parallel with aarch64_simd_vect_par_cnst_half and passes it
;; as the fourth operand of gen_aarch64_usubw2<mode>_internal (that insn is
;; defined outside this part of the file), then finishes with DONE.
3408(define_expand "aarch64_usubw2<mode>"
3409 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3410 (match_operand:<VWIDE> 1 "register_operand" "w")
3411 (match_operand:VQW 2 "register_operand" "w")]
3412 "TARGET_SIMD"
3413{
f5cbabc1 3414 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3415 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3416 operands[2], p));
3417 DONE;
3418})
3419
3420;; <su><r>h<addsub>.
3421
42addb5a
RS
;; Expander <u>avg<mode>3_floor: sets operand 0 to an unspec over operands 1
;; and 2 using the HADD iterator.  There are no preparation statements, so the
;; RTL template is emitted exactly as written.
;; NOTE(review): presumably recognized by the aarch64_<sur>h<addsub><mode>
;; insn via its HADDSUB iterator -- confirm in the iterator definitions.
3422(define_expand "<u>avg<mode>3_floor"
3423 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425 (match_operand:VDQ_BHSI 2 "register_operand")]
3426 HADD))]
3427 "TARGET_SIMD"
3428)
3429
;; Expander <u>avg<mode>3_ceil: sets operand 0 to an unspec over operands 1
;; and 2 using the RHADD iterator.  There are no preparation statements, so
;; the RTL template is emitted exactly as written.
;; NOTE(review): presumably recognized by the aarch64_<sur>h<addsub><mode>
;; insn via its HADDSUB iterator -- confirm in the iterator definitions.
3430(define_expand "<u>avg<mode>3_ceil"
3431 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433 (match_operand:VDQ_BHSI 2 "register_operand")]
3434 RHADD))]
3435 "TARGET_SIMD"
3436)
3437
;; Halving add/subtract insn: operand 0 is an unspec (HADDSUB iterator) of the
;; two VDQ_BHSI register operands 1 and 2.  The mnemonic is assembled from the
;; <sur> and <addsub> attributes; all three operands print with <Vtype>.
43e9d192 3438(define_insn "aarch64_<sur>h<addsub><mode>"
a844a695
AL
3439 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3440 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3441 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
43e9d192
IB
3442 HADDSUB))]
3443 "TARGET_SIMD"
3444 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 3445 [(set_attr "type" "neon_<addsub>_halve<q>")]
43e9d192
IB
3446)
3447
3448;; <r><addsub>hn<q>.
3449
;; Narrowing add/subtract insn: takes two VQN register operands and produces a
;; <VNARROWQ> result through an ADDSUBHN unspec.  The destination prints with
;; <Vntype>, the sources with <Vtype>.
3450(define_insn "aarch64_<sur><addsub>hn<mode>"
3451 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3452 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3453 (match_operand:VQN 2 "register_operand" "w")]
3454 ADDSUBHN))]
3455 "TARGET_SIMD"
3456 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 3457 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
43e9d192
IB
3458)
3459
;; "2" form of the narrowing add/subtract: an ADDSUBHN2 unspec over a
;; <VNARROWQ> operand 1 and two VQN operands 2 and 3, producing <VNARROWQ2>.
;; Operand 1 is tied to the destination by the "0" constraint, so the
;; assembler template prints only operands 0, 2 and 3.
3460(define_insn "aarch64_<sur><addsub>hn2<mode>"
3461 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3462 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3463 (match_operand:VQN 2 "register_operand" "w")
3464 (match_operand:VQN 3 "register_operand" "w")]
3465 ADDSUBHN2))]
3466 "TARGET_SIMD"
3467 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 3468 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
43e9d192
IB
3469)
3470
3471;; pmul.
3472
;; pmul insn: operand 0 is UNSPEC_PMUL of the two VB register operands 1 and
;; 2; all operands print with <Vtype>.
3473(define_insn "aarch64_pmul<mode>"
3474 [(set (match_operand:VB 0 "register_operand" "=w")
3475 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3476 (match_operand:VB 2 "register_operand" "w")]
3477 UNSPEC_PMUL))]
3478 "TARGET_SIMD"
3479 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 3480 [(set_attr "type" "neon_mul_<Vetype><q>")]
43e9d192
IB
3481)
3482
496ea87d
BB
3483;; fmulx.
3484
;; fmulx insn over the VHSDF_HSDF iterator: operand 0 is UNSPEC_FMULX of
;; register operands 1 and 2.  Each operand is printed through the <v> prefix
;; and <Vmtype> suffix attributes.
3485(define_insn "aarch64_fmulx<mode>"
68ad28c3
JW
3486 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3487 (unspec:VHSDF_HSDF
3488 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3489 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
33d72b63 3490 UNSPEC_FMULX))]
496ea87d
BB
3491 "TARGET_SIMD"
3492 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 3493 [(set_attr "type" "neon_fp_mul_<stype>")]
496ea87d
BB
3494)
3495
9030a4d3
BB
3496;; vmulxq_lane_f32, and vmulx_laneq_f32
3497
;; fmulx by element where the lane source (operand 2) is in <VSWAP_WIDTH>
;; mode rather than the VDQSF mode of operands 0 and 1.  The selected element
;; is duplicated across a VDQSF vector before the UNSPEC_FMULX.  The output
;; code rewrites the lane index in operands[3] with aarch64_endian_lane_rtx
;; (presumably an endianness adjustment -- confirm) before printing it.
3498(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3499 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3500 (unspec:VDQSF
3501 [(match_operand:VDQSF 1 "register_operand" "w")
3502 (vec_duplicate:VDQSF
3503 (vec_select:<VEL>
3504 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3505 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3506 UNSPEC_FMULX))]
3507 "TARGET_SIMD"
3508 {
7ac29c0f 3509 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
9030a4d3
BB
3510 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3511 }
3512 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3513)
3514
3515;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3516
;; fmulx by element where the lane source (operand 2) has the same VDQF mode
;; as operands 0 and 1.  The selected element is duplicated across a VDQF
;; vector before the UNSPEC_FMULX.  The output code rewrites the lane index in
;; operands[3] with aarch64_endian_lane_rtx (presumably an endianness
;; adjustment -- confirm) before printing it.
3517(define_insn "*aarch64_mulx_elt<mode>"
3518 [(set (match_operand:VDQF 0 "register_operand" "=w")
3519 (unspec:VDQF
3520 [(match_operand:VDQF 1 "register_operand" "w")
3521 (vec_duplicate:VDQF
3522 (vec_select:<VEL>
3523 (match_operand:VDQF 2 "register_operand" "w")
3524 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3525 UNSPEC_FMULX))]
3526 "TARGET_SIMD"
3527 {
7ac29c0f 3528 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
9030a4d3
BB
3529 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3530 }
3531 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3532)
3533
ab2e8f01 3534;; vmulxq_lane
9030a4d3 3535
ab2e8f01
JW
;; fmulx of a VHSDF vector (operand 1) by a duplicated <VEL> scalar register
;; (operand 2, constraint <h_con>).  The scalar is always printed as lane 0
;; of its register, so no lane operand is needed.
;; NOTE(review): the `;' following the output template starts an (empty)
;; comment in md syntax; it looks unintended but should be harmless -- confirm.
3536(define_insn "*aarch64_mulx_elt_from_dup<mode>"
3537 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3538 (unspec:VHSDF
3539 [(match_operand:VHSDF 1 "register_operand" "w")
3540 (vec_duplicate:VHSDF
6d06971d 3541 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
9030a4d3
BB
3542 UNSPEC_FMULX))]
3543 "TARGET_SIMD"
ab2e8f01
JW
3544 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3545 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
9030a4d3
BB
3546)
3547
3548;; vmulxs_lane_f32, vmulxs_laneq_f32
3549;; vmulxd_lane_f64 == vmulx_lane_f64
3550;; vmulxd_laneq_f64 == vmulx_laneq_f64
3551
;; Scalar fmulx by element: operands 0 and 1 are <VEL> scalars, and the second
;; multiplicand is one element selected from the VDQF vector operand 2.  The
;; output code rewrites the lane index in operands[3] with
;; aarch64_endian_lane_rtx before printing; the scalar registers are printed
;; with a <Vetype> prefix.
3552(define_insn "*aarch64_vgetfmulx<mode>"
3553 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3554 (unspec:<VEL>
3555 [(match_operand:<VEL> 1 "register_operand" "w")
3556 (vec_select:<VEL>
88119b46 3557 (match_operand:VDQF 2 "register_operand" "w")
9030a4d3
BB
3558 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3559 UNSPEC_FMULX))]
3560 "TARGET_SIMD"
3561 {
7ac29c0f 3562 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
9030a4d3
BB
3563 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3564 }
3565 [(set_attr "type" "fmul<Vetype>")]
3566)
43e9d192
IB
3567;; <su>q<addsub>
3568
;; Binary insn over the BINQOPS code iterator for VSDQ_I modes: operand 0 =
;; BINQOPS (operand 1, operand 2).  The mnemonic is <su_optab><optab>, and
;; operands print through the <v> prefix and <Vmtype> suffix attributes.
3569(define_insn "aarch64_<su_optab><optab><mode>"
3570 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3571 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3572 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3573 "TARGET_SIMD"
3574 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 3575 [(set_attr "type" "neon_<optab><q>")]
43e9d192
IB
3576)
3577
3578;; suqadd and usqadd
3579
;; <sur>qadd insn (USSUQADD unspec) for VSDQ_I modes.  Operand 1 is tied to
;; the destination by the "0" constraint, so the instruction is printed in
;; two-operand form using only operands 0 and 2.
3580(define_insn "aarch64_<sur>qadd<mode>"
3581 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3582 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3583 (match_operand:VSDQ_I 2 "register_operand" "w")]
3584 USSUQADD))]
3585 "TARGET_SIMD"
3586 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
78ec3036 3587 [(set_attr "type" "neon_qadd<q>")]
43e9d192
IB
3588)
3589
3590;; sqmovun
3591
;; Pattern named sqmovun that emits the sqxtun mnemonic: a UNSPEC_SQXTUN of
;; the VSQN_HSDI operand 1 producing a <VNARROWQ> result.
3592(define_insn "aarch64_sqmovun<mode>"
3593 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3594 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3595 UNSPEC_SQXTUN))]
3596 "TARGET_SIMD"
3597 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
78ec3036 3598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
57b26d65 3599)
43e9d192
IB
3600
3601;; sqmovn and uqmovn
3602
;; Pattern named <sur>qmovn that emits the <sur>qxtn mnemonic: a SUQMOVN
;; unspec of the VSQN_HSDI operand 1 producing a <VNARROWQ> result.
3603(define_insn "aarch64_<sur>qmovn<mode>"
3604 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3605 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3606 SUQMOVN))]
3607 "TARGET_SIMD"
3608 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
78ec3036 3609 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
57b26d65 3610)
43e9d192
IB
3611
3612;; <su>q<absneg>
3613
;; Unary insn over the UNQOPS code iterator for VSDQ_I modes: operand 0 =
;; UNQOPS (operand 1).  The mnemonic is "s" followed by <optab>.
3614(define_insn "aarch64_s<optab><mode>"
9551c7ec
AV
3615 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3616 (UNQOPS:VSDQ_I
3617 (match_operand:VSDQ_I 1 "register_operand" "w")))]
43e9d192
IB
3618 "TARGET_SIMD"
3619 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
78ec3036 3620 [(set_attr "type" "neon_<optab><q>")]
43e9d192
IB
3621)
3622
3623;; sq<r>dmulh.
3624
;; sq<r>dmulh insn for VSDQ_HSI modes: operand 0 is a VQDMULH unspec of
;; register operands 1 and 2; the <r> attribute supplies the optional "r" in
;; the mnemonic.
3625(define_insn "aarch64_sq<r>dmulh<mode>"
3626 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3627 (unspec:VSDQ_HSI
3628 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3630 VQDMULH))]
3631 "TARGET_SIMD"
3632 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 3633 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
43e9d192
IB
3634)
3635
3636;; sq<r>dmulh_lane
3637
;; sq<r>dmulh by lane, vector form (VDQHS): multiplies operand 1 by one
;; element selected from the <VCOND>-mode operand 2.  The output statement is
;; C code in a "*" string; it rewrites the lane index in operands[3] with
;; aarch64_endian_lane_rtx before returning the template.
2a49c16d 3638(define_insn "aarch64_sq<r>dmulh_lane<mode>"
b7d7d917
TB
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640 (unspec:VDQHS
3641 [(match_operand:VDQHS 1 "register_operand" "w")
3642 (vec_select:<VEL>
3643 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3644 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3645 VQDMULH))]
3646 "TARGET_SIMD"
3647 "*
7ac29c0f 3648 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
b7d7d917 3649 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
78ec3036 3650 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
b7d7d917
TB
3651)
3652
;; sq<r>dmulh by lane, vector form (VDQHS), with the lane source (operand 2)
;; in <VCONQ> mode.  The output statement is C code in a "*" string; it
;; rewrites the lane index in operands[3] with aarch64_endian_lane_rtx before
;; returning the template.
2a49c16d 3653(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
b7d7d917
TB
3654 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3655 (unspec:VDQHS
3656 [(match_operand:VDQHS 1 "register_operand" "w")
3657 (vec_select:<VEL>
3658 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3659 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3660 VQDMULH))]
3661 "TARGET_SIMD"
3662 "*
7ac29c0f 3663 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
b7d7d917 3664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
78ec3036 3665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
b7d7d917
TB
3666)
3667
;; sq<r>dmulh by lane, SD_HSI form: operands 0 and 1 iterate over SD_HSI and
;; the lane source (operand 2) is in <VCOND> mode.  Registers 0 and 1 print
;; with the <v> prefix and the lane register with a ".<v>" suffix.  The lane
;; index in operands[3] is rewritten with aarch64_endian_lane_rtx first.
2a49c16d 3668(define_insn "aarch64_sq<r>dmulh_lane<mode>"
b7d7d917
TB
3669 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3670 (unspec:SD_HSI
3671 [(match_operand:SD_HSI 1 "register_operand" "w")
43e9d192 3672 (vec_select:<VEL>
278821f2 3673 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
43e9d192
IB
3674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3675 VQDMULH))]
3676 "TARGET_SIMD"
3677 "*
7ac29c0f 3678 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
b7d7d917 3679 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
78ec3036 3680 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
43e9d192
IB
3681)
3682
;; sq<r>dmulh by lane, SD_HSI form, with the lane source (operand 2) in
;; <VCONQ> mode.  Registers 0 and 1 print with the <v> prefix and the lane
;; register with a ".<v>" suffix.  The lane index in operands[3] is rewritten
;; with aarch64_endian_lane_rtx first.
2a49c16d 3683(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
d2937a2e
KT
3684 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3685 (unspec:SD_HSI
3686 [(match_operand:SD_HSI 1 "register_operand" "w")
3687 (vec_select:<VEL>
3688 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3689 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3690 VQDMULH))]
3691 "TARGET_SIMD"
3692 "*
7ac29c0f 3693 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
d2937a2e
KT
3694 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3695 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3696)
3697
57b26d65
MW
3698;; sqrdml[as]h.
3699
;; sqrdml[as]h insn for VSDQ_HSI modes, enabled by TARGET_SIMD_RDMA rather
;; than plain TARGET_SIMD.  Operand 1 is tied to the destination by the "0"
;; constraint; only operands 0, 2 and 3 appear in the assembler template.
3700(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3701 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3702 (unspec:VSDQ_HSI
3703 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3704 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3705 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3706 SQRDMLH_AS))]
3707 "TARGET_SIMD_RDMA"
3708 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3710)
3711
3712;; sqrdml[as]h_lane.
3713
;; sqrdml[as]h by lane, vector form (VDQHS), enabled by TARGET_SIMD_RDMA.
;; Operand 1 is tied to the destination; operand 3 (<VCOND> mode) supplies the
;; lane selected by operand 4.  The lane index is rewritten with
;; aarch64_endian_lane_rtx before the template is returned.
3714(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3715 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3716 (unspec:VDQHS
3717 [(match_operand:VDQHS 1 "register_operand" "0")
3718 (match_operand:VDQHS 2 "register_operand" "w")
3719 (vec_select:<VEL>
51b3f077 3720 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
57b26d65
MW
3721 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3722 SQRDMLH_AS))]
3723 "TARGET_SIMD_RDMA"
3724 {
7ac29c0f 3725 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
57b26d65
MW
3726 return
3727 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3728 }
3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3730)
3731
;; sqrdml[as]h by lane, SD_HSI form, enabled by TARGET_SIMD_RDMA.  Operand 1
;; is tied to the destination; operand 3 (<VCOND> mode) supplies the lane
;; selected by operand 4.  Registers 0 and 2 print with the <v> prefix; the
;; lane register prints with a ".<Vetype>" suffix.
3732(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3733 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3734 (unspec:SD_HSI
3735 [(match_operand:SD_HSI 1 "register_operand" "0")
3736 (match_operand:SD_HSI 2 "register_operand" "w")
3737 (vec_select:<VEL>
51b3f077 3738 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
57b26d65
MW
3739 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3740 SQRDMLH_AS))]
3741 "TARGET_SIMD_RDMA"
3742 {
7ac29c0f 3743 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
57b26d65
MW
3744 return
3745 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3746 }
3747 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3748)
3749
3750;; sqrdml[as]h_laneq.
3751
;; sqrdml[as]h by lane, vector form (VDQHS), with the lane source (operand 3)
;; in <VCONQ> mode; enabled by TARGET_SIMD_RDMA.  Operand 1 is tied to the
;; destination.  The lane index in operands[4] is rewritten with
;; aarch64_endian_lane_rtx before the template is returned.
3752(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3753 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3754 (unspec:VDQHS
3755 [(match_operand:VDQHS 1 "register_operand" "0")
3756 (match_operand:VDQHS 2 "register_operand" "w")
3757 (vec_select:<VEL>
51b3f077 3758 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
57b26d65
MW
3759 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3760 SQRDMLH_AS))]
3761 "TARGET_SIMD_RDMA"
3762 {
7ac29c0f 3763 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
57b26d65
MW
3764 return
3765 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3766 }
3767 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3768)
3769
;; sqrdml[as]h by lane, SD_HSI form, with the lane source (operand 3) in
;; <VCONQ> mode; enabled by TARGET_SIMD_RDMA.  Operand 1 is tied to the
;; destination.  Registers 0 and 2 print with the <v> prefix; the lane
;; register prints with a ".<v>" suffix.
3770(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3771 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3772 (unspec:SD_HSI
3773 [(match_operand:SD_HSI 1 "register_operand" "0")
3774 (match_operand:SD_HSI 2 "register_operand" "w")
3775 (vec_select:<VEL>
51b3f077 3776 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
57b26d65
MW
3777 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3778 SQRDMLH_AS))]
3779 "TARGET_SIMD_RDMA"
3780 {
7ac29c0f 3781 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
57b26d65
MW
3782 return
3783 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3784 }
3785 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3786)
3787
43e9d192
IB
3788;; vqdml[sa]l
3789
;; sqdml[as]l: operand 0 = SBINQOPS (operand 1, ss_ashift (sign_extend
;; (operand 2) * sign_extend (operand 3), 1)), computed in <VWIDE>.  The
;; accumulator operand 1 is tied to the destination by the "0" constraint, so
;; the template prints operands 0, 2 and 3 only.
3790(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3791 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792 (SBINQOPS:<VWIDE>
3793 (match_operand:<VWIDE> 1 "register_operand" "0")
3794 (ss_ashift:<VWIDE>
3795 (mult:<VWIDE>
3796 (sign_extend:<VWIDE>
3797 (match_operand:VSD_HSI 2 "register_operand" "w"))
3798 (sign_extend:<VWIDE>
3799 (match_operand:VSD_HSI 3 "register_operand" "w")))
3800 (const_int 1))))]
3801 "TARGET_SIMD"
3802 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
78ec3036 3803 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
43e9d192
IB
3804)
3805
3806;; vqdml[sa]l_lane
3807
;; sqdml[as]l by lane, vector form (VD_HSI): the second multiplicand is one
;; element of the <VCOND>-mode operand 3, selected by operand 4 and duplicated
;; across a VD_HSI vector.  Accumulator operand 1 is tied to the destination.
;; The lane index is rewritten with aarch64_endian_lane_rtx before printing.
2a49c16d 3808(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
43e9d192
IB
3809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3810 (SBINQOPS:<VWIDE>
3811 (match_operand:<VWIDE> 1 "register_operand" "0")
3812 (ss_ashift:<VWIDE>
3813 (mult:<VWIDE>
3814 (sign_extend:<VWIDE>
3815 (match_operand:VD_HSI 2 "register_operand" "w"))
3816 (sign_extend:<VWIDE>
3817 (vec_duplicate:VD_HSI
3818 (vec_select:<VEL>
278821f2
KT
3819 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821 ))
3822 (const_int 1))))]
3823 "TARGET_SIMD"
3824 {
7ac29c0f 3825 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2
KT
3826 return
3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828 }
3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3830)
3831
;; sqdml[as]l by lane, vector form (VD_HSI), with the lane source (operand 3)
;; in <VCONQ> mode.  The selected element is duplicated across a VD_HSI
;; vector; accumulator operand 1 is tied to the destination.  The lane index
;; is rewritten with aarch64_endian_lane_rtx before printing.
2a49c16d 3832(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
278821f2
KT
3833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834 (SBINQOPS:<VWIDE>
3835 (match_operand:<VWIDE> 1 "register_operand" "0")
3836 (ss_ashift:<VWIDE>
3837 (mult:<VWIDE>
3838 (sign_extend:<VWIDE>
3839 (match_operand:VD_HSI 2 "register_operand" "w"))
3840 (sign_extend:<VWIDE>
3841 (vec_duplicate:VD_HSI
3842 (vec_select:<VEL>
3843 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
3844 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3845 ))
3846 (const_int 1))))]
3847 "TARGET_SIMD"
1dd055a2 3848 {
7ac29c0f 3849 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2
JG
3850 return
3851 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3852 }
78ec3036 3853 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
3854)
3855
;; sqdml[as]l by lane, SD_HSI form: operand 2 iterates over SD_HSI and the
;; second multiplicand is one element of the <VCOND>-mode operand 3 (no
;; vec_duplicate in this form).  Accumulator operand 1 is tied to the
;; destination.  The lane index is rewritten with aarch64_endian_lane_rtx.
2a49c16d 3856(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
43e9d192
IB
3857 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3858 (SBINQOPS:<VWIDE>
3859 (match_operand:<VWIDE> 1 "register_operand" "0")
3860 (ss_ashift:<VWIDE>
3861 (mult:<VWIDE>
3862 (sign_extend:<VWIDE>
3863 (match_operand:SD_HSI 2 "register_operand" "w"))
3864 (sign_extend:<VWIDE>
3865 (vec_select:<VEL>
278821f2
KT
3866 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3867 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3868 )
3869 (const_int 1))))]
3870 "TARGET_SIMD"
3871 {
7ac29c0f 3872 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2
KT
3873 return
3874 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3875 }
3876 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3877)
3878
;; sqdml[as]l by lane, SD_HSI form, with the lane source (operand 3) in
;; <VCONQ> mode (no vec_duplicate in this form).  Accumulator operand 1 is
;; tied to the destination.  The lane index is rewritten with
;; aarch64_endian_lane_rtx before printing.
2a49c16d 3879(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
278821f2
KT
3880 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3881 (SBINQOPS:<VWIDE>
3882 (match_operand:<VWIDE> 1 "register_operand" "0")
3883 (ss_ashift:<VWIDE>
3884 (mult:<VWIDE>
3885 (sign_extend:<VWIDE>
3886 (match_operand:SD_HSI 2 "register_operand" "w"))
3887 (sign_extend:<VWIDE>
3888 (vec_select:<VEL>
3889 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
3890 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3891 )
3892 (const_int 1))))]
3893 "TARGET_SIMD"
1dd055a2 3894 {
7ac29c0f 3895 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2
JG
3896 return
3897 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3898 }
78ec3036 3899 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
3900)
3901
43e9d192
IB
3902;; vqdml[sa]l_n
3903
;; sqdml[as]l by scalar (_n): the second multiplicand is the <VEL> register
;; operand 3 duplicated across a VD_HSI vector, and it is always printed as
;; lane 0 of that register.  Accumulator operand 1 is tied to the destination.
3904(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3905 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3906 (SBINQOPS:<VWIDE>
3907 (match_operand:<VWIDE> 1 "register_operand" "0")
3908 (ss_ashift:<VWIDE>
3909 (mult:<VWIDE>
3910 (sign_extend:<VWIDE>
3911 (match_operand:VD_HSI 2 "register_operand" "w"))
3912 (sign_extend:<VWIDE>
3913 (vec_duplicate:VD_HSI
1c83b673 3914 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
43e9d192
IB
3915 (const_int 1))))]
3916 "TARGET_SIMD"
3917 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
78ec3036 3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
3919)
3920
3921;; sqdml[as]l2
3922
;; sqdml[as]l2: both multiplicands are <VHALF> selections from VQ_HSI
;; operands 2 and 3 using the same parallel (operand 4, reused via
;; match_dup), which must satisfy vect_par_cnst_hi_half.  Accumulator
;; operand 1 is tied to the destination.
;; NOTE(review): the type attribute is "..._scalar_long" although this
;; pattern has no lane or scalar operand -- confirm that is intended.
3923(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3925 (SBINQOPS:<VWIDE>
3926 (match_operand:<VWIDE> 1 "register_operand" "0")
3927 (ss_ashift:<VWIDE>
3928 (mult:<VWIDE>
3929 (sign_extend:<VWIDE>
3930 (vec_select:<VHALF>
3931 (match_operand:VQ_HSI 2 "register_operand" "w")
3932 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933 (sign_extend:<VWIDE>
3934 (vec_select:<VHALF>
3935 (match_operand:VQ_HSI 3 "register_operand" "w")
3936 (match_dup 4))))
3937 (const_int 1))))]
3938 "TARGET_SIMD"
3939 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
78ec3036 3940 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
3941)
3942
;; Expander for sqdmlal2.  It only lists its four operands; the body builds a
;; lane-selection parallel with aarch64_simd_vect_par_cnst_half and passes it
;; as the fifth operand of the sqdmlal2 _internal insn, then finishes with
;; DONE.
;; NOTE(review): the `true' argument presumably requests the high half, which
;; the _internal pattern's vect_par_cnst_hi_half predicate expects -- confirm.
3943(define_expand "aarch64_sqdmlal2<mode>"
3944 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3945 (match_operand:<VWIDE> 1 "register_operand" "w")
3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3947 (match_operand:VQ_HSI 3 "register_operand" "w")]
3948 "TARGET_SIMD"
3949{
f5cbabc1 3950 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3951 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3952 operands[2], operands[3], p));
3953 DONE;
3954})
3955
;; Expander for sqdmlsl2.  It only lists its four operands; the body builds a
;; lane-selection parallel with aarch64_simd_vect_par_cnst_half and passes it
;; as the fifth operand of the sqdmlsl2 _internal insn, then finishes with
;; DONE.
;; NOTE(review): the `true' argument presumably requests the high half, which
;; the _internal pattern's vect_par_cnst_hi_half predicate expects -- confirm.
3956(define_expand "aarch64_sqdmlsl2<mode>"
3957 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3958 (match_operand:<VWIDE> 1 "register_operand" "w")
3959 (match_operand:VQ_HSI 2 "register_operand" "w")
3960 (match_operand:VQ_HSI 3 "register_operand" "w")]
3961 "TARGET_SIMD"
3962{
f5cbabc1 3963 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
3964 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3965 operands[2], operands[3], p));
3966 DONE;
3967})
3968
3969;; vqdml[sa]l2_lane
3970
;; sqdml[as]l2 by lane: the first multiplicand is a <VHALF> selection from
;; VQ_HSI operand 2 (parallel in operand 5, predicate vect_par_cnst_hi_half);
;; the second is one element of the <VCOND>-mode operand 3, selected by
;; operand 4 and duplicated to <VHALF>.  Accumulator operand 1 is tied to the
;; destination.  The lane index is rewritten with aarch64_endian_lane_rtx.
3971(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3972 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3973 (SBINQOPS:<VWIDE>
3974 (match_operand:<VWIDE> 1 "register_operand" "0")
3975 (ss_ashift:<VWIDE>
3976 (mult:<VWIDE>
3977 (sign_extend:<VWIDE>
3978 (vec_select:<VHALF>
3979 (match_operand:VQ_HSI 2 "register_operand" "w")
3980 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3981 (sign_extend:<VWIDE>
3982 (vec_duplicate:<VHALF>
3983 (vec_select:<VEL>
278821f2
KT
3984 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3986 ))))
3987 (const_int 1))))]
3988 "TARGET_SIMD"
3989 {
7ac29c0f 3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2
KT
3991 return
3992 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993 }
3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3995)
3996
;; sqdml[as]l2 by lane with the lane source (operand 3) in <VCONQ> mode.  The
;; first multiplicand is a <VHALF> selection from VQ_HSI operand 2 (parallel
;; in operand 5, predicate vect_par_cnst_hi_half); the selected lane is
;; duplicated to <VHALF>.  Accumulator operand 1 is tied to the destination.
;; The lane index is rewritten with aarch64_endian_lane_rtx before printing.
3997(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3998 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999 (SBINQOPS:<VWIDE>
4000 (match_operand:<VWIDE> 1 "register_operand" "0")
4001 (ss_ashift:<VWIDE>
4002 (mult:<VWIDE>
4003 (sign_extend:<VWIDE>
4004 (vec_select:<VHALF>
4005 (match_operand:VQ_HSI 2 "register_operand" "w")
4006 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4007 (sign_extend:<VWIDE>
4008 (vec_duplicate:<VHALF>
4009 (vec_select:<VEL>
4010 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
4011 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4012 ))))
4013 (const_int 1))))]
4014 "TARGET_SIMD"
1dd055a2 4015 {
7ac29c0f 4016 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2
JG
4017 return
4018 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4019 }
78ec3036 4020 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
4021)
4022
;; Expander for sqdmlal2_lane (lane source operand 3 in <VCOND> mode, lane
;; index in operand 4).  The body builds a lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half and passes it as the sixth operand of the
;; sqdmlal2_lane _internal insn, then finishes with DONE.
4023(define_expand "aarch64_sqdmlal2_lane<mode>"
4024 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4025 (match_operand:<VWIDE> 1 "register_operand" "w")
4026 (match_operand:VQ_HSI 2 "register_operand" "w")
278821f2 4027 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
43e9d192
IB
4028 (match_operand:SI 4 "immediate_operand" "i")]
4029 "TARGET_SIMD"
4030{
f5cbabc1 4031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4032 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4033 operands[2], operands[3],
4034 operands[4], p));
4035 DONE;
4036})
4037
;; Expander for sqdmlal2_laneq (lane source operand 3 in <VCONQ> mode, lane
;; index in operand 4).  The body builds a lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half and passes it as the sixth operand of the
;; sqdmlal2_laneq _internal insn, then finishes with DONE.
4038(define_expand "aarch64_sqdmlal2_laneq<mode>"
4039 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4040 (match_operand:<VWIDE> 1 "register_operand" "w")
4041 (match_operand:VQ_HSI 2 "register_operand" "w")
278821f2 4042 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
4043 (match_operand:SI 4 "immediate_operand" "i")]
4044 "TARGET_SIMD"
4045{
f5cbabc1 4046 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
278821f2 4047 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
43e9d192
IB
4048 operands[2], operands[3],
4049 operands[4], p));
4050 DONE;
4051})
4052
;; Expander for sqdmlsl2_lane (lane source operand 3 in <VCOND> mode, lane
;; index in operand 4).  The body builds a lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half and passes it as the sixth operand of the
;; sqdmlsl2_lane _internal insn, then finishes with DONE.
4053(define_expand "aarch64_sqdmlsl2_lane<mode>"
4054 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (match_operand:<VWIDE> 1 "register_operand" "w")
4056 (match_operand:VQ_HSI 2 "register_operand" "w")
278821f2 4057 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
43e9d192
IB
4058 (match_operand:SI 4 "immediate_operand" "i")]
4059 "TARGET_SIMD"
4060{
f5cbabc1 4061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4062 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4063 operands[2], operands[3],
4064 operands[4], p));
4065 DONE;
4066})
4067
;; Expander for sqdmlsl2_laneq (lane source operand 3 in <VCONQ> mode, lane
;; index in operand 4).  The body builds a lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half and passes it as the sixth operand of the
;; sqdmlsl2_laneq _internal insn, then finishes with DONE.
4068(define_expand "aarch64_sqdmlsl2_laneq<mode>"
4069 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4070 (match_operand:<VWIDE> 1 "register_operand" "w")
4071 (match_operand:VQ_HSI 2 "register_operand" "w")
278821f2 4072 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
4073 (match_operand:SI 4 "immediate_operand" "i")]
4074 "TARGET_SIMD"
4075{
f5cbabc1 4076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
278821f2 4077 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
43e9d192
IB
4078 operands[2], operands[3],
4079 operands[4], p));
4080 DONE;
4081})
4082
;; sqdml[as]l2 by scalar (_n): the first multiplicand is a <VHALF> selection
;; from VQ_HSI operand 2 (parallel in operand 4, predicate
;; vect_par_cnst_hi_half); the second is the <VEL> register operand 3
;; duplicated to <VHALF> and printed as lane 0.  Accumulator operand 1 is
;; tied to the destination.
4083(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4085 (SBINQOPS:<VWIDE>
4086 (match_operand:<VWIDE> 1 "register_operand" "0")
4087 (ss_ashift:<VWIDE>
4088 (mult:<VWIDE>
4089 (sign_extend:<VWIDE>
4090 (vec_select:<VHALF>
4091 (match_operand:VQ_HSI 2 "register_operand" "w")
4092 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4093 (sign_extend:<VWIDE>
4094 (vec_duplicate:<VHALF>
1c83b673 4095 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
43e9d192
IB
4096 (const_int 1))))]
4097 "TARGET_SIMD"
4098 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
78ec3036 4099 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
4100)
4101
;; Expander for sqdmlal2_n (scalar multiplicand in the <VEL> operand 3).  The
;; body builds a lane-selection parallel with aarch64_simd_vect_par_cnst_half
;; and passes it as the fifth operand of the sqdmlal2_n _internal insn, then
;; finishes with DONE.
4102(define_expand "aarch64_sqdmlal2_n<mode>"
4103 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4104 (match_operand:<VWIDE> 1 "register_operand" "w")
4105 (match_operand:VQ_HSI 2 "register_operand" "w")
4106 (match_operand:<VEL> 3 "register_operand" "w")]
4107 "TARGET_SIMD"
4108{
f5cbabc1 4109 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4110 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4111 operands[2], operands[3],
4112 p));
4113 DONE;
4114})
4115
;; Expander for sqdmlsl2_n (scalar multiplicand in the <VEL> operand 3).  The
;; body builds a lane-selection parallel with aarch64_simd_vect_par_cnst_half
;; and passes it as the fifth operand of the sqdmlsl2_n _internal insn, then
;; finishes with DONE.
4116(define_expand "aarch64_sqdmlsl2_n<mode>"
4117 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (match_operand:<VWIDE> 1 "register_operand" "w")
4119 (match_operand:VQ_HSI 2 "register_operand" "w")
4120 (match_operand:<VEL> 3 "register_operand" "w")]
4121 "TARGET_SIMD"
4122{
f5cbabc1 4123 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4124 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4125 operands[2], operands[3],
4126 p));
4127 DONE;
4128})
4129
4130;; vqdmull
4131
;; sqdmull: operand 0 = ss_ashift (sign_extend (operand 1) * sign_extend
;; (operand 2), 1), computed in <VWIDE> from VSD_HSI inputs.
4132(define_insn "aarch64_sqdmull<mode>"
4133 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4134 (ss_ashift:<VWIDE>
4135 (mult:<VWIDE>
4136 (sign_extend:<VWIDE>
4137 (match_operand:VSD_HSI 1 "register_operand" "w"))
4138 (sign_extend:<VWIDE>
4139 (match_operand:VSD_HSI 2 "register_operand" "w")))
4140 (const_int 1)))]
4141 "TARGET_SIMD"
4142 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 4143 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
43e9d192
IB
4144)
4145
4146;; vqdmull_lane
4147
;; sqdmull by lane, vector form (VD_HSI): the second multiplicand is one
;; element of the <VCOND>-mode operand 2, selected by operand 3 and duplicated
;; across a VD_HSI vector.  The lane index is rewritten with
;; aarch64_endian_lane_rtx before the template is returned.
2a49c16d 4148(define_insn "aarch64_sqdmull_lane<mode>"
43e9d192
IB
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150 (ss_ashift:<VWIDE>
4151 (mult:<VWIDE>
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4156 (vec_select:<VEL>
278821f2
KT
4157 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4158 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4159 ))
4160 (const_int 1)))]
4161 "TARGET_SIMD"
4162 {
7ac29c0f 4163 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
278821f2
KT
4164 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4165 }
4166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4167)
4168
;; sqdmull by lane, vector form (VD_HSI), with the lane source (operand 2) in
;; <VCONQ> mode.  The selected element is duplicated across a VD_HSI vector.
;; The lane index in operands[3] is rewritten with aarch64_endian_lane_rtx
;; before the template is returned.
2a49c16d 4169(define_insn "aarch64_sqdmull_laneq<mode>"
278821f2
KT
4170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4171 (ss_ashift:<VWIDE>
4172 (mult:<VWIDE>
4173 (sign_extend:<VWIDE>
4174 (match_operand:VD_HSI 1 "register_operand" "w"))
4175 (sign_extend:<VWIDE>
4176 (vec_duplicate:VD_HSI
4177 (vec_select:<VEL>
4178 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
43e9d192
IB
4179 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4180 ))
4181 (const_int 1)))]
4182 "TARGET_SIMD"
1dd055a2 4183 {
7ac29c0f 4184 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
1dd055a2
JG
4185 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4186 }
78ec3036 4187 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4188)
4189
;; sqdmull by lane, SD_HSI form: operand 1 iterates over SD_HSI and the second
;; multiplicand is one element of the <VCOND>-mode operand 2 (no vec_duplicate
;; in this form).  The lane index in operands[3] is rewritten with
;; aarch64_endian_lane_rtx before the template is returned.
2a49c16d 4190(define_insn "aarch64_sqdmull_lane<mode>"
43e9d192
IB
4191 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4192 (ss_ashift:<VWIDE>
4193 (mult:<VWIDE>
4194 (sign_extend:<VWIDE>
4195 (match_operand:SD_HSI 1 "register_operand" "w"))
4196 (sign_extend:<VWIDE>
4197 (vec_select:<VEL>
278821f2
KT
4198 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4199 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4200 ))
4201 (const_int 1)))]
4202 "TARGET_SIMD"
4203 {
7ac29c0f 4204 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
278821f2
KT
4205 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4206 }
4207 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4208)
4209
;; sqdmull by lane, SD_HSI form, with the lane source (operand 2) in <VCONQ>
;; mode (no vec_duplicate in this form).  The lane index in operands[3] is
;; rewritten with aarch64_endian_lane_rtx before the template is returned.
2a49c16d 4210(define_insn "aarch64_sqdmull_laneq<mode>"
278821f2
KT
4211 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4212 (ss_ashift:<VWIDE>
4213 (mult:<VWIDE>
4214 (sign_extend:<VWIDE>
4215 (match_operand:SD_HSI 1 "register_operand" "w"))
4216 (sign_extend:<VWIDE>
4217 (vec_select:<VEL>
4218 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
43e9d192
IB
4219 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4220 ))
4221 (const_int 1)))]
4222 "TARGET_SIMD"
1dd055a2 4223 {
7ac29c0f 4224 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
1dd055a2
JG
4225 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4226 }
78ec3036 4227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4228)
4229
43e9d192
IB
4230;; vqdmull_n
4231
;; sqdmull by scalar (_n): the second multiplicand is the <VEL> register
;; operand 2 duplicated across a VD_HSI vector, and it is always printed as
;; lane 0 of that register.
4232(define_insn "aarch64_sqdmull_n<mode>"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234 (ss_ashift:<VWIDE>
4235 (mult:<VWIDE>
4236 (sign_extend:<VWIDE>
4237 (match_operand:VD_HSI 1 "register_operand" "w"))
4238 (sign_extend:<VWIDE>
4239 (vec_duplicate:VD_HSI
1c83b673 4240 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
43e9d192
IB
4241 )
4242 (const_int 1)))]
4243 "TARGET_SIMD"
4244 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
78ec3036 4245 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4246)
4247
4248;; vqdmull2
4249
4250
4251
;; SQDMULL2: the same half (<VHALF>) of operands 1 and 2 is selected by
;; operand 3, which must satisfy vect_par_cnst_hi_half; the halves are
;; sign-extended, multiplied and saturating-shifted left by 1.
4252(define_insn "aarch64_sqdmull2<mode>_internal"
4253 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4254 (ss_ashift:<VWIDE>
4255 (mult:<VWIDE>
4256 (sign_extend:<VWIDE>
4257 (vec_select:<VHALF>
4258 (match_operand:VQ_HSI 1 "register_operand" "w")
4259 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4260 (sign_extend:<VWIDE>
4261 (vec_select:<VHALF>
4262 (match_operand:VQ_HSI 2 "register_operand" "w")
4263 (match_dup 3)))
4264 )
4265 (const_int 1)))]
4266 "TARGET_SIMD"
4267 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 4268 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4269)
4270
;; Expander for SQDMULL2: builds the half-selection parallel with
;; aarch64_simd_vect_par_cnst_half (..., true) and passes it as the last
;; operand of aarch64_sqdmull2<mode>_internal.
4271(define_expand "aarch64_sqdmull2<mode>"
4272 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4273 (match_operand:VQ_HSI 1 "register_operand" "w")
278821f2 4274 (match_operand:VQ_HSI 2 "register_operand" "w")]
43e9d192
IB
4275 "TARGET_SIMD"
4276{
f5cbabc1 4277 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4278 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4279 operands[2], p));
4280 DONE;
4281})
4282
4283;; vqdmull2_lane
4284
;; SQDMULL2 by lane: the half of operand 1 selected by operand 4 is
;; multiplied by the element of operand 2 (<VCOND> mode) chosen by operand 3
;; and duplicated across <VHALF>; the product is saturating-shifted left by 1.
;; The lane number is remapped via aarch64_endian_lane_rtx before printing.
4285(define_insn "aarch64_sqdmull2_lane<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (ss_ashift:<VWIDE>
4288 (mult:<VWIDE>
4289 (sign_extend:<VWIDE>
4290 (vec_select:<VHALF>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4295 (vec_select:<VEL>
278821f2
KT
4296 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298 ))
4299 (const_int 1)))]
4300 "TARGET_SIMD"
4301 {
7ac29c0f 4302 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
278821f2
KT
4303 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304 }
4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4306)
4307
;; Same operation as aarch64_sqdmull2_lane<mode>_internal, but the lane is
;; taken from a <VCONQ>-mode register and the lane number is remapped using
;; <VCONQ>mode.
4308(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310 (ss_ashift:<VWIDE>
4311 (mult:<VWIDE>
4312 (sign_extend:<VWIDE>
4313 (vec_select:<VHALF>
4314 (match_operand:VQ_HSI 1 "register_operand" "w")
4315 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4316 (sign_extend:<VWIDE>
4317 (vec_duplicate:<VHALF>
4318 (vec_select:<VEL>
4319 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
43e9d192
IB
4320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4321 ))
4322 (const_int 1)))]
4323 "TARGET_SIMD"
1dd055a2 4324 {
7ac29c0f 4325 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
1dd055a2
JG
4326 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4327 }
78ec3036 4328 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4329)
4330
;; Expander for SQDMULL2 by lane (<VCOND> lane source): builds the
;; half-selection parallel and forwards operands 0-3 plus that parallel to
;; aarch64_sqdmull2_lane<mode>_internal.
4331(define_expand "aarch64_sqdmull2_lane<mode>"
4332 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4333 (match_operand:VQ_HSI 1 "register_operand" "w")
278821f2 4334 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
43e9d192
IB
4335 (match_operand:SI 3 "immediate_operand" "i")]
4336 "TARGET_SIMD"
4337{
f5cbabc1 4338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4339 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4340 operands[2], operands[3],
4341 p));
4342 DONE;
4343})
4344
;; Expander for SQDMULL2 by lane (<VCONQ> lane source): builds the
;; half-selection parallel and forwards operands 0-3 plus that parallel to
;; aarch64_sqdmull2_laneq<mode>_internal.
4345(define_expand "aarch64_sqdmull2_laneq<mode>"
4346 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4347 (match_operand:VQ_HSI 1 "register_operand" "w")
278821f2 4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
43e9d192
IB
4349 (match_operand:SI 3 "immediate_operand" "i")]
4350 "TARGET_SIMD"
4351{
f5cbabc1 4352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
278821f2 4353 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
43e9d192
IB
4354 operands[2], operands[3],
4355 p));
4356 DONE;
4357})
4358
4359;; vqdmull2_n
4360
;; SQDMULL2 by scalar: the half of operand 1 selected by operand 3 is
;; multiplied by operand 2 duplicated across <VHALF>, then saturating-shifted
;; left by 1.  The assembly always addresses lane [0] of operand 2.
4361(define_insn "aarch64_sqdmull2_n<mode>_internal"
4362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4363 (ss_ashift:<VWIDE>
4364 (mult:<VWIDE>
4365 (sign_extend:<VWIDE>
4366 (vec_select:<VHALF>
4367 (match_operand:VQ_HSI 1 "register_operand" "w")
4368 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4369 (sign_extend:<VWIDE>
4370 (vec_duplicate:<VHALF>
1c83b673 4371 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
43e9d192
IB
4372 )
4373 (const_int 1)))]
4374 "TARGET_SIMD"
4375 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
78ec3036 4376 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
43e9d192
IB
4377)
4378
;; Expander for SQDMULL2 by scalar: builds the half-selection parallel and
;; emits aarch64_sqdmull2_n<mode>_internal with it as the last operand.
4379(define_expand "aarch64_sqdmull2_n<mode>"
4380 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4381 (match_operand:VQ_HSI 1 "register_operand" "w")
4382 (match_operand:<VEL> 2 "register_operand" "w")]
4383 "TARGET_SIMD"
4384{
f5cbabc1 4385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4386 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4387 operands[2], p));
4388 DONE;
4389})
4390
4391;; vshl
4392
;; Register-controlled shift: an opaque VSHL unspec over two VSDQ_I_DI
;; register operands, printed as <sur>shl.  The ';' after the output
;; template starts a machine-description comment and has no effect.
4393(define_insn "aarch64_<sur>shl<mode>"
4394 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4395 (unspec:VSDQ_I_DI
4396 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4397 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4398 VSHL))]
4399 "TARGET_SIMD"
4400 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
78ec3036 4401 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
4402)
4403
4404
4405;; vqshl
4406
;; Register-controlled saturating shift: a VQSHL unspec over two VSDQ_I
;; register operands, printed as <sur>q<r>shl.  The ';' after the output
;; template starts a machine-description comment and has no effect.
4407(define_insn "aarch64_<sur>q<r>shl<mode>"
4408 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4409 (unspec:VSDQ_I
4410 [(match_operand:VSDQ_I 1 "register_operand" "w")
4411 (match_operand:VSDQ_I 2 "register_operand" "w")]
4412 VQSHL))]
4413 "TARGET_SIMD"
4414 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
78ec3036 4415 [(set_attr "type" "neon_sat_shift_reg<q>")]
43e9d192
IB
4416)
4417
43e9d192
IB
4418;; vshll_n
4419
;; Widening shift left by immediate (VSHLL unspec) of a VD_BHSI vector.
;; The immediate is checked by aarch64_simd_shift_imm_bitsize_<ve_mode>.
4420(define_insn "aarch64_<sur>shll_n<mode>"
4421 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
a844a695 4422 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
cb23a30c
JG
4423 (match_operand:SI 2
4424 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
43e9d192
IB
4425 VSHLL))]
4426 "TARGET_SIMD"
43e9d192 4427 {
/* A shift count equal to the element bit size is printed with the plain
   "shll" mnemonic; any other count uses the <sur>-prefixed form.  */
10c619de
KT
4428 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4429 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4430 else
4431 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
43e9d192 4432 }
78ec3036 4433 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
4434)
4435
4436;; vshll_high_n
4437
;; "2" form of the widening shift left by immediate, taking a VQW input.
;; A shift count equal to the element bit size is printed as plain "shll2";
;; any other count uses "<sur>shll2".
;; NOTE(review): operand 2 is only checked by immediate_operand here, whereas
;; aarch64_<sur>shll_n<mode> uses a range-checking predicate -- confirm this
;; difference is intended.
4438(define_insn "aarch64_<sur>shll2_n<mode>"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4441 (match_operand:SI 2 "immediate_operand" "i")]
4442 VSHLL))]
4443 "TARGET_SIMD"
43e9d192 4444 {
10c619de
KT
4445 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4446 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4447 else
4448 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
43e9d192 4449 }
78ec3036 4450 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
4451)
4452
43e9d192
IB
4453;; vrshr_n
4454
;; Shift right by immediate (VRSHR_N unspec), printed as <sur>shr.  The
;; immediate is checked by aarch64_simd_shift_imm_offset_<ve_mode>.
;; NOTE(review): the "type" attribute is a saturating-shift class although
;; the mnemonic carries no 'q' -- confirm this scheduling class is intended.
4455(define_insn "aarch64_<sur>shr_n<mode>"
4456 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4457 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
cb23a30c
JG
4458 (match_operand:SI 2
4459 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
43e9d192
IB
4460 VRSHR_N))]
4461 "TARGET_SIMD"
cb23a30c 4462 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
78ec3036 4463 [(set_attr "type" "neon_sat_shift_imm<q>")]
43e9d192
IB
4464)
4465
4466;; v(r)sra_n
4467
;; Shift right by immediate and accumulate (VSRA unspec).  Operand 1 is
;; tied to the output register (constraint "0"), so only operands 0, 2
;; and 3 appear in the assembly template.
4468(define_insn "aarch64_<sur>sra_n<mode>"
4469 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4470 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4471 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
cb23a30c
JG
4472 (match_operand:SI 3
4473 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
43e9d192
IB
4474 VSRA))]
4475 "TARGET_SIMD"
cb23a30c 4476 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
78ec3036 4477 [(set_attr "type" "neon_shift_acc<q>")]
43e9d192
IB
4478)
4479
4480;; vs<lr>i_n
4481
;; Shift and insert by immediate (VSLRI unspec), printed as s<lr>i.
;; Operand 1 is tied to the output register (constraint "0"); the immediate
;; predicate name is built from <offsetlr> and <ve_mode>.
4482(define_insn "aarch64_<sur>s<lr>i_n<mode>"
4483 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4484 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4485 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
cb23a30c
JG
4486 (match_operand:SI 3
4487 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
43e9d192
IB
4488 VSLRI))]
4489 "TARGET_SIMD"
cb23a30c 4490 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
78ec3036 4491 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
4492)
4493
4494;; vqshl(u)
4495
;; Saturating shift left by immediate (VQSHL_N unspec), printed as
;; <sur>qshl<u>.  The immediate is checked by
;; aarch64_simd_shift_imm_<ve_mode>.
4496(define_insn "aarch64_<sur>qshl<u>_n<mode>"
4497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
cb23a30c
JG
4499 (match_operand:SI 2
4500 "aarch64_simd_shift_imm_<ve_mode>" "i")]
43e9d192
IB
4501 VQSHL_N))]
4502 "TARGET_SIMD"
cb23a30c 4503 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
78ec3036 4504 [(set_attr "type" "neon_sat_shift_imm<q>")]
43e9d192
IB
4505)
4506
4507
4508;; vq(r)shr(u)n_n
4509
;; Saturating shift right by immediate with narrowing (VQSHRN_N unspec):
;; the input is a VSQN_HSDI register and the result has mode <VNARROWQ>.
4510(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4511 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4512 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
cb23a30c
JG
4513 (match_operand:SI 2
4514 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
43e9d192
IB
4515 VQSHRN_N))]
4516 "TARGET_SIMD"
cb23a30c 4517 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
78ec3036 4518 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
43e9d192
IB
4519)
4520
4521
889b9412
JG
4522;; cm(eq|ge|gt|lt|le)
4523;; Note, we have constraints for Dz and Z as different expanders
4524;; have different ideas of what should be passed to this pattern.
43e9d192 4525
;; Vector integer compare for the COMPARISONS codes, expressed as the
;; negation of the comparison result.  Alternative 0 compares two registers,
;; printing its inputs through <cmp_1>/<cmp_2>; alternative 1 compares
;; operand 1 against zero and prints "#0".
889b9412 4526(define_insn "aarch64_cm<optab><mode>"
5f565314
RS
4527 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4528 (neg:<V_INT_EQUIV>
4529 (COMPARISONS:<V_INT_EQUIV>
a844a695
AL
4530 (match_operand:VDQ_I 1 "register_operand" "w,w")
4531 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
889b9412 4532 )))]
43e9d192
IB
4533 "TARGET_SIMD"
4534 "@
889b9412
JG
4535 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4536 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
78ec3036 4537 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
43e9d192
IB
4538)
4539
889b9412
JG
;; DImode compare for the COMPARISONS codes, with a CC clobber.  It is
;; emitted as "#" and split once reload has completed.  When operands 0 and 1
;; are both general-purpose registers the split emits a flag-setting compare
;; followed by gen_cstoredi_neg; otherwise it falls through to the split
;; template, which is the same set without the CC clobber.
4540(define_insn_and_split "aarch64_cm<optab>di"
4541 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4542 (neg:DI
4543 (COMPARISONS:DI
4544 (match_operand:DI 1 "register_operand" "w,w,r")
4545 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
75c7257f
JG
4546 )))
4547 (clobber (reg:CC CC_REGNUM))]
889b9412 4548 "TARGET_SIMD"
110e1ccc 4549 "#"
6acc5948 4550 "&& reload_completed"
110e1ccc
JG
4551 [(set (match_operand:DI 0 "register_operand")
4552 (neg:DI
4553 (COMPARISONS:DI
4554 (match_operand:DI 1 "register_operand")
4555 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4556 )))]
889b9412 4557 {
110e1ccc
JG
4558 /* If we are in the general purpose register file,
4559 we split to a sequence of comparison and store. */
4560 if (GP_REGNUM_P (REGNO (operands[0]))
4561 && GP_REGNUM_P (REGNO (operands[1])))
4562 {
ef4bddc2 4563 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
110e1ccc
JG
4564 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4565 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4566 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4567 DONE;
4568 }
4569 /* Otherwise, we expand to a similar pattern which does not
4570 clobber CC_REGNUM. */
889b9412 4571 }
78ec3036 4572 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
889b9412
JG
4573)
4574
110e1ccc
JG
;; DImode compare for the COMPARISONS codes without a CC clobber.  Its
;; condition requires reload_completed, and it has the same RTL shape as the
;; split template of aarch64_cm<optab>di.  Operands are printed with %d.
4575(define_insn "*aarch64_cm<optab>di"
4576 [(set (match_operand:DI 0 "register_operand" "=w,w")
4577 (neg:DI
4578 (COMPARISONS:DI
4579 (match_operand:DI 1 "register_operand" "w,w")
4580 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4581 )))]
4582 "TARGET_SIMD && reload_completed"
4583 "@
4584 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4585 cm<optab>\t%d0, %d1, #0"
4586 [(set_attr "type" "neon_compare, neon_compare_zero")]
4587)
4588
889b9412 4589;; cm(hs|hi)
43e9d192 4590
;; Vector integer compare for the UCOMPARISONS codes, expressed as the
;; negation of the comparison result.  There is a register-register form
;; only; inputs are printed through <cmp_1>/<cmp_2>.
889b9412 4591(define_insn "aarch64_cm<optab><mode>"
5f565314
RS
4592 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4593 (neg:<V_INT_EQUIV>
4594 (UCOMPARISONS:<V_INT_EQUIV>
a844a695
AL
4595 (match_operand:VDQ_I 1 "register_operand" "w")
4596 (match_operand:VDQ_I 2 "register_operand" "w")
889b9412 4597 )))]
43e9d192 4598 "TARGET_SIMD"
889b9412 4599 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
78ec3036 4600 [(set_attr "type" "neon_compare<q>")]
43e9d192
IB
4601)
4602
889b9412
JG
;; DImode compare for the UCOMPARISONS codes, with a CC clobber.  It is
;; emitted as "#" and split once reload has completed.  When operands 0 and 1
;; are both general-purpose registers the split emits a flag-setting compare
;; (always CCmode here) followed by gen_cstoredi_neg; otherwise it falls
;; through to the split template, which is the same set without the clobber.
4603(define_insn_and_split "aarch64_cm<optab>di"
4604 [(set (match_operand:DI 0 "register_operand" "=w,r")
4605 (neg:DI
4606 (UCOMPARISONS:DI
4607 (match_operand:DI 1 "register_operand" "w,r")
4608 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
75c7257f
JG
4609 )))
4610 (clobber (reg:CC CC_REGNUM))]
889b9412 4611 "TARGET_SIMD"
110e1ccc 4612 "#"
6acc5948 4613 "&& reload_completed"
110e1ccc
JG
4614 [(set (match_operand:DI 0 "register_operand")
4615 (neg:DI
4616 (UCOMPARISONS:DI
4617 (match_operand:DI 1 "register_operand")
4618 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4619 )))]
889b9412 4620 {
110e1ccc
JG
4621 /* If we are in the general purpose register file,
4622 we split to a sequence of comparison and store. */
4623 if (GP_REGNUM_P (REGNO (operands[0]))
4624 && GP_REGNUM_P (REGNO (operands[1])))
4625 {
ef4bddc2 4626 machine_mode mode = CCmode;
110e1ccc
JG
4627 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4628 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4629 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4630 DONE;
4631 }
4632 /* Otherwise, we expand to a similar pattern which does not
4633 clobber CC_REGNUM. */
889b9412 4634 }
110e1ccc
JG
4635 [(set_attr "type" "neon_compare,multiple")]
4636)
4637
;; DImode compare for the UCOMPARISONS codes without a CC clobber.  Its
;; condition requires reload_completed, and it has the same RTL shape as the
;; split template of the UCOMPARISONS aarch64_cm<optab>di.
4638(define_insn "*aarch64_cm<optab>di"
4639 [(set (match_operand:DI 0 "register_operand" "=w")
4640 (neg:DI
4641 (UCOMPARISONS:DI
4642 (match_operand:DI 1 "register_operand" "w")
4643 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4644 )))]
4645 "TARGET_SIMD && reload_completed"
4646 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4647 [(set_attr "type" "neon_compare")]
889b9412 4648)
385eb93d 4649
889b9412
JG
4650;; cmtst
4651
ddeabd3e 4652;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
6c553b76 4653;; we don't have any insns using ne, and aarch64_vcond outputs
ddeabd3e
AL
4654;; not (neg (eq (and x y) 0))
4655;; which is rewritten by simplify_rtx as
4656;; plus (eq (and x y) 0) -1.
4657
;; Vector CMTST, matched in the form plus (eq (and x y) 0) -1.  Operands 3
;; and 4 are the zero and minus-one constants; they carry no constraints and
;; are not printed.
889b9412 4658(define_insn "aarch64_cmtst<mode>"
5f565314
RS
4659 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4660 (plus:<V_INT_EQUIV>
4661 (eq:<V_INT_EQUIV>
a844a695
AL
4662 (and:VDQ_I
4663 (match_operand:VDQ_I 1 "register_operand" "w")
4664 (match_operand:VDQ_I 2 "register_operand" "w"))
4665 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
5f565314 4666 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
ddeabd3e 4667 ]
889b9412
JG
4668 "TARGET_SIMD"
4669 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 4670 [(set_attr "type" "neon_tst<q>")]
889b9412
JG
4671)
4672
;; DImode CMTST, written as neg (ne (and x y) 0) with a CC clobber.  It is
;; emitted as "#" and split once reload has completed.  When operands 0 and 1
;; are both general-purpose registers the split compares the AND of operands
;; 1 and 2 against zero and emits gen_cstoredi_neg; otherwise it falls
;; through to the split template, which is the same set without the clobber.
4673(define_insn_and_split "aarch64_cmtstdi"
4674 [(set (match_operand:DI 0 "register_operand" "=w,r")
4675 (neg:DI
4676 (ne:DI
4677 (and:DI
4678 (match_operand:DI 1 "register_operand" "w,r")
4679 (match_operand:DI 2 "register_operand" "w,r"))
75c7257f
JG
4680 (const_int 0))))
4681 (clobber (reg:CC CC_REGNUM))]
889b9412 4682 "TARGET_SIMD"
110e1ccc 4683 "#"
6acc5948 4684 "&& reload_completed"
110e1ccc
JG
4685 [(set (match_operand:DI 0 "register_operand")
4686 (neg:DI
4687 (ne:DI
4688 (and:DI
4689 (match_operand:DI 1 "register_operand")
4690 (match_operand:DI 2 "register_operand"))
4691 (const_int 0))))]
889b9412 4692 {
110e1ccc
JG
4693 /* If we are in the general purpose register file,
4694 we split to a sequence of comparison and store. */
4695 if (GP_REGNUM_P (REGNO (operands[0]))
4696 && GP_REGNUM_P (REGNO (operands[1])))
4697 {
4698 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
ef4bddc2 4699 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
110e1ccc
JG
4700 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4701 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4702 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4703 DONE;
4704 }
4705 /* Otherwise, we expand to a similar pattern which does not
4706 clobber CC_REGNUM. */
889b9412 4707 }
110e1ccc
JG
4708 [(set_attr "type" "neon_tst,multiple")]
4709)
4710
;; DImode CMTST without a CC clobber; it has the same RTL shape as the
;; split template of aarch64_cmtstdi.
;; NOTE(review): unlike the *aarch64_cm<optab>di patterns, the condition here
;; does not include reload_completed -- confirm that is intended.
4711(define_insn "*aarch64_cmtstdi"
4712 [(set (match_operand:DI 0 "register_operand" "=w")
4713 (neg:DI
4714 (ne:DI
4715 (and:DI
4716 (match_operand:DI 1 "register_operand" "w")
4717 (match_operand:DI 2 "register_operand" "w"))
4718 (const_int 0))))]
4719 "TARGET_SIMD"
4720 "cmtst\t%d0, %d1, %d2"
78ec3036 4721 [(set_attr "type" "neon_tst")]
889b9412
JG
4722)
4723
4724;; fcm(eq|ge|gt|le|lt)
4725
;; Floating-point compare for the COMPARISONS codes over VHSDF_HSDF, with an
;; integer-equivalent result mode.  Alternative 0 compares two registers;
;; alternative 1 compares operand 1 against zero, printed as a bare "0".
4726(define_insn "aarch64_cm<optab><mode>"
5f565314
RS
4727 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4728 (neg:<V_INT_EQUIV>
4729 (COMPARISONS:<V_INT_EQUIV>
d7f33f07
JW
4730 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4731 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
889b9412 4732 )))]
385eb93d
JG
4733 "TARGET_SIMD"
4734 "@
889b9412
JG
4735 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4736 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
daef0a8c 4737 [(set_attr "type" "neon_fp_compare_<stype><q>")]
385eb93d
JG
4738)
4739
75dd5ace
JG
4740;; fac(ge|gt)
4741;; Note we can also handle what would be fac(le|lt) by
4742;; generating fac(ge|gt).
4743
;; Floating-point absolute compare: a FAC_COMPARISONS comparison of
;; abs (operand 1) and abs (operand 2).  Inputs are printed through
;; <cmp_1>/<cmp_2>.
33d72b63 4744(define_insn "aarch64_fac<optab><mode>"
5f565314
RS
4745 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4746 (neg:<V_INT_EQUIV>
4747 (FAC_COMPARISONS:<V_INT_EQUIV>
68ad28c3
JW
4748 (abs:VHSDF_HSDF
4749 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4750 (abs:VHSDF_HSDF
4751 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
75dd5ace
JG
4752 )))]
4753 "TARGET_SIMD"
4754 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
33d72b63 4755 [(set_attr "type" "neon_fp_compare_<stype><q>")]
75dd5ace
JG
4756)
4757
43e9d192
IB
4758;; addp
4759
;; ADDP on two VD_BHSI vector registers, modelled as an opaque UNSPEC_ADDP.
4760(define_insn "aarch64_addp<mode>"
4761 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4762 (unspec:VD_BHSI
4763 [(match_operand:VD_BHSI 1 "register_operand" "w")
4764 (match_operand:VD_BHSI 2 "register_operand" "w")]
4765 UNSPEC_ADDP))]
4766 "TARGET_SIMD"
4767 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 4768 [(set_attr "type" "neon_reduc_add<q>")]
43e9d192
IB
4769)
4770
;; Scalar ADDP: produces a DImode result from a single V2DI input, printed
;; as "addp dN, vM.2d".
4771(define_insn "aarch64_addpdi"
4772 [(set (match_operand:DI 0 "register_operand" "=w")
4773 (unspec:DI
4774 [(match_operand:V2DI 1 "register_operand" "w")]
4775 UNSPEC_ADDP))]
4776 "TARGET_SIMD"
4777 "addp\t%d0, %1.2d"
78ec3036 4778 [(set_attr "type" "neon_reduc_add")]
43e9d192
IB
4779)
4780
43e9d192
IB
4781;; sqrt
4782
;; Vector square-root expander.  If aarch64_emit_approx_sqrt returns true
;; the expansion is finished (DONE); otherwise the sqrt RTL template below
;; is emitted as written.
98daafa0 4783(define_expand "sqrt<mode>2"
daef0a8c
JW
4784 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4785 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
98daafa0
EM
4786 "TARGET_SIMD"
4787{
4788 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4789 DONE;
4790})
4791
;; Matches the sqrt RTL for VHSDF modes and prints a single FSQRT.
4792(define_insn "*sqrt<mode>2"
daef0a8c
JW
4793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4794 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
4795 "TARGET_SIMD"
4796 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 4797 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
43e9d192
IB
4798)
4799
43e9d192
IB
4800;; Patterns for vector struct loads and stores.
4801
;; LD2 into an OImode register list (printed as %S0 - %T0) from a structure
;; memory operand.  The UNSPEC_VSTRUCTDUMMY unspec carries only (const_int 0);
;; it appears to exist so the pattern is parameterised by the VQ iterator.
668046d1 4802(define_insn "aarch64_simd_ld2<mode>"
43e9d192
IB
4803 [(set (match_operand:OI 0 "register_operand" "=w")
4804 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4805 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4806 UNSPEC_LD2))]
4807 "TARGET_SIMD"
4808 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
78ec3036
JG
4809 [(set_attr "type" "neon_load2_2reg<q>")]
4810)
43e9d192 4811
77efea31
FY
;; LD2R into an OImode register list from a BLKmode memory operand
;; (UNSPEC_LD2_DUP), parameterised over VALLDIF via the dummy unspec.
4812(define_insn "aarch64_simd_ld2r<mode>"
4813 [(set (match_operand:OI 0 "register_operand" "=w")
abf47511 4814 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
77efea31
FY
4815 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4816 UNSPEC_LD2_DUP))]
4817 "TARGET_SIMD"
4818 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4819 [(set_attr "type" "neon_load2_all_lanes<q>")]
4820)
4821
3ec1be97
CB
;; LD2 to a single lane of an OImode register list.  Operand 2 is the
;; incoming register list, tied to the output (constraint "0"); operand 3 is
;; the lane number, remapped via aarch64_endian_lane_rtx before printing.
4822(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4823 [(set (match_operand:OI 0 "register_operand" "=w")
abf47511 4824 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
3ec1be97
CB
4825 (match_operand:OI 2 "register_operand" "0")
4826 (match_operand:SI 3 "immediate_operand" "i")
4d0a0237 4827 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3ec1be97
CB
4828 UNSPEC_LD2_LANE))]
4829 "TARGET_SIMD"
4d0a0237 4830 {
7ac29c0f 4831 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4d0a0237
CB
4832 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4833 }
3ec1be97
CB
4834 [(set_attr "type" "neon_load2_one_lane")]
4835)
4836
668046d1
DS
;; Expander for a two-vector structure load.  On big-endian targets the LD2
;; result is loaded into a temporary and then passed through
;; aarch64_rev_reglistoi with a mask from aarch64_reverse_mask; otherwise
;; aarch64_simd_ld2<mode> is emitted directly.
4837(define_expand "vec_load_lanesoi<mode>"
4838 [(set (match_operand:OI 0 "register_operand" "=w")
4839 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4840 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841 UNSPEC_LD2))]
4842 "TARGET_SIMD"
4843{
4844 if (BYTES_BIG_ENDIAN)
4845 {
4846 rtx tmp = gen_reg_rtx (OImode);
73e3da51 4847 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
4848 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4849 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4850 }
4851 else
4852 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4853 DONE;
4854})
4855
;; ST2 of an OImode register list (printed as %S1 - %T1) to a structure
;; memory operand, parameterised over VQ via the dummy unspec.
4856(define_insn "aarch64_simd_st2<mode>"
43e9d192
IB
4857 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4858 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4859 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4860 UNSPEC_ST2))]
4861 "TARGET_SIMD"
4862 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
78ec3036
JG
4863 [(set_attr "type" "neon_store2_2reg<q>")]
4864)
43e9d192 4865
aaf3de7a 4866;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 of a single lane from an OImode register list to BLKmode memory.
;; Operand 2 is the lane number; it is remapped with
;; aarch64_endian_lane_rtx only when the assembly is printed.
d30ab8e0 4867(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
abf47511
AL
4868 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4869 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4d0a0237 4870 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
ba081b77 4871 (match_operand:SI 2 "immediate_operand" "i")]
4d0a0237 4872 UNSPEC_ST2_LANE))]
ba081b77 4873 "TARGET_SIMD"
aaf3de7a 4874 {
7ac29c0f 4875 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
aaf3de7a
AL
4876 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4877 }
b1db706a 4878 [(set_attr "type" "neon_store2_one_lane<q>")]
ba081b77
JG
4879)
4880
668046d1
DS
;; Expander for a two-vector structure store.  On big-endian targets the
;; source register list is first passed through aarch64_rev_reglistoi into a
;; temporary, which is then stored with ST2; otherwise
;; aarch64_simd_st2<mode> is emitted directly.
4881(define_expand "vec_store_lanesoi<mode>"
4882 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4884 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4885 UNSPEC_ST2))]
4886 "TARGET_SIMD"
4887{
4888 if (BYTES_BIG_ENDIAN)
4889 {
4890 rtx tmp = gen_reg_rtx (OImode);
73e3da51 4891 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
4892 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4893 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4894 }
4895 else
4896 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4897 DONE;
4898})
4899
;; LD3 into a CImode register list (printed as %S0 - %U0) from a structure
;; memory operand, parameterised over VQ via the dummy unspec.
4900(define_insn "aarch64_simd_ld3<mode>"
43e9d192
IB
4901 [(set (match_operand:CI 0 "register_operand" "=w")
4902 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4903 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4904 UNSPEC_LD3))]
4905 "TARGET_SIMD"
4906 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
78ec3036
JG
4907 [(set_attr "type" "neon_load3_3reg<q>")]
4908)
43e9d192 4909
77efea31
FY
;; LD3R into a CImode register list from a BLKmode memory operand
;; (UNSPEC_LD3_DUP), parameterised over VALLDIF via the dummy unspec.
4910(define_insn "aarch64_simd_ld3r<mode>"
4911 [(set (match_operand:CI 0 "register_operand" "=w")
f4720b94 4912 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
77efea31
FY
4913 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4914 UNSPEC_LD3_DUP))]
4915 "TARGET_SIMD"
4916 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4917 [(set_attr "type" "neon_load3_all_lanes<q>")]
4918)
4919
3ec1be97
CB
;; LD3 to a single lane of a CImode register list.  Operand 2 is the
;; incoming register list, tied to the output (constraint "0"); operand 3 is
;; the lane number, remapped via aarch64_endian_lane_rtx before printing.
4920(define_insn "aarch64_vec_load_lanesci_lane<mode>"
4921 [(set (match_operand:CI 0 "register_operand" "=w")
f4720b94 4922 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
3ec1be97
CB
4923 (match_operand:CI 2 "register_operand" "0")
4924 (match_operand:SI 3 "immediate_operand" "i")
4d0a0237 4925 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3ec1be97
CB
4926 UNSPEC_LD3_LANE))]
4927 "TARGET_SIMD"
4d0a0237 4928{
7ac29c0f 4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4d0a0237
CB
4930 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4931}
3ec1be97
CB
4932 [(set_attr "type" "neon_load3_one_lane")]
4933)
4934
668046d1
DS
;; Expander for a three-vector structure load.  On big-endian targets the
;; LD3 result is loaded into a temporary and then passed through
;; aarch64_rev_reglistci with a mask from aarch64_reverse_mask; otherwise
;; aarch64_simd_ld3<mode> is emitted directly.
4935(define_expand "vec_load_lanesci<mode>"
4936 [(set (match_operand:CI 0 "register_operand" "=w")
4937 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4938 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4939 UNSPEC_LD3))]
4940 "TARGET_SIMD"
4941{
4942 if (BYTES_BIG_ENDIAN)
4943 {
4944 rtx tmp = gen_reg_rtx (CImode);
73e3da51 4945 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
4946 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4947 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4948 }
4949 else
4950 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4951 DONE;
4952})
4953
;; ST3 of a CImode register list (printed as %S1 - %U1) to a structure
;; memory operand, parameterised over VQ via the dummy unspec.
4954(define_insn "aarch64_simd_st3<mode>"
43e9d192
IB
4955 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4956 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4957 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958 UNSPEC_ST3))]
4959 "TARGET_SIMD"
4960 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
78ec3036
JG
4961 [(set_attr "type" "neon_store3_3reg<q>")]
4962)
43e9d192 4963
aaf3de7a 4964;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 of a single lane from a CImode register list to BLKmode memory.
;; Operand 2 is the lane number; it is remapped with
;; aarch64_endian_lane_rtx only when the assembly is printed.
d30ab8e0 4965(define_insn "aarch64_vec_store_lanesci_lane<mode>"
f4720b94
AL
4966 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4967 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4969 (match_operand:SI 2 "immediate_operand" "i")]
4970 UNSPEC_ST3_LANE))]
ba081b77 4971 "TARGET_SIMD"
aaf3de7a 4972 {
7ac29c0f 4973 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
aaf3de7a
AL
4974 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4975 }
ba081b77
JG
4976 [(set_attr "type" "neon_store3_one_lane<q>")]
4977)
4978
668046d1
DS
;; Expander for a three-vector structure store.  On big-endian targets the
;; source register list is first passed through aarch64_rev_reglistci into a
;; temporary, which is then stored with ST3; otherwise
;; aarch64_simd_st3<mode> is emitted directly.
4979(define_expand "vec_store_lanesci<mode>"
4980 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4982 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4983 UNSPEC_ST3))]
4984 "TARGET_SIMD"
4985{
4986 if (BYTES_BIG_ENDIAN)
4987 {
4988 rtx tmp = gen_reg_rtx (CImode);
73e3da51 4989 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
4990 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4991 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4992 }
4993 else
4994 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4995 DONE;
4996})
4997
;; LD4 into an XImode register list (printed as %S0 - %V0) from a structure
;; memory operand, parameterised over VQ via the dummy unspec.
4998(define_insn "aarch64_simd_ld4<mode>"
43e9d192
IB
4999 [(set (match_operand:XI 0 "register_operand" "=w")
5000 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5001 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002 UNSPEC_LD4))]
5003 "TARGET_SIMD"
5004 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
78ec3036
JG
5005 [(set_attr "type" "neon_load4_4reg<q>")]
5006)
43e9d192 5007
77efea31
FY
;; LD4R into an XImode register list from a BLKmode memory operand
;; (UNSPEC_LD4_DUP), parameterised over VALLDIF via the dummy unspec.
5008(define_insn "aarch64_simd_ld4r<mode>"
5009 [(set (match_operand:XI 0 "register_operand" "=w")
d866f024 5010 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
77efea31
FY
5011 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5012 UNSPEC_LD4_DUP))]
5013 "TARGET_SIMD"
5014 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5015 [(set_attr "type" "neon_load4_all_lanes<q>")]
5016)
5017
3ec1be97
CB
;; LD4 to a single lane of an XImode register list.  Operand 2 is the
;; incoming register list, tied to the output (constraint "0"); operand 3 is
;; the lane number, remapped via aarch64_endian_lane_rtx before printing.
5018(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5019 [(set (match_operand:XI 0 "register_operand" "=w")
d866f024 5020 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
3ec1be97
CB
5021 (match_operand:XI 2 "register_operand" "0")
5022 (match_operand:SI 3 "immediate_operand" "i")
4d0a0237 5023 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3ec1be97
CB
5024 UNSPEC_LD4_LANE))]
5025 "TARGET_SIMD"
4d0a0237 5026{
7ac29c0f 5027 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4d0a0237
CB
5028 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5029}
3ec1be97
CB
5030 [(set_attr "type" "neon_load4_one_lane")]
5031)
5032
668046d1
DS
;; Expander for a four-vector structure load.  On big-endian targets the
;; LD4 result is loaded into a temporary and then passed through
;; aarch64_rev_reglistxi with a mask from aarch64_reverse_mask; otherwise
;; aarch64_simd_ld4<mode> is emitted directly.
5033(define_expand "vec_load_lanesxi<mode>"
5034 [(set (match_operand:XI 0 "register_operand" "=w")
5035 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5036 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037 UNSPEC_LD4))]
5038 "TARGET_SIMD"
5039{
5040 if (BYTES_BIG_ENDIAN)
5041 {
5042 rtx tmp = gen_reg_rtx (XImode);
73e3da51 5043 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
5044 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5045 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5046 }
5047 else
5048 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5049 DONE;
5050})
5051
;; ST4 of an XImode register list (printed as %S1 - %V1) to a structure
;; memory operand, parameterised over VQ via the dummy unspec.
5052(define_insn "aarch64_simd_st4<mode>"
43e9d192
IB
5053 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5054 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056 UNSPEC_ST4))]
5057 "TARGET_SIMD"
5058 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
78ec3036
JG
5059 [(set_attr "type" "neon_store4_4reg<q>")]
5060)
43e9d192 5061
aaf3de7a 5062;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 of a single lane from an XImode register list to BLKmode memory.
;; Operand 2 is the lane number; it is remapped with
;; aarch64_endian_lane_rtx only when the assembly is printed.
d30ab8e0 5063(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
d866f024
AL
5064 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5065 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5066 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5067 (match_operand:SI 2 "immediate_operand" "i")]
5068 UNSPEC_ST4_LANE))]
ba081b77 5069 "TARGET_SIMD"
aaf3de7a 5070 {
7ac29c0f 5071 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
aaf3de7a
AL
5072 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5073 }
ba081b77
JG
5074 [(set_attr "type" "neon_store4_one_lane<q>")]
5075)
5076
668046d1
DS
;; Expander for a four-vector structure store.  On big-endian targets the
;; source register list is first passed through aarch64_rev_reglistxi into a
;; temporary, which is then stored with ST4; otherwise
;; aarch64_simd_st4<mode> is emitted directly.
5077(define_expand "vec_store_lanesxi<mode>"
5078 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5079 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5080 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081 UNSPEC_ST4))]
5082 "TARGET_SIMD"
5083{
5084 if (BYTES_BIG_ENDIAN)
5085 {
5086 rtx tmp = gen_reg_rtx (XImode);
73e3da51 5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
668046d1
DS
5088 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5089 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5090 }
5091 else
5092 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5093 DONE;
5094})
5095
;; Apply the V16QI index vector in operand 2 to every register of the
;; register list in operand 1, writing the list in operand 0.  The insn is
;; output as "#" and split once reload has completed: the split emits one
;; aarch64_tbl1v16qi per register, where the register count is
;; GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG and register i of each list
;; is addressed as REGNO + i in V16QImode.  Operand 0 is earlyclobber.
5096(define_insn_and_split "aarch64_rev_reglist<mode>"
5097[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5098 (unspec:VSTRUCT
5099 [(match_operand:VSTRUCT 1 "register_operand" "w")
5100 (match_operand:V16QI 2 "register_operand" "w")]
5101 UNSPEC_REV_REGLIST))]
5102 "TARGET_SIMD"
5103 "#"
5104 "&& reload_completed"
5105 [(const_int 0)]
5106{
5107 int i;
5108 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5109 for (i = 0; i < nregs; i++)
5110 {
5111 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5112 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5113 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5114 }
5115 DONE;
5116}
5117 [(set_attr "type" "neon_tbl1_q")
5118 (set_attr "length" "<insn_count>")]
5119)
5120
43e9d192
IB
5121;; Reload patterns for AdvSIMD register list operands.
5122
;; Move expander for the VSTRUCT modes.  While new pseudos can still be
;; created, a source whose destination is not a REG is forced into a
;; register first.  The preparation code executes neither DONE nor FAIL, so
;; the (set ...) template above it is what gets emitted.
5123(define_expand "mov<mode>"
2d8c6dc1
AH
5124 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5125 (match_operand:VSTRUCT 1 "general_operand" ""))]
43e9d192
IB
5126 "TARGET_SIMD"
5127{
5128 if (can_create_pseudo_p ())
5129 {
5130 if (GET_CODE (operands[0]) != REG)
5131 operands[1] = force_reg (<MODE>mode, operands[1]);
5132 }
5133})
5134
568421ba
SD
5135
;; Expander taking a DImode address register (operand 1) and a CImode
;; destination (operand 0).  It wraps the address in a CImode MEM and emits
;; aarch64_ld1_x3_<mode> on it.
5136(define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137 [(match_operand:CI 0 "register_operand" "=w")
5138 (match_operand:DI 1 "register_operand" "r")
5139 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140 "TARGET_SIMD"
5141{
5142 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5144 DONE;
5145})
5146
;; LD1 of a CImode structure from memory operand 1 into the register list
;; %S0 through %U0.  The dummy unspec here wraps (const_int 3).
5147(define_insn "aarch64_ld1_x3_<mode>"
5148 [(set (match_operand:CI 0 "register_operand" "=w")
5149 (unspec:CI
5150 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5152 "TARGET_SIMD"
5153 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154 [(set_attr "type" "neon_load1_3reg<q>")]
5155)
5156
;; Expander taking a DImode address register (operand 0) and an OImode
;; source (operand 1).  It wraps the address in an OImode MEM and emits
;; aarch64_st1_x2_<mode> to it.
5157(define_expand "aarch64_st1x2<VALLDIF:mode>"
5158 [(match_operand:DI 0 "register_operand" "")
5159 (match_operand:OI 1 "register_operand" "")
5160 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161 "TARGET_SIMD"
5162{
5163 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5165 DONE;
5166})
5167
;; ST1 of the OImode register list %S1 through %T1 to memory operand 0.
;; The dummy unspec here wraps (const_int 2).
5168(define_insn "aarch64_st1_x2_<mode>"
5169 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5170 (unspec:OI
5171 [(match_operand:OI 1 "register_operand" "w")
5172 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5173 "TARGET_SIMD"
5174 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175 [(set_attr "type" "neon_store1_2reg<q>")]
5176)
5177
;; Expander taking a DImode address register (operand 0) and a CImode
;; source (operand 1).  It wraps the address in a CImode MEM and emits
;; aarch64_st1_x3_<mode> to it.
5178(define_expand "aarch64_st1x3<VALLDIF:mode>"
5179 [(match_operand:DI 0 "register_operand" "")
5180 (match_operand:CI 1 "register_operand" "")
5181 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182 "TARGET_SIMD"
5183{
5184 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5186 DONE;
5187})
5188
;; ST1 of the CImode register list %S1 through %U1 to memory operand 0.
;; The dummy unspec here wraps (const_int 3).
5189(define_insn "aarch64_st1_x3_<mode>"
5190 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5191 (unspec:CI
5192 [(match_operand:CI 1 "register_operand" "w")
5193 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5194 "TARGET_SIMD"
5195 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196 [(set_attr "type" "neon_store1_3reg<q>")]
5197)
5198
43e9d192
IB
;; Little-endian-only move for the VSTRUCT modes (the condition requires
;; !BYTES_BIG_ENDIAN and that at least one operand is a register).
;; Alternative 0 (register to register) is output as "#"; the other two
;; are a single ST1 or LD1 of .16b registers from %S up to %<Vendreg>.
5199(define_insn "*aarch64_mov<mode>"
5200 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
2d8c6dc1
AH
5201 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5202 "TARGET_SIMD && !BYTES_BIG_ENDIAN
43e9d192
IB
5203 && (register_operand (operands[0], <MODE>mode)
5204 || register_operand (operands[1], <MODE>mode))"
2d8c6dc1
AH
5205 "@
5206 #
5207 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5208 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5209 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5210 neon_load<nregs>_<nregs>reg_q")
cd78b3dd 5211 (set_attr "length" "<insn_count>,4,4")]
78ec3036 5212)
43e9d192 5213
;; Single-register LD1 from memory operand 1, expressed as UNSPEC_LD1
;; rather than as a plain set.  The condition is only TARGET_SIMD.
89b4515c 5214(define_insn "aarch64_be_ld1<mode>"
71a11456
AL
5215 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5216 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5217 "aarch64_simd_struct_operand" "Utv")]
89b4515c
AV
5218 UNSPEC_LD1))]
5219 "TARGET_SIMD"
5220 "ld1\\t{%0<Vmtype>}, %1"
5221 [(set_attr "type" "neon_load1_1reg<q>")]
5222)
5223
;; Single-register ST1 of operand 1 to memory operand 0, expressed as
;; UNSPEC_ST1 rather than as a plain set.  The condition is only
;; TARGET_SIMD.
5224(define_insn "aarch64_be_st1<mode>"
71a11456
AL
5225 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5226 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
89b4515c
AV
5227 UNSPEC_ST1))]
5228 "TARGET_SIMD"
5229 "st1\\t{%1<Vmtype>}, %0"
5230 [(set_attr "type" "neon_store1_1reg<q>")]
5231)
5232
2d8c6dc1
AH
;; Big-endian OImode move; at least one operand must be a register.
;; Alternative 0 (register to register) is output as "#" with length 8;
;; the memory alternatives are a single STP or LDP of %q and %R.
5233(define_insn "*aarch64_be_movoi"
5234 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5235 (match_operand:OI 1 "general_operand" " w,w,m"))]
5236 "TARGET_SIMD && BYTES_BIG_ENDIAN
5237 && (register_operand (operands[0], OImode)
5238 || register_operand (operands[1], OImode))"
5239 "@
5240 #
5241 stp\\t%q1, %R1, %0
5242 ldp\\t%q0, %R0, %1"
5c4b7f1c 5243 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
cd78b3dd 5244 (set_attr "length" "8,4,4")]
2d8c6dc1
AH
5245)
5246
;; Big-endian CImode move; at least one operand must be a register.  All
;; three alternatives are output as "#", so the insn relies on being split.
;; NOTE(review): the memory alternatives carry length 4, while the CImode
;; define_split in this file emits two moves on big-endian -- confirm the
;; length values are intended.
5247(define_insn "*aarch64_be_movci"
5248 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5249 (match_operand:CI 1 "general_operand" " w,w,o"))]
5250 "TARGET_SIMD && BYTES_BIG_ENDIAN
5251 && (register_operand (operands[0], CImode)
5252 || register_operand (operands[1], CImode))"
5253 "#"
5254 [(set_attr "type" "multiple")
cd78b3dd 5255 (set_attr "length" "12,4,4")]
2d8c6dc1
AH
5256)
5257
;; Big-endian XImode move; at least one operand must be a register.  All
;; three alternatives are output as "#", so the insn relies on being split.
;; NOTE(review): the memory alternatives carry length 4, while the XImode
;; define_split in this file emits two moves on big-endian -- confirm the
;; length values are intended.
5258(define_insn "*aarch64_be_movxi"
5259 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5260 (match_operand:XI 1 "general_operand" " w,w,o"))]
5261 "TARGET_SIMD && BYTES_BIG_ENDIAN
5262 && (register_operand (operands[0], XImode)
5263 || register_operand (operands[1], XImode))"
5264 "#"
5265 [(set_attr "type" "multiple")
cd78b3dd 5266 (set_attr "length" "16,4,4")]
2d8c6dc1
AH
5267)
5268
;; After reload, split an OImode register-to-register move by calling
;; aarch64_simd_emit_reg_reg_move with TImode and a count of 2.
43e9d192 5269(define_split
2d8c6dc1
AH
5270 [(set (match_operand:OI 0 "register_operand")
5271 (match_operand:OI 1 "register_operand"))]
43e9d192 5272 "TARGET_SIMD && reload_completed"
2d8c6dc1 5273 [(const_int 0)]
43e9d192 5274{
2d8c6dc1
AH
5275 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5276 DONE;
43e9d192
IB
5277})
5278
;; After reload, split a CImode move.
;;  - Both operands registers: aarch64_simd_emit_reg_reg_move with TImode
;;    and a count of 3.
;;  - Otherwise, on big-endian: an OImode move of the subreg at byte
;;    offset 0, followed by a V16QImode move of the TImode subreg at byte
;;    offset 32.
;;  - Otherwise the split FAILs.
5279(define_split
2d8c6dc1
AH
5280 [(set (match_operand:CI 0 "nonimmediate_operand")
5281 (match_operand:CI 1 "general_operand"))]
43e9d192 5282 "TARGET_SIMD && reload_completed"
2d8c6dc1 5283 [(const_int 0)]
43e9d192 5284{
2d8c6dc1
AH
5285 if (register_operand (operands[0], CImode)
5286 && register_operand (operands[1], CImode))
5287 {
5288 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5289 DONE;
5290 }
5291 else if (BYTES_BIG_ENDIAN)
5292 {
5293 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5294 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5295 emit_move_insn (gen_lowpart (V16QImode,
5296 simplify_gen_subreg (TImode, operands[0],
5297 CImode, 32)),
5298 gen_lowpart (V16QImode,
5299 simplify_gen_subreg (TImode, operands[1],
5300 CImode, 32)));
5301 DONE;
5302 }
5303 else
5304 FAIL;
43e9d192
IB
5305})
5306
;; After reload, split an XImode move.
;;  - Both operands registers: aarch64_simd_emit_reg_reg_move with TImode
;;    and a count of 4.
;;  - Otherwise, on big-endian: two OImode moves, of the subregs at byte
;;    offsets 0 and 32.
;;  - Otherwise the split FAILs.
5307(define_split
2d8c6dc1
AH
5308 [(set (match_operand:XI 0 "nonimmediate_operand")
5309 (match_operand:XI 1 "general_operand"))]
43e9d192 5310 "TARGET_SIMD && reload_completed"
2d8c6dc1 5311 [(const_int 0)]
43e9d192 5312{
2d8c6dc1
AH
5313 if (register_operand (operands[0], XImode)
5314 && register_operand (operands[1], XImode))
5315 {
5316 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5317 DONE;
5318 }
5319 else if (BYTES_BIG_ENDIAN)
5320 {
5321 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5322 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5323 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5324 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5325 DONE;
5326 }
5327 else
5328 FAIL;
43e9d192
IB
5329})
5330
97755701
AL
;; Expander taking a DImode address register (operand 1) and a VSTRUCT
;; destination (operand 0).  It builds a BLKmode MEM whose size is set to
;; one vector element times <VSTRUCT:nregs>, then emits
;; aarch64_simd_ld<nregs>r<mode> on it.
5331(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5332 [(match_operand:VSTRUCT 0 "register_operand" "=w")
77efea31
FY
5333 (match_operand:DI 1 "register_operand" "w")
5334 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335 "TARGET_SIMD"
5336{
abf47511 5337 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
97755701
AL
5338 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5339 * <VSTRUCT:nregs>);
77efea31 5340
97755701
AL
5341 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5342 mem));
77efea31
FY
5343 DONE;
5344})
5345
;; VD-mode variant: LD2 from BLKmode memory operand 1 into the OImode
;; register list %S0 through %T0.
fca7d0a4 5346(define_insn "aarch64_ld2<mode>_dreg"
43e9d192 5347 [(set (match_operand:OI 0 "register_operand" "=w")
fca7d0a4
RS
5348 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5349 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5350 UNSPEC_LD2_DREG))]
5351 "TARGET_SIMD"
43e9d192 5352 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
78ec3036
JG
5353 [(set_attr "type" "neon_load2_2reg<q>")]
5354)
43e9d192 5355
;; DX-mode variant of the two-register D-register load.  It is output as
;; LD1 on .1d registers %S0 through %T0 rather than as LD2.
fca7d0a4 5356(define_insn "aarch64_ld2<mode>_dreg"
ac45b2ba 5357 [(set (match_operand:OI 0 "register_operand" "=w")
fca7d0a4
RS
5358 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5359 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5360 UNSPEC_LD2_DREG))]
5361 "TARGET_SIMD"
ac45b2ba
TC
5362 "ld1\\t{%S0.1d - %T0.1d}, %1"
5363 [(set_attr "type" "neon_load1_2reg<q>")]
5364)
5365
;; VD-mode variant: LD3 from BLKmode memory operand 1 into the CImode
;; register list %S0 through %U0.
fca7d0a4 5366(define_insn "aarch64_ld3<mode>_dreg"
ac45b2ba 5367 [(set (match_operand:CI 0 "register_operand" "=w")
fca7d0a4
RS
5368 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5369 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5370 UNSPEC_LD3_DREG))]
5371 "TARGET_SIMD"
ac45b2ba
TC
5372 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5373 [(set_attr "type" "neon_load3_3reg<q>")]
5374)
5375
;; DX-mode variant of the three-register D-register load.  It is output as
;; LD1 on .1d registers %S0 through %U0 rather than as LD3.
fca7d0a4 5376(define_insn "aarch64_ld3<mode>_dreg"
ac45b2ba 5377 [(set (match_operand:CI 0 "register_operand" "=w")
fca7d0a4
RS
5378 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5379 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380 UNSPEC_LD3_DREG))]
5381 "TARGET_SIMD"
43e9d192 5382 "ld1\\t{%S0.1d - %U0.1d}, %1"
78ec3036
JG
5383 [(set_attr "type" "neon_load1_3reg<q>")]
5384)
43e9d192 5385
;; VD-mode variant: LD4 from BLKmode memory operand 1 into the XImode
;; register list %S0 through %V0.
fca7d0a4 5386(define_insn "aarch64_ld4<mode>_dreg"
43e9d192 5387 [(set (match_operand:XI 0 "register_operand" "=w")
fca7d0a4
RS
5388 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5389 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5390 UNSPEC_LD4_DREG))]
5391 "TARGET_SIMD"
ac45b2ba
TC
5392 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5393 [(set_attr "type" "neon_load4_4reg<q>")]
5394)
5395
;; DX-mode variant of the four-register D-register load.  It is output as
;; LD1 on .1d registers %S0 through %V0 rather than as LD4.
fca7d0a4 5396(define_insn "aarch64_ld4<mode>_dreg"
ac45b2ba 5397 [(set (match_operand:XI 0 "register_operand" "=w")
fca7d0a4
RS
5398 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5399 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5400 UNSPEC_LD4_DREG))]
5401 "TARGET_SIMD"
43e9d192 5402 "ld1\\t{%S0.1d - %V0.1d}, %1"
78ec3036
JG
5403 [(set_attr "type" "neon_load1_4reg<q>")]
5404)
43e9d192
IB
5405
;; Expander for the VDC-mode structure loads.  It wraps the DImode address
;; in operand 1 in a BLKmode MEM whose size is set to <VSTRUCT:nregs> * 8
;; bytes and emits the matching aarch64_ld<nregs><mode>_dreg insn.
5406(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5407 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5408 (match_operand:DI 1 "register_operand" "r")
5409 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5410 "TARGET_SIMD"
5411{
6960bf55
AL
5412 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5413 set_mem_size (mem, <VSTRUCT:nregs> * 8);
43e9d192 5414
fca7d0a4 5415 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
43e9d192
IB
5416 DONE;
5417})
5418
71a11456
AL
;; Expander for a single-vector load from the address in operand 1.  It
;; builds a MEM in the vector mode; on big-endian it emits
;; aarch64_be_ld1<mode>, otherwise an ordinary emit_move_insn.
5419(define_expand "aarch64_ld1<VALL_F16:mode>"
5420 [(match_operand:VALL_F16 0 "register_operand")
dec11868
JG
5421 (match_operand:DI 1 "register_operand")]
5422 "TARGET_SIMD"
5423{
71a11456 5424 machine_mode mode = <VALL_F16:MODE>mode;
dec11868 5425 rtx mem = gen_rtx_MEM (mode, operands[1]);
89b4515c
AV
5426
5427 if (BYTES_BIG_ENDIAN)
71a11456 5428 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
89b4515c
AV
5429 else
5430 emit_move_insn (operands[0], mem);
dec11868
JG
5431 DONE;
5432})
5433
43e9d192
IB
;; Expander for the VQ-mode structure loads.  It wraps the DImode address
;; in operand 1 in a MEM of the VSTRUCT mode and emits
;; aarch64_simd_ld<nregs><mode> on it.
5434(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5435 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5436 (match_operand:DI 1 "register_operand" "r")
5437 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5438 "TARGET_SIMD"
5439{
ef4bddc2 5440 machine_mode mode = <VSTRUCT:MODE>mode;
43e9d192
IB
5441 rtx mem = gen_rtx_MEM (mode, operands[1]);
5442
7e4713f8 5443 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
43e9d192
IB
5444 DONE;
5445})
5446
467e6f1b
KV
;; VQ-mode expander: wraps the DImode address in operand 1 in an OImode MEM
;; and emits aarch64_simd_ld1<mode>_x2 into OImode operand 0.
5447(define_expand "aarch64_ld1x2<VQ:mode>"
5448 [(match_operand:OI 0 "register_operand" "=w")
5449 (match_operand:DI 1 "register_operand" "r")
5450 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451 "TARGET_SIMD"
5452{
5453 machine_mode mode = OImode;
5454 rtx mem = gen_rtx_MEM (mode, operands[1]);
5455
5456 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5457 DONE;
5458})
5459
;; VDC-mode expander: wraps the DImode address in operand 1 in an OImode
;; MEM and emits aarch64_simd_ld1<mode>_x2 into OImode operand 0.
5460(define_expand "aarch64_ld1x2<VDC:mode>"
5461 [(match_operand:OI 0 "register_operand" "=w")
5462 (match_operand:DI 1 "register_operand" "r")
5463 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464 "TARGET_SIMD"
5465{
5466 machine_mode mode = OImode;
5467 rtx mem = gen_rtx_MEM (mode, operands[1]);
5468
5469 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5470 DONE;
5471})
5472
5473
97755701
AL
;; Expander for the lane structure loads.  Operand 1 is the address,
;; operand 2 the incoming register list and operand 3 the lane number.  It
;; builds a BLKmode MEM sized as one vector element times <VSTRUCT:nregs>,
;; checks operand 3 with aarch64_simd_lane_bounds against
;; [0, <VALLDIF:nunits>), and emits the matching
;; aarch64_vec_load_lanes<mode>_lane<mode> insn.
5474(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5475 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3ec1be97 5476 (match_operand:DI 1 "register_operand" "w")
97755701 5477 (match_operand:VSTRUCT 2 "register_operand" "0")
3ec1be97 5478 (match_operand:SI 3 "immediate_operand" "i")
4d0a0237 5479 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3ec1be97
CB
5480 "TARGET_SIMD"
5481{
f4720b94 5482 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
97755701
AL
5483 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5484 * <VSTRUCT:nregs>);
5485
6a70badb 5486 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
97755701
AL
5487 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5488 operands[0], mem, operands[2], operands[3]));
3ec1be97
CB
5489 DONE;
5490})
5491
43e9d192
IB
5492;; Expanders for builtins to extract vector registers from large
5493;; opaque integer modes.
5494
5495;; D-register list.
5496
;; Extract part number operand 2 of the register list in operand 1 as a
;; VDC-mode value.  The part is read as a <VDC:VDBL>-mode SUBREG at byte
;; offset part * 16 into a temporary, and operand 0 receives the lowpart of
;; that temporary.
5497(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5498 [(match_operand:VDC 0 "register_operand" "=w")
5499 (match_operand:VSTRUCT 1 "register_operand" "w")
5500 (match_operand:SI 2 "immediate_operand" "i")]
5501 "TARGET_SIMD"
5502{
5503 int part = INTVAL (operands[2]);
5504 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5505 int offset = part * 16;
5506
5507 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5508 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5509 DONE;
5510})
5511
5512;; Q-register list.
5513
;; Extract part number operand 2 of the register list in operand 1 as a
;; VQ-mode value, by moving the SUBREG at byte offset part * 16 into
;; operand 0.
5514(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5515 [(match_operand:VQ 0 "register_operand" "=w")
5516 (match_operand:VSTRUCT 1 "register_operand" "w")
5517 (match_operand:SI 2 "immediate_operand" "i")]
5518 "TARGET_SIMD"
5519{
5520 int part = INTVAL (operands[2]);
5521 int offset = part * 16;
5522
5523 emit_move_insn (operands[0],
5524 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5525 DONE;
5526})
5527
5528;; Permuted-store expanders for neon intrinsics.
5529
88b08073
JG
5530;; Permute instructions
5531
5532;; vec_perm support
5533
88b08073
JG
;; Variable permute for the VB modes.  All the work is delegated to
;; aarch64_expand_vec_perm, which receives the four operands and <nunits>.
5534(define_expand "vec_perm<mode>"
5535 [(match_operand:VB 0 "register_operand")
5536 (match_operand:VB 1 "register_operand")
5537 (match_operand:VB 2 "register_operand")
5538 (match_operand:VB 3 "register_operand")]
f7c4e5b8 5539 "TARGET_SIMD"
88b08073
JG
5540{
5541 aarch64_expand_vec_perm (operands[0], operands[1],
80940017 5542 operands[2], operands[3], <nunits>);
88b08073
JG
5543 DONE;
5544})
5545
;; TBL with a one-register table: V16QI operand 1 is the table ({%1.16b})
;; and VB-mode operand 2 supplies the indices.
5546(define_insn "aarch64_tbl1<mode>"
5547 [(set (match_operand:VB 0 "register_operand" "=w")
5548 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5549 (match_operand:VB 2 "register_operand" "w")]
5550 UNSPEC_TBL))]
5551 "TARGET_SIMD"
5552 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
78ec3036 5553 [(set_attr "type" "neon_tbl1<q>")]
88b08073
JG
5554)
5555
5556;; Two source registers.
5557
;; TBL with a two-register table: OImode operand 1 is the table
;; ({%S1.16b - %T1.16b}) and V16QI operand 2 supplies the indices.
5558(define_insn "aarch64_tbl2v16qi"
5559 [(set (match_operand:V16QI 0 "register_operand" "=w")
5560 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5561 (match_operand:V16QI 2 "register_operand" "w")]
5562 UNSPEC_TBL))]
5563 "TARGET_SIMD"
5564 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
78ec3036 5565 [(set_attr "type" "neon_tbl2_q")]
88b08073
JG
5566)
5567
246cc060
CL
;; VB-mode TBL whose table is the OImode register pair in operand 1
;; ({%S1.16b - %T1.16b}); operand 2 supplies the indices.
;; NOTE(review): the name and the "neon_tbl3" type say 3 while the table
;; operand is a two-register OImode list -- presumably three D-registers
;; packed into two Q-registers; confirm against the intrinsic caller.
5568(define_insn "aarch64_tbl3<mode>"
5569 [(set (match_operand:VB 0 "register_operand" "=w")
5570 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5571 (match_operand:VB 2 "register_operand" "w")]
9371aecc
CL
5572 UNSPEC_TBL))]
5573 "TARGET_SIMD"
246cc060 5574 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
9371aecc
CL
5575 [(set_attr "type" "neon_tbl3")]
5576)
5577
246cc060
CL
;; VB-mode TBX whose table is the OImode register pair in operand 2
;; ({%S2.16b - %T2.16b}); operand 3 supplies the indices.  Operand 1 is
;; tied to the destination (constraint "0").
5578(define_insn "aarch64_tbx4<mode>"
5579 [(set (match_operand:VB 0 "register_operand" "=w")
5580 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
9371aecc 5581 (match_operand:OI 2 "register_operand" "w")
246cc060
CL
5582 (match_operand:VB 3 "register_operand" "w")]
5583 UNSPEC_TBX))]
5584 "TARGET_SIMD"
5585 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5586 [(set_attr "type" "neon_tbl4")]
5587)
5588
5589;; Three source registers.
5590
;; TBL with a three-register table: CImode operand 1 is the table
;; ({%S1.16b - %U1.16b}) and operand 2 supplies the indices.
5591(define_insn "aarch64_qtbl3<mode>"
5592 [(set (match_operand:VB 0 "register_operand" "=w")
5593 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5594 (match_operand:VB 2 "register_operand" "w")]
5595 UNSPEC_TBL))]
5596 "TARGET_SIMD"
5597 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5598 [(set_attr "type" "neon_tbl3")]
5599)
5600
;; TBX with a three-register table: CImode operand 2 is the table
;; ({%S2.16b - %U2.16b}) and operand 3 supplies the indices.  Operand 1 is
;; tied to the destination (constraint "0").
5601(define_insn "aarch64_qtbx3<mode>"
5602 [(set (match_operand:VB 0 "register_operand" "=w")
5603 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5604 (match_operand:CI 2 "register_operand" "w")
5605 (match_operand:VB 3 "register_operand" "w")]
5606 UNSPEC_TBX))]
5607 "TARGET_SIMD"
5608 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5609 [(set_attr "type" "neon_tbl3")]
5610)
5611
5612;; Four source registers.
5613
;; TBL with a four-register table: XImode operand 1 is the table
;; ({%S1.16b - %V1.16b}) and operand 2 supplies the indices.
5614(define_insn "aarch64_qtbl4<mode>"
5615 [(set (match_operand:VB 0 "register_operand" "=w")
5616 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5617 (match_operand:VB 2 "register_operand" "w")]
5618 UNSPEC_TBL))]
5619 "TARGET_SIMD"
5620 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5621 [(set_attr "type" "neon_tbl4")]
5622)
5623
;; TBX with a four-register table: XImode operand 2 is the table
;; ({%S2.16b - %V2.16b}) and operand 3 supplies the indices.  Operand 1 is
;; tied to the destination (constraint "0").
5624(define_insn "aarch64_qtbx4<mode>"
5625 [(set (match_operand:VB 0 "register_operand" "=w")
5626 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5627 (match_operand:XI 2 "register_operand" "w")
5628 (match_operand:VB 3 "register_operand" "w")]
9371aecc
CL
5629 UNSPEC_TBX))]
5630 "TARGET_SIMD"
246cc060 5631 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
9371aecc
CL
5632 [(set_attr "type" "neon_tbl4")]
5633)
5634
88b08073
JG
;; Form an OImode value from the two V16QI registers in operands 1 and 2
;; (UNSPEC_CONCAT).  The insn is output as "#" and split once reload has
;; completed, by aarch64_split_combinev16qi.
5635(define_insn_and_split "aarch64_combinev16qi"
5636 [(set (match_operand:OI 0 "register_operand" "=w")
5637 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5638 (match_operand:V16QI 2 "register_operand" "w")]
5639 UNSPEC_CONCAT))]
5640 "TARGET_SIMD"
5641 "#"
5642 "&& reload_completed"
5643 [(const_int 0)]
5644{
5645 aarch64_split_combinev16qi (operands);
5646 DONE;
0f686aa9
JG
5647}
5648[(set_attr "type" "multiple")]
5649)
88b08073 5650
3f8334a5
RS
5651;; This instruction's pattern is generated directly by
5652;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653;; need corresponding changes there.
;; Two-input permutes selected by the PERMUTE iterator.  The mnemonic is
;; assembled from <perm_insn> and <perm_hilo>; both inputs and the result
;; share one VALL_F16 mode.
cc4d934f 5654(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
358decd5
JW
5655 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5656 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5657 (match_operand:VALL_F16 2 "register_operand" "w")]
5658 PERMUTE))]
cc4d934f
JG
5659 "TARGET_SIMD"
5660 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 5661 [(set_attr "type" "neon_permute<q>")]
cc4d934f
JG
5662)
5663
3f8334a5
RS
5664;; This instruction's pattern is generated directly by
5665;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666;; need corresponding changes there. Note that the immediate (third)
5667;; operand is a lane index not a byte index.
;; EXT of operands 1 and 2.  Operand 3 is a lane index in the RTL; the
;; output code multiplies it by GET_MODE_UNIT_SIZE (<MODE>mode) to form the
;; byte immediate printed as #%3.
ae0533da 5668(define_insn "aarch64_ext<mode>"
358decd5
JW
5669 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5670 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5671 (match_operand:VALL_F16 2 "register_operand" "w")
5672 (match_operand:SI 3 "immediate_operand" "i")]
5673 UNSPEC_EXT))]
ae0533da
AL
5674 "TARGET_SIMD"
5675{
5676 operands[3] = GEN_INT (INTVAL (operands[3])
cb5ca315 5677 * GET_MODE_UNIT_SIZE (<MODE>mode));
ae0533da
AL
5678 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5679}
5680 [(set_attr "type" "neon_ext<q>")]
5681)
5682
3f8334a5
RS
5683;; This instruction's pattern is generated directly by
5684;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685;; need corresponding changes there.
;; One-input REV<rev_op> instructions selected by the REVERSE iterator.
923fcec3 5686(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
358decd5
JW
5687 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5688 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
923fcec3
AL
5689 REVERSE))]
5690 "TARGET_SIMD"
5691 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5692 [(set_attr "type" "neon_rev<q>")]
5693)
5694
;; VD-mode variant: ST2 of the OImode register list %S1 through %T1 to
;; BLKmode memory operand 0.
43e9d192 5695(define_insn "aarch64_st2<mode>_dreg"
6960bf55
AL
5696 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5697 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
43e9d192
IB
5698 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5699 UNSPEC_ST2))]
5700 "TARGET_SIMD"
5701 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
78ec3036
JG
5702 [(set_attr "type" "neon_store2_2reg")]
5703)
43e9d192
IB
5704
;; DX-mode variant of the two-register D-register store.  It is output as
;; ST1 on .1d registers %S1 through %T1 rather than as ST2.
5705(define_insn "aarch64_st2<mode>_dreg"
6960bf55
AL
5706 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5707 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
43e9d192
IB
5708 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5709 UNSPEC_ST2))]
5710 "TARGET_SIMD"
5711 "st1\\t{%S1.1d - %T1.1d}, %0"
78ec3036
JG
5712 [(set_attr "type" "neon_store1_2reg")]
5713)
43e9d192
IB
5714
;; VD-mode variant: ST3 of the CImode register list %S1 through %U1 to
;; BLKmode memory operand 0.
5715(define_insn "aarch64_st3<mode>_dreg"
6960bf55
AL
5716 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5717 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
43e9d192
IB
5718 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719 UNSPEC_ST3))]
5720 "TARGET_SIMD"
5721 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
78ec3036
JG
5722 [(set_attr "type" "neon_store3_3reg")]
5723)
43e9d192
IB
5724
;; DX-mode variant of the three-register D-register store.  It is output
;; as ST1 on .1d registers %S1 through %U1 rather than as ST3.
5725(define_insn "aarch64_st3<mode>_dreg"
6960bf55
AL
5726 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5727 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
43e9d192
IB
5728 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5729 UNSPEC_ST3))]
5730 "TARGET_SIMD"
5731 "st1\\t{%S1.1d - %U1.1d}, %0"
78ec3036
JG
5732 [(set_attr "type" "neon_store1_3reg")]
5733)
43e9d192
IB
5734
;; VD-mode variant: ST4 of the XImode register list %S1 through %V1 to
;; BLKmode memory operand 0.
5735(define_insn "aarch64_st4<mode>_dreg"
6960bf55
AL
5736 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5737 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
43e9d192
IB
5738 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5739 UNSPEC_ST4))]
5740 "TARGET_SIMD"
5741 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
78ec3036
JG
5742 [(set_attr "type" "neon_store4_4reg")]
5743)
43e9d192
IB
5744
;; DX-mode variant of the four-register D-register store.  It is output as
;; ST1 on .1d registers %S1 through %V1 rather than as ST4.
5745(define_insn "aarch64_st4<mode>_dreg"
6960bf55
AL
5746 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5747 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
43e9d192
IB
5748 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5749 UNSPEC_ST4))]
5750 "TARGET_SIMD"
5751 "st1\\t{%S1.1d - %V1.1d}, %0"
78ec3036
JG
5752 [(set_attr "type" "neon_store1_4reg")]
5753)
43e9d192
IB
5754
;; Expander for the VDC-mode structure stores.  It wraps the DImode address
;; in operand 0 in a BLKmode MEM whose size is set to <VSTRUCT:nregs> * 8
;; bytes and emits the matching aarch64_st<nregs><mode>_dreg insn.
5755(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5756 [(match_operand:DI 0 "register_operand" "r")
5757 (match_operand:VSTRUCT 1 "register_operand" "w")
5758 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759 "TARGET_SIMD"
5760{
6960bf55
AL
5761 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5762 set_mem_size (mem, <VSTRUCT:nregs> * 8);
43e9d192
IB
5763
5764 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5765 DONE;
5766})
5767
;; Expander for the VQ-mode structure stores.  It wraps the DImode address
;; in operand 0 in a MEM of the VSTRUCT mode and emits
;; aarch64_simd_st<nregs><mode> to it.
5768(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5769 [(match_operand:DI 0 "register_operand" "r")
5770 (match_operand:VSTRUCT 1 "register_operand" "w")
5771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772 "TARGET_SIMD"
5773{
ef4bddc2 5774 machine_mode mode = <VSTRUCT:MODE>mode;
43e9d192
IB
5775 rtx mem = gen_rtx_MEM (mode, operands[0]);
5776
7e4713f8 5777 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
43e9d192
IB
5778 DONE;
5779})
5780
;; Expander for the lane structure stores.  Operand 0 is the address,
;; operand 1 the register list and operand 2 the lane number.  It builds a
;; BLKmode MEM sized as one vector element times <VSTRUCT:nregs> and emits
;; the matching aarch64_vec_store_lanes<mode>_lane<mode> insn.
;; NOTE(review): no aarch64_simd_lane_bounds call is made here on the lane
;; operand -- confirm the lane is range-checked elsewhere.
97755701 5781(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
ba081b77 5782 [(match_operand:DI 0 "register_operand" "r")
97755701 5783 (match_operand:VSTRUCT 1 "register_operand" "w")
4d0a0237 5784 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
ba081b77
JG
5785 (match_operand:SI 2 "immediate_operand")]
5786 "TARGET_SIMD"
5787{
d866f024 5788 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
97755701
AL
5789 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5790 * <VSTRUCT:nregs>);
ba081b77 5791
97755701
AL
5792 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5793 mem, operands[1], operands[2]));
ba081b77
JG
5794 DONE;
5795})
5796
;; Expander for a single-vector store to the address in operand 0.  It
;; builds a MEM in the vector mode; on big-endian it emits
;; aarch64_be_st1<mode>, otherwise an ordinary emit_move_insn.
71a11456 5797(define_expand "aarch64_st1<VALL_F16:mode>"
dec11868 5798 [(match_operand:DI 0 "register_operand")
71a11456 5799 (match_operand:VALL_F16 1 "register_operand")]
dec11868
JG
5800 "TARGET_SIMD"
5801{
71a11456 5802 machine_mode mode = <VALL_F16:MODE>mode;
dec11868 5803 rtx mem = gen_rtx_MEM (mode, operands[0]);
89b4515c
AV
5804
5805 if (BYTES_BIG_ENDIAN)
71a11456 5806 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
89b4515c
AV
5807 else
5808 emit_move_insn (mem, operands[1]);
dec11868
JG
5809 DONE;
5810})
5811
43e9d192
IB
5812;; Expander for builtins to insert vector registers into large
5813;; opaque integer modes.
5814
5815;; Q-register list. We don't need a D-reg inserter as we zero
5816;; extend them in arm_neon.h and insert the resulting Q-regs.
5817
;; Copy the register list in operand 1 to operand 0, then overwrite part
;; number operand 3 of operand 0 with the VQ-mode value in operand 2 by
;; moving into the SUBREG at byte offset part * 16.
5818(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5819 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5820 (match_operand:VSTRUCT 1 "register_operand" "0")
5821 (match_operand:VQ 2 "register_operand" "w")
5822 (match_operand:SI 3 "immediate_operand" "i")]
5823 "TARGET_SIMD"
5824{
5825 int part = INTVAL (operands[3]);
5826 int offset = part * 16;
5827
5828 emit_move_insn (operands[0], operands[1]);
5829 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5830 operands[2]);
5831 DONE;
5832})
5833
ff03930a 5834;; Standard pattern name vec_init<mode><Vel>.
4369c11e 5835
;; All the work is delegated to aarch64_expand_vector_init, called with the
;; destination (operand 0) and operand 1, which has no predicate or mode
;; here.
ff03930a 5836(define_expand "vec_init<mode><Vel>"
862abc04 5837 [(match_operand:VALL_F16 0 "register_operand" "")
4369c11e
TB
5838 (match_operand 1 "" "")]
5839 "TARGET_SIMD"
5840{
5841 aarch64_expand_vector_init (operands[0], operands[1]);
5842 DONE;
5843})
5844
;; LD1R: a vec_duplicate of the <VEL>-mode memory operand 1 into vector
;; operand 0.
a50344cb 5845(define_insn "*aarch64_simd_ld1r<mode>"
862abc04
AL
5846 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5847 (vec_duplicate:VALL_F16
a50344cb
TB
5848 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5849 "TARGET_SIMD"
5850 "ld1r\\t{%0.<Vtype>}, %1"
78ec3036
JG
5851 [(set_attr "type" "neon_load1_all_lanes")]
5852)
0050faf8 5853
467e6f1b
KV
;; VQ-mode variant: LD1 from OImode memory operand 1 into the register
;; list %S0 through %T0.
5854(define_insn "aarch64_simd_ld1<mode>_x2"
5855 [(set (match_operand:OI 0 "register_operand" "=w")
5856 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 UNSPEC_LD1))]
5859 "TARGET_SIMD"
5860 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861 [(set_attr "type" "neon_load1_2reg<q>")]
5862)
5863
;; VDC-mode variant: LD1 from OImode memory operand 1 into the register
;; list %S0 through %T0.
5864(define_insn "aarch64_simd_ld1<mode>_x2"
5865 [(set (match_operand:OI 0 "register_operand" "=w")
5866 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868 UNSPEC_LD1))]
5869 "TARGET_SIMD"
5870 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871 [(set_attr "type" "neon_load1_2reg<q>")]
5872)
5873
5874
;; FRECPE of operand 1 into operand 0, for every mode in VHSDF_HSDF.
0016d8d9 5875(define_insn "@aarch64_frecpe<mode>"
4663b943
RS
5876 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5877 (unspec:VHSDF_HSDF
5878 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
daef0a8c 5879 UNSPEC_FRECPE))]
0050faf8 5880 "TARGET_SIMD"
4663b943 5881 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
daef0a8c 5882 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
0050faf8
JG
5883)
5884
;; FRECPX of operand 1 into operand 0, for the GPF_F16 modes.
4663b943 5885(define_insn "aarch64_frecpx<mode>"
d7f33f07
JW
5886 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5887 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
4663b943 5888 UNSPEC_FRECPX))]
fe6f68e2 5889 "TARGET_SIMD"
4663b943
RS
5890 "frecpx\t%<s>0, %<s>1"
5891 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
fe6f68e2
JG
5892)
5893
;; FRECPS of operands 1 and 2 into operand 0, for every mode in
;; VHSDF_HSDF.
0016d8d9 5894(define_insn "@aarch64_frecps<mode>"
68ad28c3
JW
5895 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5896 (unspec:VHSDF_HSDF
5897 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5898 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
33d72b63 5899 UNSPEC_FRECPS))]
0050faf8 5900 "TARGET_SIMD"
fe6f68e2 5901 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 5902 [(set_attr "type" "neon_fp_recps_<stype><q>")]
0050faf8
JG
5903)
5904
58a3bd25
FY
;; URECPE of operand 1 into operand 0, for the VDQ_SI modes.
5905(define_insn "aarch64_urecpe<mode>"
5906 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5907 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5908 UNSPEC_URECPE))]
5909 "TARGET_SIMD"
5910 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5911 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5912
ff03930a 5913;; Standard pattern name vec_extract<mode><Vel>.
0f365c10 5914
;; Forwards all three operands unchanged to aarch64_get_lane<mode>.
ff03930a 5915(define_expand "vec_extract<mode><Vel>"
e58bf20a 5916 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
862abc04 5917 (match_operand:VALL_F16 1 "register_operand" "")
e58bf20a 5918 (match_operand:SI 2 "immediate_operand" "")]
0f365c10 5919 "TARGET_SIMD"
e58bf20a
TB
5920{
5921 emit_insn
5922 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5923 DONE;
5924})
5a7a4e80
TB
5925
5926;; aes
5927
;; AES<aes_op> on V16QI, additionally gated on TARGET_AES.  Operand 1 is
;; tied to the destination and marked commutative with operand 2 ("%0");
;; only %0 and %2 appear in the assembly.
5928(define_insn "aarch64_crypto_aes<aes_op>v16qi"
5929 [(set (match_operand:V16QI 0 "register_operand" "=w")
ff029883 5930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5a7a4e80
TB
5931 (match_operand:V16QI 2 "register_operand" "w")]
5932 CRYPTO_AES))]
27086ea3 5933 "TARGET_SIMD && TARGET_AES"
5a7a4e80 5934 "aes<aes_op>\\t%0.16b, %2.16b"
b10baa95 5935 [(set_attr "type" "crypto_aese")]
5a7a4e80
TB
5936)
5937
9b57fd3d
AV
;; Matches the AES unspec whose first input is an explicit XOR of operands
;; 1 and 2 and whose second input is a zero vector (operand 3).  It emits
;; the same single AES<aes_op> instruction on %0 and %2.
5938(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939 [(set (match_operand:V16QI 0 "register_operand" "=w")
5940 (unspec:V16QI [(xor:V16QI
5941 (match_operand:V16QI 1 "register_operand" "%0")
5942 (match_operand:V16QI 2 "register_operand" "w"))
5943 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5944 CRYPTO_AES))]
5945 "TARGET_SIMD && TARGET_AES"
5946 "aes<aes_op>\\t%0.16b, %2.16b"
5947 [(set_attr "type" "crypto_aese")]
5948)
5949
;; Anonymous pattern: same single-instruction output as the xor/zero
;; form of the CRYPTO_AES unspec, but matching the operand order in which
;; the all-zero vector (operand 3) comes first and (xor op1 op2) second.
(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
                       (xor:V16QI
                        (match_operand:V16QI 1 "register_operand" "%0")
                        (match_operand:V16QI 2 "register_operand" "w"))]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
5960
f2879a90
KT
5961;; When AES/AESMC fusion is enabled we want the register allocation to
5962;; look like:
5963;; AESE Vn, _
5964;; AESMC Vn, Vn
5965;; So prefer to tie operand 1 to operand 0 when fusing.
5966
;; One pattern per CRYPTO_AESMC iterator value, with two alternatives:
;;   alt 0: operand 1 tied to the destination ("0");
;;   alt 1: operand 1 in any "w" register.
;; Alternative 0 is only enabled when
;; aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) holds; alternative 1
;; is always enabled.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
         CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
                     "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
                    (const_string "yes" )
                    (const_string "no"))
      (const_string "yes")])]
)
5981
10c54e5b
KT
;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
5986
;; Anonymous pattern matching UNSPEC_AESMC applied to the result of
;; UNSPEC_AESE.  It emits both instructions from one template (hence
;; length 8) and is only available when
;; aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) holds.  Operand 1 is
;; tied to the early-clobber destination.
(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
        (unspec:V16QI
         [(unspec:V16QI
           [(match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
         ] UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
6000
;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.
6005
;; Anonymous pattern matching UNSPEC_AESIMC applied to the result of
;; UNSPEC_AESD.  It emits both instructions from one template (hence
;; length 8).  Note that it is gated on the same AARCH64_FUSE_AES_AESMC
;; fusion flag as the AESE/AESMC pattern.  Operand 1 is tied to the
;; early-clobber destination.
(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
        (unspec:V16QI
         [(unspec:V16QI
           [(match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
         ] UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
6019
30442682
TB
6020;; sha1
6021
;; SHA1H on a scalar SImode value held in a SIMD register; both operands
;; are printed with the %s (S-register) modifier.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
6031
5304d044
WD
;; SHA1H applied to element 0 of a V4SI register.  Only valid when
;; !BYTES_BIG_ENDIAN; aarch64_be_crypto_sha1hv4si handles the
;; BYTES_BIG_ENDIAN case with a different element index.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                                   (parallel [(const_int 0)]))]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
6041
;; BYTES_BIG_ENDIAN counterpart of aarch64_crypto_sha1hv4si: the RTL
;; selects element 3 of the V4SI operand instead of element 0, while the
;; emitted instruction is the same.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
                                   (parallel [(const_int 3)]))]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
6051
30442682
TB
;; SHA1SU1: operand 1 is tied to the destination, operand 2 is the only
;; explicit source in the output template.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
6061
;; One pattern per CRYPTO_SHA1 iterator value.  Operand 1 (V4SI) is tied
;; to the destination and printed as a Q register, operand 2 is a scalar
;; SImode value printed as an S register, operand 3 is a V4SI vector.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
6072
;; SHA1SU0: operand 1 is tied to the destination; operands 2 and 3 are
;; the explicit vector sources.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
b9cb0a44
TB
6083
6084;; sha256
6085
;; One pattern per CRYPTO_SHA256 iterator value.  Operand 1 is tied to
;; the destination; the destination and operand 2 are printed as Q
;; registers, operand 3 as a .4s vector.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
6096
;; SHA256SU0: operand 1 is tied to the destination, operand 2 is the
;; explicit vector source.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
6106
;; SHA256SU1: operand 1 is tied to the destination; operands 2 and 3 are
;; the explicit vector sources.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
7baa225d 6117
27086ea3
MC
6118;; sha512
6119
;; One pattern per CRYPTO_SHA512 iterator value.  Operand 1 is tied to
;; the destination; the destination and operand 2 are printed as Q
;; registers, operand 3 as a .2d vector.  Gated on TARGET_SHA3.
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
6130
;; SHA512SU0: operand 1 is tied to the destination, operand 2 is the
;; explicit vector source.  Gated on TARGET_SHA3.
(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)
6140
;; SHA512SU1: operand 1 is tied to the destination; operands 2 and 3 are
;; the explicit vector sources.  Gated on TARGET_SHA3.
(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
6151
6152;; sha3
6153
d21052eb
TC
;; EOR3: three-way exclusive OR, expressed directly in RTL as
;; (op2 ^ op3) ^ op1 over the VQ_I modes.  The output always uses the
;; .16b arrangement regardless of <mode>.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
         (xor:VQ_I
          (match_operand:VQ_I 2 "register_operand" "w")
          (match_operand:VQ_I 3 "register_operand" "w"))
         (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
6165
;; RAX1: expressed in RTL as op1 ^ rotate-left (op2, 1) on V2DI.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
         (rotate:V2DI
          (match_operand:V2DI 2 "register_operand" "w")
          (const_int 1))
         (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)
6177
;; XAR: expressed in RTL as rotate-right (op1 ^ op2, op3) on V2DI.
;; Operands 1 and 2 are commutative ("%"); operand 3 is an SImode
;; immediate accepted by aarch64_simd_shift_imm_di.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (rotatert:V2DI
         (xor:V2DI
          (match_operand:V2DI 1 "register_operand" "%w")
          (match_operand:V2DI 2 "register_operand" "w"))
         (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)
6189
d21052eb
TC
;; BCAX: expressed in RTL as (~op3 & op2) ^ op1 over the VQ_I modes.
;; The output always uses the .16b arrangement regardless of <mode>.
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
         (and:VQ_I
          (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
          (match_operand:VQ_I 2 "register_operand" "w"))
         (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
6201
6202;; SM3
6203
;; SM3SS1: three independent V4SI sources, none tied to the destination.
;; Gated on TARGET_SM4.
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
6214
6215
;; One pattern per CRYPTO_SM3TT iterator value.  Operand 1 is tied to the
;; destination; operand 4 is an immediate (aarch64_imm2) printed as the
;; element index applied to operand 3.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")
                      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
         CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)
6227
;; One pattern per CRYPTO_SM3PART iterator value.  Operand 1 is tied to
;; the destination; operands 2 and 3 are the explicit vector sources.
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
6238
6239;; SM4
6240
;; SM4E: operand 1 is tied to the destination, operand 2 is the explicit
;; vector source.
(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6250
;; SM4EKEY: two independent V4SI sources, neither tied to the
;; destination.
(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6260
6261;; fp16fml
6262
;; Expander for the "low" FMLAL/FMLSL forms over VDQSF.  It builds two
;; identical half-selector parallels for the <VFMLA_W> inputs (last
;; argument false) and hands them, together with the four operands, to
;; the matching aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> insn.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (unspec:VDQSF
         [(match_operand:VDQSF 1 "register_operand" "0")
          (match_operand:<VFMLA_W> 2 "register_operand" "w")
          (match_operand:<VFMLA_W> 3 "register_operand" "w")]
         VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
                                                                operands[1],
                                                                operands[2],
                                                                operands[3],
                                                                p1, p2));
  DONE;
})
6285
;; Expander for the "high" FMLAL/FMLSL forms over VDQSF.  It builds two
;; identical half-selector parallels for the <VFMLA_W> inputs (last
;; argument true) and hands them, together with the four operands, to
;; the matching aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> insn.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (unspec:VDQSF
         [(match_operand:VDQSF 1 "register_operand" "0")
          (match_operand:<VFMLA_W> 2 "register_operand" "w")
          (match_operand:<VFMLA_W> 3 "register_operand" "w")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
                                                                 operands[1],
                                                                 operands[2],
                                                                 operands[3],
                                                                 p1, p2));
  DONE;
})
6305
;; FMLAL, low half: fma of the float-extended halves of operands 2 and 3
;; (selected by the lo-half parallels in operands 4 and 5) added to
;; operand 1, which is tied to the destination.
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 2 "register_operand" "w")
           (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 3 "register_operand" "w")
           (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
         (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6322
;; FMLSL, low half: as the FMLAL low pattern, except that the selected
;; half of operand 2 is negated before being float-extended.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
         (float_extend:VDQSF
          (neg:<VFMLA_SEL_W>
           (vec_select:<VFMLA_SEL_W>
            (match_operand:<VFMLA_W> 2 "register_operand" "w")
            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 3 "register_operand" "w")
           (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
         (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6340
;; FMLAL2, high half: fma of the float-extended halves of operands 2 and
;; 3 (selected by the hi-half parallels in operands 4 and 5) added to
;; operand 1, which is tied to the destination.
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 2 "register_operand" "w")
           (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 3 "register_operand" "w")
           (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
         (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6357
;; FMLSL2, high half: as the FMLAL2 high pattern, except that the
;; selected half of operand 2 is negated before being float-extended.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
         (float_extend:VDQSF
          (neg:<VFMLA_SEL_W>
           (vec_select:<VFMLA_SEL_W>
            (match_operand:<VFMLA_W> 2 "register_operand" "w")
            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:VDQSF
          (vec_select:<VFMLA_SEL_W>
           (match_operand:<VFMLA_W> 3 "register_operand" "w")
           (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
         (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
6375
;; Expander for the by-lane "low" FMLAL/FMLSL forms: V2SF accumulator,
;; V4HF operand 2, lane taken from V4HF operand 3.  The lane number in
;; operand 4 is passed through aarch64_endian_lane_rtx before being
;; handed to the matching aarch64_simd_fml<f16mac1>l_lane_lowv2sf insn.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
         VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
                                                          operands[1],
                                                          operands[2],
                                                          operands[3],
                                                          p1, lane));
  DONE;
}
)
6396
;; Expander for the by-lane "high" FMLAL/FMLSL forms: V2SF accumulator,
;; V4HF operand 2, lane taken from V4HF operand 3.  The lane number in
;; operand 4 is passed through aarch64_endian_lane_rtx before being
;; handed to the matching aarch64_simd_fml<f16mac1>l_lane_highv2sf insn.
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})
6416
;; FMLAL by lane, low half, V2SF result.  The lo-half selection of V4HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V4HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6434
;; FMLSL by lane, low half, V2SF result.  As the FMLAL by-lane low
;; pattern, except that the selected half of operand 2 is negated.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6453
;; FMLAL2 by lane, high half, V2SF result.  The hi-half selection of V4HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V4HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6471
;; FMLSL2 by lane, high half, V2SF result.  As the FMLAL2 by-lane high
;; pattern, except that the selected half of operand 2 is negated.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6490
;; Expander for the by-lane "low" FMLAL/FMLSL forms: V4SF accumulator,
;; V8HF operand 2, lane taken from V8HF operand 3.  The lane number in
;; operand 4 is passed through aarch64_endian_lane_rtx before being
;; handed to the matching aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf insn.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
         VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})
6510
;; Expander for the by-lane "high" FMLAL/FMLSL forms: V4SF accumulator,
;; V8HF operand 2, lane taken from V8HF operand 3.  The lane number in
;; operand 4 is passed through aarch64_endian_lane_rtx before being
;; handed to the matching aarch64_simd_fml<f16mac1>lq_laneq_highv4sf insn.
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})
6530
;; FMLAL by lane, low half, V4SF result.  The lo-half selection of V8HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V8HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6548
;; FMLSL by lane, low half, V4SF result.  As the FMLAL by-lane low
;; pattern, except that the selected half of operand 2 is negated.
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6567
;; FMLAL2 by lane, high half, V4SF result.  The hi-half selection of V8HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V8HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6585
;; FMLSL2 by lane, high half, V4SF result.  As the FMLAL2 by-lane high
;; pattern, except that the selected half of operand 2 is negated.
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6604
;; Expander for the by-lane "low" FMLAL/FMLSL forms: V2SF accumulator,
;; V4HF operand 2, lane taken from V8HF operand 3.  The half selector is
;; built for V4HFmode, whereas the lane number in operand 4 is passed
;; through aarch64_endian_lane_rtx using V8HFmode.
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
         VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})
6625
;; Expander for the by-lane "high" FMLAL/FMLSL forms: V2SF accumulator,
;; V4HF operand 2, lane taken from V8HF operand 3.  The half selector is
;; built for V4HFmode, whereas the lane number in operand 4 is passed
;; through aarch64_endian_lane_rtx using V8HFmode.
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})
6646
;; FMLAL by lane, low half, V2SF result.  The lo-half selection of V4HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V8HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6664
;; FMLSL by lane, low half, V2SF result with a V8HF lane source.  As the
;; FMLAL by-lane low pattern, except that the selected half of operand 2
;; is negated.
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6683
;; FMLAL2 by lane, high half, V2SF result.  The hi-half selection of V4HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V8HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6701
;; FMLSL2 by lane, high half, V2SF result with a V8HF lane source.  As
;; the FMLAL2 by-lane high pattern, except that the selected half of
;; operand 2 is negated.
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6720
;; Expander for the by-lane "low" FMLAL/FMLSL forms: V4SF accumulator,
;; V8HF operand 2, lane taken from V4HF operand 3.  The half selector is
;; built for V8HFmode, whereas the lane number in operand 4 is passed
;; through aarch64_endian_lane_rtx using V4HFmode.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
         VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})
6740
;; Expander for the by-lane "high" FMLAL/FMLSL forms: V4SF accumulator,
;; V8HF operand 2, lane taken from V4HF operand 3.  The half selector is
;; built for V8HFmode, whereas the lane number in operand 4 is passed
;; through aarch64_endian_lane_rtx using V4HFmode.
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
         VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})
6760
;; FMLAL by lane, low half, V4SF result.  The lo-half selection of V8HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V4HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6778
;; FMLSL by lane, low half, V4SF result with a V4HF lane source.  As the
;; FMLAL by-lane low pattern, except that the selected half of operand 2
;; is negated.
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6797
;; FMLAL2 by lane, high half, V4SF result.  The hi-half selection of V8HF
;; operand 2 is multiplied by a broadcast of lane operand 5 of V4HF
;; operand 3 and accumulated into operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6815
;; FMLSL2 by lane, high half, V4SF result with a V4HF lane source.  As
;; the FMLAL2 by-lane high pattern, except that the selected half of
;; operand 2 is negated.
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6834
7baa225d
TB
6835;; pmull
6836
;; PMULL on two DImode inputs producing a TImode result (printed with the
;; .1q / .1d arrangements).  Gated on TARGET_AES.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
         UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
6846
;; PMULL2 on two V2DI inputs producing a TImode result (printed with the
;; .1q / .2d arrangements).  Gated on TARGET_AES.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)