]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-simd.md
aarch64: Fix up bfmlal lane pattern [PR104921]
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
CommitLineData
43e9d192 1;; Machine description for AArch64 AdvSIMD architecture.
7adcbafe 2;; Copyright (C) 2011-2022 Free Software Foundation, Inc.
43e9d192
IB
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
43e9d192 21(define_expand "mov<mode>"
6a770526
RS
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
23 (match_operand:VALL_F16 1 "general_operand"))]
43e9d192
IB
24 "TARGET_SIMD"
25 "
bb5d9711
JW
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
6a70badb 34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
bb5d9711 35 && aarch64_mem_pair_operand (operands[0], DImode))
6a70badb 36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
43e9d192 37 operands[1] = force_reg (<MODE>mode, operands[1]);
1b5f74e8
RS
38
39 /* If a constant is too complex to force to memory (e.g. because it
40 contains CONST_POLY_INTs), build it up from individual elements instead.
41 We should only need to do this before RA; aarch64_legitimate_constant_p
42 should ensure that we don't try to rematerialize the constant later. */
43 if (GET_CODE (operands[1]) == CONST_VECTOR
44 && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
45 {
46 aarch64_expand_vector_init (operands[0], operands[1]);
47 DONE;
48 }
43e9d192
IB
49 "
50)
51
52(define_expand "movmisalign<mode>"
7e4f89a2
RS
53 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
54 (match_operand:VALL_F16 1 "general_operand"))]
c2a4bf2d 55 "TARGET_SIMD && !STRICT_ALIGNMENT"
43e9d192
IB
56{
57 /* This pattern is not permitted to fail during expansion: if both arguments
58 are non-registers (e.g. memory := constant, which can be created by the
59 auto-vectorizer), force operand 1 into a register. */
60 if (!register_operand (operands[0], <MODE>mode)
61 && !register_operand (operands[1], <MODE>mode))
62 operands[1] = force_reg (<MODE>mode, operands[1]);
63})
64
65(define_insn "aarch64_simd_dup<mode>"
a844a695
AL
66 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
67 (vec_duplicate:VDQ_I
dca19fe1 68 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
43e9d192 69 "TARGET_SIMD"
91bd4114 70 "@
dca19fe1 71 dup\\t%0.<Vtype>, %1.<Vetype>[0]
c744ae08 72 dup\\t%0.<Vtype>, %<vwcore>1"
dca19fe1 73 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
91bd4114
JG
74)
75
76(define_insn "aarch64_simd_dup<mode>"
1d5c43db 77 [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
862abc04 78 (vec_duplicate:VDQF_F16
1d5c43db 79 (match_operand:<VEL> 1 "register_operand" "w,r")))]
91bd4114 80 "TARGET_SIMD"
1d5c43db
TC
81 "@
82 dup\\t%0.<Vtype>, %1.<Vetype>[0]
c744ae08 83 dup\\t%0.<Vtype>, %<vwcore>1"
1d5c43db 84 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
43e9d192
IB
85)
86
87(define_insn "aarch64_dup_lane<mode>"
862abc04
AL
88 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
89 (vec_duplicate:VALL_F16
43e9d192 90 (vec_select:<VEL>
862abc04 91 (match_operand:VALL_F16 1 "register_operand" "w")
43e9d192
IB
92 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
93 )))]
94 "TARGET_SIMD"
1dd055a2 95 {
7ac29c0f 96 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1dd055a2
JG
97 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
98 }
78ec3036 99 [(set_attr "type" "neon_dup<q>")]
43e9d192
IB
100)
101
91bd4114 102(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
88119b46
KT
103 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
104 (vec_duplicate:VALL_F16_NO_V2Q
91bd4114
JG
105 (vec_select:<VEL>
106 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
107 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
108 )))]
43e9d192 109 "TARGET_SIMD"
1dd055a2 110 {
7ac29c0f 111 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1dd055a2
JG
112 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
113 }
78ec3036 114 [(set_attr "type" "neon_dup<q>")]
43e9d192
IB
115)
116
abbe1ed2
SMW
117(define_insn "*aarch64_simd_mov<VDMOV:mode>"
118 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
ea58eb88 119 "=w, m, m, w, ?r, ?w, ?r, w")
abbe1ed2 120 (match_operand:VDMOV 1 "general_operand"
ea58eb88 121 "m, Dz, w, w, w, r, r, Dn"))]
43e9d192
IB
122 "TARGET_SIMD
123 && (register_operand (operands[0], <MODE>mode)
ea58eb88 124 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
43e9d192
IB
125{
126 switch (which_alternative)
127 {
ea58eb88
JW
128 case 0: return "ldr\t%d0, %1";
129 case 1: return "str\txzr, %0";
130 case 2: return "str\t%d1, %0";
131 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
132 case 4: return "umov\t%0, %1.d[0]";
133 case 5: return "fmov\t%d0, %1";
134 case 6: return "mov\t%0, %1";
135 case 7:
b187677b 136 return aarch64_output_simd_mov_immediate (operands[1], 64);
43e9d192
IB
137 default: gcc_unreachable ();
138 }
139}
0d1cf538 140 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
d78607f5
TC
141 neon_logic<q>, neon_to_gp<q>, f_mcr,\
142 mov_reg, neon_move<q>")]
43e9d192
IB
143)
144
abbe1ed2
SMW
145(define_insn "*aarch64_simd_mov<VQMOV:mode>"
146 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
a25831ac 147 "=w, Umn, m, w, ?r, ?w, ?r, w")
abbe1ed2 148 (match_operand:VQMOV 1 "general_operand"
ea58eb88 149 "m, Dz, w, w, w, r, r, Dn"))]
43e9d192
IB
150 "TARGET_SIMD
151 && (register_operand (operands[0], <MODE>mode)
ea58eb88 152 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
43e9d192 153{
3520f7cc
JG
154 switch (which_alternative)
155 {
c59b7e28 156 case 0:
ea58eb88 157 return "ldr\t%q0, %1";
c59b7e28 158 case 1:
ea58eb88 159 return "stp\txzr, xzr, %0";
c59b7e28 160 case 2:
ea58eb88 161 return "str\t%q1, %0";
c59b7e28 162 case 3:
ea58eb88 163 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
c59b7e28
SN
164 case 4:
165 case 5:
3520f7cc 166 case 6:
ea58eb88
JW
167 return "#";
168 case 7:
b187677b 169 return aarch64_output_simd_mov_immediate (operands[1], 128);
c59b7e28
SN
170 default:
171 gcc_unreachable ();
3520f7cc 172 }
43e9d192 173}
0d1cf538
DI
174 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
175 neon_logic<q>, multiple, multiple,\
ea58eb88
JW
176 multiple, neon_move<q>")
177 (set_attr "length" "4,4,4,4,8,8,8,4")]
43e9d192
IB
178)
179
d8395013
KT
180;; When storing lane zero we can use the normal STR and its more permissive
181;; addressing modes.
182
183(define_insn "aarch64_store_lane0<mode>"
184 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
185 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
186 (parallel [(match_operand 2 "const_int_operand" "n")])))]
187 "TARGET_SIMD
441b4d0f 188 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
d8395013
KT
189 "str\\t%<Vetype>1, %0"
190 [(set_attr "type" "neon_store1_1reg<q>")]
191)
192
dfe1da23
JW
193(define_insn "load_pair<DREG:mode><DREG2:mode>"
194 [(set (match_operand:DREG 0 "register_operand" "=w")
195 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
196 (set (match_operand:DREG2 2 "register_operand" "=w")
197 (match_operand:DREG2 3 "memory_operand" "m"))]
abc52318
KT
198 "TARGET_SIMD
199 && rtx_equal_p (XEXP (operands[3], 0),
200 plus_constant (Pmode,
201 XEXP (operands[1], 0),
dfe1da23 202 GET_MODE_SIZE (<DREG:MODE>mode)))"
2c272091 203 "ldp\\t%d0, %d2, %z1"
abc52318
KT
204 [(set_attr "type" "neon_ldp")]
205)
206
dfe1da23
JW
207(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
208 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
209 (match_operand:DREG 1 "register_operand" "w"))
210 (set (match_operand:DREG2 2 "memory_operand" "=m")
211 (match_operand:DREG2 3 "register_operand" "w"))]
abc52318
KT
212 "TARGET_SIMD
213 && rtx_equal_p (XEXP (operands[2], 0),
214 plus_constant (Pmode,
215 XEXP (operands[0], 0),
dfe1da23 216 GET_MODE_SIZE (<DREG:MODE>mode)))"
2c272091 217 "stp\\t%d1, %d3, %z0"
abc52318
KT
218 [(set_attr "type" "neon_stp")]
219)
220
9f5361c8
KT
221(define_insn "load_pair<VQ:mode><VQ2:mode>"
222 [(set (match_operand:VQ 0 "register_operand" "=w")
223 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
224 (set (match_operand:VQ2 2 "register_operand" "=w")
225 (match_operand:VQ2 3 "memory_operand" "m"))]
226 "TARGET_SIMD
227 && rtx_equal_p (XEXP (operands[3], 0),
228 plus_constant (Pmode,
229 XEXP (operands[1], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
2c272091 231 "ldp\\t%q0, %q2, %z1"
9f5361c8
KT
232 [(set_attr "type" "neon_ldp_q")]
233)
234
235(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
236 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
237 (match_operand:VQ 1 "register_operand" "w"))
238 (set (match_operand:VQ2 2 "memory_operand" "=m")
239 (match_operand:VQ2 3 "register_operand" "w"))]
240 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
241 plus_constant (Pmode,
242 XEXP (operands[0], 0),
243 GET_MODE_SIZE (<VQ:MODE>mode)))"
2c272091 244 "stp\\t%q1, %q3, %z0"
9f5361c8
KT
245 [(set_attr "type" "neon_stp_q")]
246)
247
248
43e9d192 249(define_split
abbe1ed2
SMW
250 [(set (match_operand:VQMOV 0 "register_operand" "")
251 (match_operand:VQMOV 1 "register_operand" ""))]
43e9d192
IB
252 "TARGET_SIMD && reload_completed
253 && GP_REGNUM_P (REGNO (operands[0]))
254 && GP_REGNUM_P (REGNO (operands[1]))"
2d8c6dc1 255 [(const_int 0)]
43e9d192 256{
2d8c6dc1
AH
257 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
258 DONE;
43e9d192
IB
259})
260
fd4842cd 261(define_split
abbe1ed2
SMW
262 [(set (match_operand:VQMOV 0 "register_operand" "")
263 (match_operand:VQMOV 1 "register_operand" ""))]
fd4842cd
SN
264 "TARGET_SIMD && reload_completed
265 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
266 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
267 [(const_int 0)]
268{
269 aarch64_split_simd_move (operands[0], operands[1]);
270 DONE;
271})
272
0016d8d9 273(define_expand "@aarch64_split_simd_mov<mode>"
abbe1ed2 274 [(set (match_operand:VQMOV 0)
bce43c04 275 (match_operand:VQMOV 1))]
fd4842cd
SN
276 "TARGET_SIMD"
277 {
278 rtx dst = operands[0];
279 rtx src = operands[1];
280
281 if (GP_REGNUM_P (REGNO (src)))
282 {
bce43c04
RS
283 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
284 rtx src_high_part = gen_highpart (<VHALF>mode, src);
285 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
fd4842cd 286
bce43c04
RS
287 emit_move_insn (dst_low_part, src_low_part);
288 emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
289 src_high_part));
fd4842cd 290 }
fd4842cd
SN
291 else
292 {
bce43c04
RS
293 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
294 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
f5cbabc1
RS
295 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
296 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
bce43c04
RS
297 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
298 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
fd4842cd
SN
299 }
300 DONE;
301 }
302)
303
c15893df
RS
304(define_expand "aarch64_get_half<mode>"
305 [(set (match_operand:<VHALF> 0 "register_operand")
fd4842cd 306 (vec_select:<VHALF>
c15893df
RS
307 (match_operand:VQMOV 1 "register_operand")
308 (match_operand 2 "ascending_int_parallel")))]
309 "TARGET_SIMD"
310)
311
b6e7a749
KT
312(define_expand "aarch64_get_low<mode>"
313 [(match_operand:<VHALF> 0 "register_operand")
314 (match_operand:VQMOV 1 "register_operand")]
315 "TARGET_SIMD"
316 {
317 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
318 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
319 DONE;
320 }
321)
322
d9bb5215
KT
323(define_expand "aarch64_get_high<mode>"
324 [(match_operand:<VHALF> 0 "register_operand")
325 (match_operand:VQMOV 1 "register_operand")]
326 "TARGET_SIMD"
327 {
328 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
329 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
330 DONE;
331 }
332)
333
c15893df
RS
334(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
335 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
336 (vec_select:<VHALF>
337 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
338 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
339 "TARGET_SIMD"
340 "@
341 #
342 umov\t%0, %1.d[0]"
343 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
344 [(set (match_dup 0) (match_dup 1))]
345 {
346 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
347 }
348 [(set_attr "type" "mov_reg,neon_to_gp<q>")
349 (set_attr "length" "4")]
350)
fd4842cd
SN
351
352(define_insn "aarch64_simd_mov_from_<mode>high"
c15893df 353 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
fd4842cd 354 (vec_select:<VHALF>
c15893df
RS
355 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
356 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
357 "TARGET_SIMD"
358 "@
359 dup\\t%d0, %1.d[1]
360 umov\t%0, %1.d[1]"
361 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
362 (set_attr "length" "4")]
363)
fd4842cd 364
43e9d192 365(define_insn "orn<mode>3"
a844a695
AL
366 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
367 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
368 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
369 "TARGET_SIMD"
370 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
78ec3036 371 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
372)
373
374(define_insn "bic<mode>3"
a844a695
AL
375 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
376 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
377 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
378 "TARGET_SIMD"
379 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
78ec3036 380 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
381)
382
383(define_insn "add<mode>3"
a844a695
AL
384 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
385 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
386 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
387 "TARGET_SIMD"
388 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 389 [(set_attr "type" "neon_add<q>")]
43e9d192
IB
390)
391
392(define_insn "sub<mode>3"
a844a695
AL
393 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
394 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
395 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
396 "TARGET_SIMD"
397 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 398 [(set_attr "type" "neon_sub<q>")]
43e9d192
IB
399)
400
401(define_insn "mul<mode>3"
a844a695
AL
402 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
403 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
404 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
43e9d192
IB
405 "TARGET_SIMD"
406 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 407 [(set_attr "type" "neon_mul_<Vetype><q>")]
43e9d192
IB
408)
409
7eb2bd57 410(define_insn "bswap<mode>2"
c7f28cd5
KT
411 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
412 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
413 "TARGET_SIMD"
414 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
415 [(set_attr "type" "neon_rev<q>")]
416)
417
cf465d71
AL
418(define_insn "aarch64_rbit<mode>"
419 [(set (match_operand:VB 0 "register_operand" "=w")
420 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
421 UNSPEC_RBIT))]
422 "TARGET_SIMD"
423 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
424 [(set_attr "type" "neon_rbit")]
425)
426
5e32e83b
JW
427(define_expand "ctz<mode>2"
428 [(set (match_operand:VS 0 "register_operand")
429 (ctz:VS (match_operand:VS 1 "register_operand")))]
430 "TARGET_SIMD"
431 {
7eb2bd57 432 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
5e32e83b
JW
433 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
434 <MODE>mode, 0);
435 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
436 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
437 DONE;
438 }
439)
440
4261463d
TC
441(define_expand "xorsign<mode>3"
442 [(match_operand:VHSDF 0 "register_operand")
443 (match_operand:VHSDF 1 "register_operand")
444 (match_operand:VHSDF 2 "register_operand")]
445 "TARGET_SIMD"
446{
447
5f565314 448 machine_mode imode = <V_INT_EQUIV>mode;
4261463d
TC
449 rtx v_bitmask = gen_reg_rtx (imode);
450 rtx op1x = gen_reg_rtx (imode);
451 rtx op2x = gen_reg_rtx (imode);
452
453 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
454 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
455
456 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
457
458 emit_move_insn (v_bitmask,
5f565314 459 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
4261463d
TC
460 HOST_WIDE_INT_M1U << bits));
461
5f565314
RS
462 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
463 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
4261463d
TC
464 emit_move_insn (operands[0],
465 lowpart_subreg (<MODE>mode, op1x, imode));
466 DONE;
467}
468)
469
9d63f43b
TC
470;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
471;; fact that their usage need to guarantee that the source vectors are
472;; contiguous. It would be wrong to describe the operation without being able
473;; to describe the permute that is also required, but even if that is done
474;; the permute would have been created as a LOAD_LANES which means the values
475;; in the registers are in the wrong order.
476(define_insn "aarch64_fcadd<rot><mode>"
477 [(set (match_operand:VHSDF 0 "register_operand" "=w")
478 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
479 (match_operand:VHSDF 2 "register_operand" "w")]
480 FCADD))]
481 "TARGET_COMPLEX"
482 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
483 [(set_attr "type" "neon_fcadd")]
484)
485
84747acf
TC
486(define_expand "cadd<rot><mode>3"
487 [(set (match_operand:VHSDF 0 "register_operand")
488 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
489 (match_operand:VHSDF 2 "register_operand")]
490 FCADD))]
491 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
492)
493
9d63f43b
TC
494(define_insn "aarch64_fcmla<rot><mode>"
495 [(set (match_operand:VHSDF 0 "register_operand" "=w")
496 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
497 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
498 (match_operand:VHSDF 3 "register_operand" "w")]
499 FCMLA)))]
500 "TARGET_COMPLEX"
501 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
502 [(set_attr "type" "neon_fcmla")]
503)
504
505
506(define_insn "aarch64_fcmla_lane<rot><mode>"
507 [(set (match_operand:VHSDF 0 "register_operand" "=w")
508 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
509 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
510 (match_operand:VHSDF 3 "register_operand" "w")
511 (match_operand:SI 4 "const_int_operand" "n")]
512 FCMLA)))]
513 "TARGET_COMPLEX"
33b5a38c
TC
514{
515 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
516 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
517}
9d63f43b
TC
518 [(set_attr "type" "neon_fcmla")]
519)
520
521(define_insn "aarch64_fcmla_laneq<rot>v4hf"
522 [(set (match_operand:V4HF 0 "register_operand" "=w")
523 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
524 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
525 (match_operand:V8HF 3 "register_operand" "w")
526 (match_operand:SI 4 "const_int_operand" "n")]
527 FCMLA)))]
528 "TARGET_COMPLEX"
33b5a38c
TC
529{
530 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
531 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
532}
9d63f43b
TC
533 [(set_attr "type" "neon_fcmla")]
534)
535
536(define_insn "aarch64_fcmlaq_lane<rot><mode>"
537 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
538 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
539 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
540 (match_operand:<VHALF> 3 "register_operand" "w")
541 (match_operand:SI 4 "const_int_operand" "n")]
542 FCMLA)))]
543 "TARGET_COMPLEX"
33b5a38c
TC
544{
545 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
546 operands[4]
547 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
548 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
549}
9d63f43b
TC
550 [(set_attr "type" "neon_fcmla")]
551)
552
ad260343
TC
553;; The complex mla/mls operations always need to expand to two instructions.
554;; The first operation does half the computation and the second does the
555;; remainder. Because of this, expand early.
556(define_expand "cml<fcmac1><conj_op><mode>4"
557 [(set (match_operand:VHSDF 0 "register_operand")
ab95fe61
TC
558 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
559 (match_operand:VHSDF 2 "register_operand")]
560 FCMLA_OP)
561 (match_operand:VHSDF 3 "register_operand")))]
ad260343
TC
562 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
563{
564 rtx tmp = gen_reg_rtx (<MODE>mode);
ab95fe61
TC
565 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
566 operands[2], operands[1]));
ad260343 567 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
ab95fe61 568 operands[2], operands[1]));
ad260343
TC
569 DONE;
570})
571
572;; The complex mul operations always need to expand to two instructions.
573;; The first operation does half the computation and the second does the
574;; remainder. Because of this, expand early.
575(define_expand "cmul<conj_op><mode>3"
576 [(set (match_operand:VHSDF 0 "register_operand")
577 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
578 (match_operand:VHSDF 2 "register_operand")]
579 FCMUL_OP))]
580 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
581{
582 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
583 rtx res1 = gen_reg_rtx (<MODE>mode);
584 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
585 operands[2], operands[1]));
586 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
587 operands[2], operands[1]));
588 DONE;
589})
590
1ab22700
TC
591;; These expands map to the Dot Product optab the vectorizer checks for
592;; and to the intrinsics patttern.
5402023f
TC
593;; The auto-vectorizer expects a dot product builtin that also does an
594;; accumulation into the provided register.
595;; Given the following pattern
596;;
597;; for (i=0; i<len; i++) {
598;; c = a[i] * b[i];
599;; r += c;
600;; }
601;; return result;
602;;
603;; This can be auto-vectorized to
604;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
605;;
606;; given enough iterations. However the vectorizer can keep unrolling the loop
607;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
608;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
609;; ...
610;;
611;; and so the vectorizer provides r, in which the result has to be accumulated.
1ab22700
TC
612(define_insn "<sur>dot_prod<vsi2qi>"
613 [(set (match_operand:VS 0 "register_operand" "=w")
614 (plus:VS
615 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
616 (match_operand:<VSI2QI> 2 "register_operand" "w")]
617 DOTPROD)
618 (match_operand:VS 3 "register_operand" "0")))]
5402023f 619 "TARGET_DOTPROD"
1ab22700
TC
620 "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
621 [(set_attr "type" "neon_dot<q>")]
622)
5402023f 623
2050ac1a
TC
624;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
625;; (vector) Dot Product operation and the vectorized optab.
626(define_insn "usdot_prod<vsi2qi>"
627 [(set (match_operand:VS 0 "register_operand" "=w")
628 (plus:VS
629 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
630 (match_operand:<VSI2QI> 2 "register_operand" "w")]
631 UNSPEC_USDOT)
632 (match_operand:VS 3 "register_operand" "0")))]
633 "TARGET_I8MM"
634 "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
635 [(set_attr "type" "neon_dot<q>")]
636)
637
7a08d813
TC
638;; These instructions map to the __builtins for the Dot Product
639;; indexed operations.
640(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
641 [(set (match_operand:VS 0 "register_operand" "=w")
1ab22700
TC
642 (plus:VS
643 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
644 (match_operand:V8QI 3 "register_operand" "<h_con>")
645 (match_operand:SI 4 "immediate_operand" "i")]
646 DOTPROD)
647 (match_operand:VS 1 "register_operand" "0")))]
7a08d813
TC
648 "TARGET_DOTPROD"
649 {
441b4d0f 650 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
7a08d813
TC
651 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
652 }
3ed8fcfa 653 [(set_attr "type" "neon_dot<q>")]
7a08d813
TC
654)
655
656(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
657 [(set (match_operand:VS 0 "register_operand" "=w")
1ab22700
TC
658 (plus:VS
659 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
660 (match_operand:V16QI 3 "register_operand" "<h_con>")
661 (match_operand:SI 4 "immediate_operand" "i")]
662 DOTPROD)
663 (match_operand:VS 1 "register_operand" "0")))]
7a08d813
TC
664 "TARGET_DOTPROD"
665 {
441b4d0f 666 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
7a08d813
TC
667 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
668 }
3ed8fcfa 669 [(set_attr "type" "neon_dot<q>")]
7a08d813
TC
670)
671
8c197c85
SMW
672;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
673;; (by element) Dot Product operations.
674(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
675 [(set (match_operand:VS 0 "register_operand" "=w")
676 (plus:VS
677 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
678 (match_operand:VB 3 "register_operand" "w")
679 (match_operand:SI 4 "immediate_operand" "i")]
680 DOTPROD_I8MM)
681 (match_operand:VS 1 "register_operand" "0")))]
682 "TARGET_I8MM"
683 {
684 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
685 int lane = INTVAL (operands[4]);
686 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
687 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
688 }
689 [(set_attr "type" "neon_dot<VS:q>")]
690)
691
ab014eb3
TC
692(define_expand "copysign<mode>3"
693 [(match_operand:VHSDF 0 "register_operand")
694 (match_operand:VHSDF 1 "register_operand")
695 (match_operand:VHSDF 2 "register_operand")]
696 "TARGET_FLOAT && TARGET_SIMD"
697{
5f565314 698 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
ab014eb3
TC
699 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
700
701 emit_move_insn (v_bitmask,
5f565314 702 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
ab014eb3
TC
703 HOST_WIDE_INT_M1U << bits));
704 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
705 operands[2], operands[1]));
706 DONE;
707}
708)
709
1baf4ed8 710(define_insn "mul_lane<mode>3"
28de75d2
RS
711 [(set (match_operand:VMULD 0 "register_operand" "=w")
712 (mult:VMULD
713 (vec_duplicate:VMULD
1baf4ed8 714 (vec_select:<VEL>
28de75d2 715 (match_operand:<VCOND> 2 "register_operand" "<h_con>")
1baf4ed8 716 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
28de75d2 717 (match_operand:VMULD 1 "register_operand" "w")))]
779aea46 718 "TARGET_SIMD"
1dd055a2 719 {
28de75d2 720 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
1baf4ed8 721 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
1dd055a2 722 }
ab2e8f01 723 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
779aea46
JG
724)
725
d388179a 726(define_insn "mul_laneq<mode>3"
28de75d2
RS
727 [(set (match_operand:VMUL 0 "register_operand" "=w")
728 (mult:VMUL
729 (vec_duplicate:VMUL
779aea46 730 (vec_select:<VEL>
28de75d2
RS
731 (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
732 (parallel [(match_operand:SI 3 "immediate_operand")])))
733 (match_operand:VMUL 1 "register_operand" "w")))]
779aea46 734 "TARGET_SIMD"
1dd055a2 735 {
28de75d2
RS
736 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
737 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
1dd055a2 738 }
28de75d2 739 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
779aea46
JG
740)
741
60518e64 742(define_insn "mul_n<mode>3"
22330033 743 [(set (match_operand:VMUL 0 "register_operand" "=w")
60518e64
JW
744 (mult:VMUL
745 (vec_duplicate:VMUL
746 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
747 (match_operand:VMUL 1 "register_operand" "w")))]
779aea46 748 "TARGET_SIMD"
60518e64 749 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
ab2e8f01 750 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
779aea46
JG
751)
752
0016d8d9 753(define_insn "@aarch64_rsqrte<mode>"
d7f33f07
JW
754 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
755 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
a6fc00da
BH
756 UNSPEC_RSQRTE))]
757 "TARGET_SIMD"
758 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
daef0a8c 759 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
a6fc00da 760
0016d8d9 761(define_insn "@aarch64_rsqrts<mode>"
68ad28c3
JW
762 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
763 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
764 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
33d72b63 765 UNSPEC_RSQRTS))]
a6fc00da
BH
766 "TARGET_SIMD"
767 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 768 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
a6fc00da 769
ee62a5a6 770(define_expand "rsqrt<mode>2"
1bbffb87
DZ
771 [(set (match_operand:VALLF 0 "register_operand")
772 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
a6fc00da
BH
773 UNSPEC_RSQRT))]
774 "TARGET_SIMD"
775{
98daafa0 776 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
a6fc00da
BH
777 DONE;
778})
779
d14cf89b
KT
780(define_insn "aarch64_ursqrte<mode>"
781[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
782 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
783 UNSPEC_RSQRTE))]
784"TARGET_SIMD"
785"ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
786[(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
787
779aea46
JG
788(define_insn "*aarch64_mul3_elt_to_64v2df"
789 [(set (match_operand:DF 0 "register_operand" "=w")
790 (mult:DF
791 (vec_select:DF
792 (match_operand:V2DF 1 "register_operand" "w")
793 (parallel [(match_operand:SI 2 "immediate_operand")]))
794 (match_operand:DF 3 "register_operand" "w")))]
795 "TARGET_SIMD"
1dd055a2 796 {
7ac29c0f 797 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1dd055a2
JG
798 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
799 }
78ec3036 800 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
779aea46
JG
801)
802
43e9d192 803(define_insn "neg<mode>2"
a844a695
AL
804 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
805 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
43e9d192
IB
806 "TARGET_SIMD"
807 "neg\t%0.<Vtype>, %1.<Vtype>"
78ec3036 808 [(set_attr "type" "neon_neg<q>")]
43e9d192
IB
809)
810
811(define_insn "abs<mode>2"
a844a695
AL
812 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
813 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
43e9d192
IB
814 "TARGET_SIMD"
815 "abs\t%0.<Vtype>, %1.<Vtype>"
78ec3036 816 [(set_attr "type" "neon_abs<q>")]
43e9d192
IB
817)
818
285398d2
JG
819;; The intrinsic version of integer ABS must not be allowed to
820;; combine with any operation with an integerated ABS step, such
821;; as SABD.
822(define_insn "aarch64_abs<mode>"
823 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
824 (unspec:VSDQ_I_DI
825 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
826 UNSPEC_ABS))]
827 "TARGET_SIMD"
828 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
829 [(set_attr "type" "neon_abs<q>")]
830)
831
8544ed6e
KT
832;; It's tempting to represent SABD as ABS (MINUS op1 op2).
833;; This isn't accurate as ABS treats always its input as a signed value.
834;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
835;; Whereas SABD would return 192 (-64 signed) on the above example.
836;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
79db5945 837(define_insn "aarch64_<su>abd<mode>"
fa2e9a58 838 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
8544ed6e
KT
839 (minus:VDQ_BHSI
840 (USMAX:VDQ_BHSI
841 (match_operand:VDQ_BHSI 1 "register_operand" "w")
842 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
75df257b
RS
843 (<max_opp>:VDQ_BHSI
844 (match_dup 1)
845 (match_dup 2))))]
fa2e9a58 846 "TARGET_SIMD"
8544ed6e 847 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 848 [(set_attr "type" "neon_abd<q>")]
fa2e9a58
N
849)
850
6b2034c4
KT
851
852(define_insn "aarch64_<sur>abdl<mode>"
853 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
854 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
855 (match_operand:VD_BHSI 2 "register_operand" "w")]
856 ABDL))]
857 "TARGET_SIMD"
858 "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
859 [(set_attr "type" "neon_abd<q>")]
860)
861
9b588cfb 862(define_insn "aarch64_<sur>abdl2<mode>"
75add2d0 863 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
9b588cfb
KT
864 (unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
865 (match_operand:VQW 2 "register_operand" "w")]
75add2d0
KT
866 ABDL2))]
867 "TARGET_SIMD"
868 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
869 [(set_attr "type" "neon_abd<q>")]
870)
871
d5e0d1f1
KT
872(define_insn "aarch64_<sur>abal<mode>"
873 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
874 (unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
875 (match_operand:VD_BHSI 3 "register_operand" "w")
876 (match_operand:<VWIDE> 1 "register_operand" "0")]
75add2d0
KT
877 ABAL))]
878 "TARGET_SIMD"
d5e0d1f1 879 "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
75add2d0
KT
880 [(set_attr "type" "neon_arith_acc<q>")]
881)
882
9f499a86
KT
883(define_insn "aarch64_<sur>abal2<mode>"
884 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
885 (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
886 (match_operand:VQW 3 "register_operand" "w")
887 (match_operand:<VWIDE> 1 "register_operand" "0")]
888 ABAL2))]
889 "TARGET_SIMD"
890 "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
891 [(set_attr "type" "neon_arith_acc<q>")]
892)
893
e811f10b 894(define_insn "aarch64_<sur>adalp<mode>"
75add2d0 895 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
8e7f6e03
JW
896 (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
897 (match_operand:<VDBLW> 1 "register_operand" "0")]
75add2d0
KT
898 ADALP))]
899 "TARGET_SIMD"
e811f10b 900 "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
75add2d0
KT
901 [(set_attr "type" "neon_reduc_add<q>")]
902)
903
904;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
905;; inputs in operands 1 and 2. The sequence also has to perform a widening
906;; reduction of the difference into a V4SI vector and accumulate that into
907;; operand 3 before copying that into the result operand 0.
908;; Perform that with a sequence of:
909;; UABDL2 tmp.8h, op1.16b, op2.16b
d5e0d1f1 910;; UABAL tmp.8h, op1.8b, op2.8b
75add2d0
KT
911;; UADALP op3.4s, tmp.8h
912;; MOV op0, op3 // should be eliminated in later passes.
72215009
KT
913;;
914;; For TARGET_DOTPROD we do:
915;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
916;; UABD tmp2.16b, op1.16b, op2.16b
917;; UDOT op3.4s, tmp2.16b, tmp1.16b
918;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
919;;
920;; The signed version just uses the signed variants of the above instructions
921;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
922;; unsigned.
75add2d0
KT
923
924(define_expand "<sur>sadv16qi"
925 [(use (match_operand:V4SI 0 "register_operand"))
926 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
927 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
928 (use (match_operand:V4SI 3 "register_operand"))]
929 "TARGET_SIMD"
930 {
72215009
KT
931 if (TARGET_DOTPROD)
932 {
933 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
934 rtx abd = gen_reg_rtx (V16QImode);
79db5945 935 emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
1ab22700 936 emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
72215009
KT
937 DONE;
938 }
75add2d0 939 rtx reduc = gen_reg_rtx (V8HImode);
9b588cfb
KT
940 emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
941 operands[2]));
d5e0d1f1
KT
942 emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
943 gen_lowpart (V8QImode, operands[1]),
944 gen_lowpart (V8QImode,
945 operands[2])));
e811f10b 946 emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
75add2d0
KT
947 emit_move_insn (operands[0], operands[3]);
948 DONE;
949 }
950)
951
c9d25aa7 952(define_insn "aarch64_<su>aba<mode>"
fa2e9a58 953 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
c9d25aa7
KT
954 (plus:VDQ_BHSI (minus:VDQ_BHSI
955 (USMAX:VDQ_BHSI
956 (match_operand:VDQ_BHSI 2 "register_operand" "w")
957 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
958 (<max_opp>:VDQ_BHSI
959 (match_dup 2)
960 (match_dup 3)))
961 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
962 "TARGET_SIMD"
963 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 964 [(set_attr "type" "neon_arith_acc<q>")]
fa2e9a58
N
965)
966
a672fa12 967(define_insn "fabd<mode>3"
68ad28c3
JW
968 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
969 (abs:VHSDF_HSDF
970 (minus:VHSDF_HSDF
971 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
972 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1d0c8e5c 973 "TARGET_SIMD"
a672fa12 974 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 975 [(set_attr "type" "neon_fp_abd_<stype><q>")]
1d0c8e5c
VP
976)
977
ab6501d7 978;; For AND (vector, register) and BIC (vector, immediate)
43e9d192 979(define_insn "and<mode>3"
ab6501d7
SD
980 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
981 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
982 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
43e9d192 983 "TARGET_SIMD"
ab6501d7
SD
984 {
985 switch (which_alternative)
986 {
987 case 0:
988 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
989 case 1:
b187677b
RS
990 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
991 AARCH64_CHECK_BIC);
ab6501d7
SD
992 default:
993 gcc_unreachable ();
994 }
995 }
78ec3036 996 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
997)
998
ab6501d7 999;; For ORR (vector, register) and ORR (vector, immediate)
43e9d192 1000(define_insn "ior<mode>3"
ab6501d7
SD
1001 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1002 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
1003 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
43e9d192 1004 "TARGET_SIMD"
ab6501d7
SD
1005 {
1006 switch (which_alternative)
1007 {
1008 case 0:
1009 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
1010 case 1:
b187677b
RS
1011 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
1012 AARCH64_CHECK_ORR);
ab6501d7
SD
1013 default:
1014 gcc_unreachable ();
1015 }
1016 }
78ec3036 1017 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
1018)
1019
1020(define_insn "xor<mode>3"
a844a695
AL
1021 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1022 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1023 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
1024 "TARGET_SIMD"
1025 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
78ec3036 1026 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
1027)
1028
1029(define_insn "one_cmpl<mode>2"
a844a695
AL
1030 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1031 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
43e9d192
IB
1032 "TARGET_SIMD"
1033 "not\t%0.<Vbtype>, %1.<Vbtype>"
78ec3036 1034 [(set_attr "type" "neon_logic<q>")]
43e9d192
IB
1035)
1036
1037(define_insn "aarch64_simd_vec_set<mode>"
8364e58b
KT
1038 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1039 (vec_merge:VALL_F16
1040 (vec_duplicate:VALL_F16
c48a6819 1041 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
8364e58b 1042 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
960ceebc 1043 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
43e9d192 1044 "TARGET_SIMD"
e58bf20a 1045 {
7ac29c0f 1046 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
e58bf20a
TB
1047 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1048 switch (which_alternative)
1049 {
1050 case 0:
e58bf20a 1051 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
8364e58b
KT
1052 case 1:
1053 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
960ceebc
AL
1054 case 2:
1055 return "ld1\\t{%0.<Vetype>}[%p2], %1";
e58bf20a
TB
1056 default:
1057 gcc_unreachable ();
1058 }
1059 }
8364e58b 1060 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
43e9d192
IB
1061)
1062
c9c87e6f 1063(define_insn "@aarch64_simd_vec_copy_lane<mode>"
b160939b
KT
1064 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1065 (vec_merge:VALL_F16
1066 (vec_duplicate:VALL_F16
9bd62242 1067 (vec_select:<VEL>
b160939b 1068 (match_operand:VALL_F16 3 "register_operand" "w")
9bd62242
JG
1069 (parallel
1070 [(match_operand:SI 4 "immediate_operand" "i")])))
b160939b 1071 (match_operand:VALL_F16 1 "register_operand" "0")
9bd62242
JG
1072 (match_operand:SI 2 "immediate_operand" "i")))]
1073 "TARGET_SIMD"
1074 {
441b4d0f 1075 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
9bd62242 1076 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
441b4d0f 1077 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9bd62242
JG
1078
1079 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1080 }
1081 [(set_attr "type" "neon_ins<q>")]
1082)
1083
1084(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
88119b46
KT
1085 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1086 (vec_merge:VALL_F16_NO_V2Q
1087 (vec_duplicate:VALL_F16_NO_V2Q
9bd62242
JG
1088 (vec_select:<VEL>
1089 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1090 (parallel
1091 [(match_operand:SI 4 "immediate_operand" "i")])))
88119b46 1092 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
9bd62242
JG
1093 (match_operand:SI 2 "immediate_operand" "i")))]
1094 "TARGET_SIMD"
1095 {
441b4d0f 1096 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
9bd62242 1097 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
441b4d0f
RS
1098 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1099 INTVAL (operands[4]));
9bd62242
JG
1100
1101 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1102 }
1103 [(set_attr "type" "neon_ins<q>")]
1104)
1105
a52cf5cf
PW
1106(define_expand "signbit<mode>2"
1107 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1108 (use (match_operand:VDQSF 1 "register_operand"))]
1109 "TARGET_SIMD"
1110{
1111 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1112 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1113 shift_amount);
1114 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1115
1116 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1117 shift_vector));
1118 DONE;
1119})
1120
43e9d192 1121(define_insn "aarch64_simd_lshr<mode>"
a844a695
AL
1122 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1123 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1124 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
43e9d192
IB
1125 "TARGET_SIMD"
1126 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
78ec3036 1127 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
1128)
1129
1130(define_insn "aarch64_simd_ashr<mode>"
ea464fd2
TC
1131 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1132 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
1133 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
43e9d192 1134 "TARGET_SIMD"
ea464fd2
TC
1135 "@
1136 cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1137 sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1138 [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
43e9d192
IB
1139)
1140
462e6f9a
ST
1141(define_insn "*aarch64_simd_sra<mode>"
1142 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1143 (plus:VDQ_I
1144 (SHIFTRT:VDQ_I
1145 (match_operand:VDQ_I 1 "register_operand" "w")
1146 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1147 (match_operand:VDQ_I 3 "register_operand" "0")))]
1148 "TARGET_SIMD"
1149 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1150 [(set_attr "type" "neon_shift_acc<q>")]
1151)
1152
43e9d192 1153(define_insn "aarch64_simd_imm_shl<mode>"
a844a695
AL
1154 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1155 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1156 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
43e9d192
IB
1157 "TARGET_SIMD"
1158 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
78ec3036 1159 [(set_attr "type" "neon_shift_imm<q>")]
43e9d192
IB
1160)
1161
1162(define_insn "aarch64_simd_reg_sshl<mode>"
a844a695
AL
1163 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1164 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1165 (match_operand:VDQ_I 2 "register_operand" "w")))]
43e9d192
IB
1166 "TARGET_SIMD"
1167 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1168 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
1169)
1170
1171(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
a844a695
AL
1172 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1173 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1174 (match_operand:VDQ_I 2 "register_operand" "w")]
43e9d192
IB
1175 UNSPEC_ASHIFT_UNSIGNED))]
1176 "TARGET_SIMD"
1177 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1178 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
1179)
1180
1181(define_insn "aarch64_simd_reg_shl<mode>_signed"
a844a695
AL
1182 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1183 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1184 (match_operand:VDQ_I 2 "register_operand" "w")]
43e9d192
IB
1185 UNSPEC_ASHIFT_SIGNED))]
1186 "TARGET_SIMD"
1187 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1188 [(set_attr "type" "neon_shift_reg<q>")]
43e9d192
IB
1189)
1190
1191(define_expand "ashl<mode>3"
1bbffb87
DZ
1192 [(match_operand:VDQ_I 0 "register_operand")
1193 (match_operand:VDQ_I 1 "register_operand")
1194 (match_operand:SI 2 "general_operand")]
43e9d192
IB
1195 "TARGET_SIMD"
1196{
1197 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1198 int shift_amount;
1199
1200 if (CONST_INT_P (operands[2]))
1201 {
1202 shift_amount = INTVAL (operands[2]);
1203 if (shift_amount >= 0 && shift_amount < bit_width)
1204 {
1205 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1206 shift_amount);
1207 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1208 operands[1],
1209 tmp));
1210 DONE;
1211 }
43e9d192
IB
1212 }
1213
7a6588fe
JJ
1214 operands[2] = force_reg (SImode, operands[2]);
1215
1216 rtx tmp = gen_reg_rtx (<MODE>mode);
1217 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1218 operands[2],
1219 0)));
1220 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1221 DONE;
1222})
43e9d192
IB
1223
1224(define_expand "lshr<mode>3"
1bbffb87
DZ
1225 [(match_operand:VDQ_I 0 "register_operand")
1226 (match_operand:VDQ_I 1 "register_operand")
1227 (match_operand:SI 2 "general_operand")]
43e9d192
IB
1228 "TARGET_SIMD"
1229{
1230 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1231 int shift_amount;
1232
1233 if (CONST_INT_P (operands[2]))
1234 {
1235 shift_amount = INTVAL (operands[2]);
1236 if (shift_amount > 0 && shift_amount <= bit_width)
1237 {
1238 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1239 shift_amount);
1240 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1241 operands[1],
1242 tmp));
1243 DONE;
1244 }
43e9d192
IB
1245 }
1246
7a6588fe
JJ
1247 operands[2] = force_reg (SImode, operands[2]);
1248
1249 rtx tmp = gen_reg_rtx (SImode);
1250 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1251 emit_insn (gen_negsi2 (tmp, operands[2]));
1252 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1253 convert_to_mode (<VEL>mode, tmp, 0)));
1254 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1255 tmp1));
1256 DONE;
1257})
43e9d192
IB
1258
1259(define_expand "ashr<mode>3"
1bbffb87
DZ
1260 [(match_operand:VDQ_I 0 "register_operand")
1261 (match_operand:VDQ_I 1 "register_operand")
1262 (match_operand:SI 2 "general_operand")]
43e9d192
IB
1263 "TARGET_SIMD"
1264{
1265 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1266 int shift_amount;
1267
1268 if (CONST_INT_P (operands[2]))
1269 {
1270 shift_amount = INTVAL (operands[2]);
1271 if (shift_amount > 0 && shift_amount <= bit_width)
1272 {
1273 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1274 shift_amount);
1275 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1276 operands[1],
1277 tmp));
1278 DONE;
1279 }
43e9d192
IB
1280 }
1281
7a6588fe
JJ
1282 operands[2] = force_reg (SImode, operands[2]);
1283
1284 rtx tmp = gen_reg_rtx (SImode);
1285 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1286 emit_insn (gen_negsi2 (tmp, operands[2]));
1287 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1288 tmp, 0)));
1289 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1290 tmp1));
1291 DONE;
1292})
43e9d192
IB
1293
1294(define_expand "vashl<mode>3"
1bbffb87
DZ
1295 [(match_operand:VDQ_I 0 "register_operand")
1296 (match_operand:VDQ_I 1 "register_operand")
1297 (match_operand:VDQ_I 2 "register_operand")]
43e9d192
IB
1298 "TARGET_SIMD"
1299{
1300 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1301 operands[2]));
1302 DONE;
1303})
1304
43e9d192 1305(define_expand "vashr<mode>3"
e2e0b85c
TB
1306 [(match_operand:VDQ_I 0 "register_operand")
1307 (match_operand:VDQ_I 1 "register_operand")
1308 (match_operand:VDQ_I 2 "register_operand")]
43e9d192
IB
1309 "TARGET_SIMD"
1310{
1311 rtx neg = gen_reg_rtx (<MODE>mode);
1312 emit (gen_neg<mode>2 (neg, operands[2]));
1313 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1314 neg));
1315 DONE;
1316})
1317
f9a4c9a6
AV
1318;; DI vector shift
1319(define_expand "aarch64_ashr_simddi"
1bbffb87
DZ
1320 [(match_operand:DI 0 "register_operand")
1321 (match_operand:DI 1 "register_operand")
1322 (match_operand:SI 2 "aarch64_shift_imm64_di")]
f9a4c9a6
AV
1323 "TARGET_SIMD"
1324 {
b5b34d37
AL
1325 /* An arithmetic shift right by 64 fills the result with copies of the sign
1326 bit, just like asr by 63 - however the standard pattern does not handle
1327 a shift by 64. */
f9a4c9a6 1328 if (INTVAL (operands[2]) == 64)
b5b34d37
AL
1329 operands[2] = GEN_INT (63);
1330 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
f9a4c9a6
AV
1331 DONE;
1332 }
1333)
1334
43e9d192 1335(define_expand "vlshr<mode>3"
e2e0b85c
TB
1336 [(match_operand:VDQ_I 0 "register_operand")
1337 (match_operand:VDQ_I 1 "register_operand")
1338 (match_operand:VDQ_I 2 "register_operand")]
43e9d192
IB
1339 "TARGET_SIMD"
1340{
1341 rtx neg = gen_reg_rtx (<MODE>mode);
1342 emit (gen_neg<mode>2 (neg, operands[2]));
1343 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1344 neg));
1345 DONE;
1346})
1347
252c7556 1348(define_expand "aarch64_lshr_simddi"
1bbffb87
DZ
1349 [(match_operand:DI 0 "register_operand")
1350 (match_operand:DI 1 "register_operand")
1351 (match_operand:SI 2 "aarch64_shift_imm64_di")]
252c7556
AV
1352 "TARGET_SIMD"
1353 {
1354 if (INTVAL (operands[2]) == 64)
84488801 1355 emit_move_insn (operands[0], const0_rtx);
252c7556
AV
1356 else
1357 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1358 DONE;
1359 }
1360)
1361
0b4eefd5
AL
1362;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1363(define_insn "vec_shr_<mode>"
1364 [(set (match_operand:VD 0 "register_operand" "=w")
9c004c58
RL
1365 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1366 (match_operand:SI 2 "immediate_operand" "i")]
1367 UNSPEC_VEC_SHR))]
0b4eefd5
AL
1368 "TARGET_SIMD"
1369 {
1370 if (BYTES_BIG_ENDIAN)
9c004c58 1371 return "shl %d0, %d1, %2";
0b4eefd5
AL
1372 else
1373 return "ushr %d0, %d1, %2";
1374 }
1375 [(set_attr "type" "neon_shift_imm")]
1376)
1377
43e9d192 1378(define_expand "vec_set<mode>"
1bbffb87 1379 [(match_operand:VALL_F16 0 "register_operand")
fabc5d9b 1380 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1bbffb87 1381 (match_operand:SI 2 "immediate_operand")]
43e9d192
IB
1382 "TARGET_SIMD"
1383 {
1384 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1385 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1386 GEN_INT (elem), operands[0]));
1387 DONE;
1388 }
1389)
1390
1391
1392(define_insn "aarch64_mla<mode>"
a844a695
AL
1393 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1394 (plus:VDQ_BHSI (mult:VDQ_BHSI
1395 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1396 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1397 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
43e9d192
IB
1398 "TARGET_SIMD"
1399 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 1400 [(set_attr "type" "neon_mla_<Vetype><q>")]
43e9d192
IB
1401)
1402
828e70c1
JG
1403(define_insn "*aarch64_mla_elt<mode>"
1404 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1405 (plus:VDQHS
1406 (mult:VDQHS
1407 (vec_duplicate:VDQHS
1408 (vec_select:<VEL>
1409 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1410 (parallel [(match_operand:SI 2 "immediate_operand")])))
1411 (match_operand:VDQHS 3 "register_operand" "w"))
1412 (match_operand:VDQHS 4 "register_operand" "0")))]
1413 "TARGET_SIMD"
1dd055a2 1414 {
7ac29c0f 1415 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3e5c062e 1416 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 1417 }
78ec3036 1418 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1419)
1420
1421(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1422 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1423 (plus:VDQHS
1424 (mult:VDQHS
1425 (vec_duplicate:VDQHS
1426 (vec_select:<VEL>
1427 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1428 (parallel [(match_operand:SI 2 "immediate_operand")])))
1429 (match_operand:VDQHS 3 "register_operand" "w"))
1430 (match_operand:VDQHS 4 "register_operand" "0")))]
1431 "TARGET_SIMD"
1dd055a2 1432 {
7ac29c0f 1433 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3e5c062e 1434 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 1435 }
78ec3036 1436 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1437)
1438
9d66505a
JW
1439(define_insn "aarch64_mla_n<mode>"
1440 [(set (match_operand:VDQHS 0 "register_operand" "=w")
4b40986c 1441 (plus:VDQHS
9d66505a
JW
1442 (mult:VDQHS
1443 (vec_duplicate:VDQHS
1444 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1445 (match_operand:VDQHS 2 "register_operand" "w"))
1446 (match_operand:VDQHS 1 "register_operand" "0")))]
4b40986c 1447 "TARGET_SIMD"
9d66505a 1448 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
4b40986c
JW
1449 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1450)
1451
43e9d192 1452(define_insn "aarch64_mls<mode>"
a844a695
AL
1453 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1454 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1455 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1456 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
43e9d192
IB
1457 "TARGET_SIMD"
1458 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 1459 [(set_attr "type" "neon_mla_<Vetype><q>")]
43e9d192
IB
1460)
1461
828e70c1
JG
1462(define_insn "*aarch64_mls_elt<mode>"
1463 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1464 (minus:VDQHS
1465 (match_operand:VDQHS 4 "register_operand" "0")
1466 (mult:VDQHS
1467 (vec_duplicate:VDQHS
1468 (vec_select:<VEL>
1469 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1470 (parallel [(match_operand:SI 2 "immediate_operand")])))
1471 (match_operand:VDQHS 3 "register_operand" "w"))))]
1472 "TARGET_SIMD"
1dd055a2 1473 {
7ac29c0f 1474 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3e5c062e 1475 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 1476 }
78ec3036 1477 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1478)
1479
1480(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1481 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1482 (minus:VDQHS
1483 (match_operand:VDQHS 4 "register_operand" "0")
1484 (mult:VDQHS
1485 (vec_duplicate:VDQHS
1486 (vec_select:<VEL>
1487 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1488 (parallel [(match_operand:SI 2 "immediate_operand")])))
1489 (match_operand:VDQHS 3 "register_operand" "w"))))]
1490 "TARGET_SIMD"
1dd055a2 1491 {
7ac29c0f 1492 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3e5c062e 1493 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 1494 }
78ec3036 1495 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
828e70c1
JG
1496)
1497
3fd10728 1498(define_insn "aarch64_mls_n<mode>"
4b40986c
JW
1499 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1500 (minus:VDQHS
1501 (match_operand:VDQHS 1 "register_operand" "0")
3fd10728
JW
1502 (mult:VDQHS
1503 (vec_duplicate:VDQHS
1504 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1505 (match_operand:VDQHS 2 "register_operand" "w"))))]
4b40986c 1506 "TARGET_SIMD"
3fd10728 1507 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
4b40986c
JW
1508 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1509)
1510
43e9d192 1511;; Max/Min operations.
998eaf97 1512(define_insn "<su><maxmin><mode>3"
a844a695
AL
1513 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1514 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1515 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
43e9d192 1516 "TARGET_SIMD"
998eaf97 1517 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 1518 [(set_attr "type" "neon_minmax<q>")]
43e9d192
IB
1519)
1520
da9c88d8 1521(define_expand "<su><maxmin>v2di3"
1bbffb87
DZ
1522 [(set (match_operand:V2DI 0 "register_operand")
1523 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1524 (match_operand:V2DI 2 "register_operand")))]
da9c88d8
RL
1525 "TARGET_SIMD"
1526{
1527 enum rtx_code cmp_operator;
1528 rtx cmp_fmt;
1529
1530 switch (<CODE>)
1531 {
1532 case UMIN:
1533 cmp_operator = LTU;
1534 break;
1535 case SMIN:
1536 cmp_operator = LT;
1537 break;
1538 case UMAX:
1539 cmp_operator = GTU;
1540 break;
1541 case SMAX:
1542 cmp_operator = GT;
1543 break;
1544 default:
1545 gcc_unreachable ();
1546 }
1547
1548 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
6c553b76 1549 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
da9c88d8
RL
1550 operands[2], cmp_fmt, operands[1], operands[2]));
1551 DONE;
1552})
1553
7abab3d1 1554;; Pairwise Integer Max/Min operations.
6d331688 1555(define_insn "aarch64_<optab>p<mode>"
7abab3d1
FY
1556 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1557 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1558 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1559 MAXMINV))]
1560 "TARGET_SIMD"
1561 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1562 [(set_attr "type" "neon_minmax<q>")]
1563)
1564
1565;; Pairwise FP Max/Min operations.
6d331688 1566(define_insn "aarch64_<optab>p<mode>"
33d72b63
JW
1567 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1568 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1569 (match_operand:VHSDF 2 "register_operand" "w")]
1570 FMAXMINV))]
7abab3d1
FY
1571 "TARGET_SIMD"
1572 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1573 [(set_attr "type" "neon_minmax<q>")]
1574)
1575
5a908485
JG
1576;; vec_concat gives a new vector with the low elements from operand 1, and
1577;; the high elements from operand 2. That is to say, given op1 = { a, b }
1578;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1579;; What that means, is that the RTL descriptions of the below patterns
1580;; need to change depending on endianness.
43e9d192 1581
43e9d192
IB
1582;; Narrowing operations.
1583
d8a88cda
JW
1584(define_insn "aarch64_xtn<mode>_insn_le"
1585 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1586 (vec_concat:<VNARROWQ2>
1587 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1588 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
1589 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1590 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1591 [(set_attr "type" "neon_move_narrow_q")]
1592)
577d5819 1593
d8a88cda
JW
1594(define_insn "aarch64_xtn<mode>_insn_be"
1595 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1596 (vec_concat:<VNARROWQ2>
1597 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
1598 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
1599 "TARGET_SIMD && BYTES_BIG_ENDIAN"
577d5819 1600 "xtn\\t%0.<Vntype>, %1.<Vtype>"
45364338 1601 [(set_attr "type" "neon_move_narrow_q")]
43e9d192
IB
1602)
1603
d8a88cda
JW
1604(define_expand "aarch64_xtn<mode>"
1605 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1606 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
1607 "TARGET_SIMD"
1608 {
1609 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1610 if (BYTES_BIG_ENDIAN)
1611 emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
1612 CONST0_RTX (<VNARROWQ>mode)));
1613 else
1614 emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
1615 CONST0_RTX (<VNARROWQ>mode)));
1616
1617 /* The intrinsic expects a narrow result, so emit a subreg that will get
1618 optimized away as appropriate. */
1619 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1620 <VNARROWQ2>mode));
1621 DONE;
1622 }
1623)
1624
1625(define_insn "aarch64_xtn2<mode>_insn_le"
577d5819
JW
1626 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1627 (vec_concat:<VNARROWQ2>
1628 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1629 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1630 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1631 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
45364338 1632 [(set_attr "type" "neon_move_narrow_q")]
577d5819
JW
1633)
1634
d8a88cda 1635(define_insn "aarch64_xtn2<mode>_insn_be"
577d5819
JW
1636 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1637 (vec_concat:<VNARROWQ2>
1638 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1639 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1640 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1641 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
45364338 1642 [(set_attr "type" "neon_move_narrow_q")]
577d5819
JW
1643)
1644
1645(define_expand "aarch64_xtn2<mode>"
1646 [(match_operand:<VNARROWQ2> 0 "register_operand")
1647 (match_operand:<VNARROWQ> 1 "register_operand")
1648 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1649 "TARGET_SIMD"
1650 {
1651 if (BYTES_BIG_ENDIAN)
d8a88cda
JW
1652 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1653 operands[2]));
577d5819 1654 else
d8a88cda
JW
1655 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1656 operands[2]));
577d5819
JW
1657 DONE;
1658 }
1659)
1660
52da40ff
TC
1661(define_insn "*aarch64_narrow_trunc<mode>"
1662 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1663 (vec_concat:<VNARROWQ2>
1664 (truncate:<VNARROWQ>
1665 (match_operand:VQN 1 "register_operand" "w"))
1666 (truncate:<VNARROWQ>
1667 (match_operand:VQN 2 "register_operand" "w"))))]
1668 "TARGET_SIMD"
1669{
1670 if (!BYTES_BIG_ENDIAN)
1671 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1672 else
1673 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1674}
1675 [(set_attr "type" "neon_permute<q>")]
1676)
1677
d8a88cda
JW
1678;; Packing doubles.
1679
43e9d192 1680(define_expand "vec_pack_trunc_<mode>"
1bbffb87 1681 [(match_operand:<VNARROWD> 0 "register_operand")
bce43c04
RS
1682 (match_operand:VDN 1 "general_operand")
1683 (match_operand:VDN 2 "general_operand")]
43e9d192
IB
1684 "TARGET_SIMD"
1685{
1686 rtx tempreg = gen_reg_rtx (<VDBL>mode);
bce43c04 1687 emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
d8a88cda 1688 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
43e9d192
IB
1689 DONE;
1690})
1691
d8a88cda
JW
1692;; Packing quads.
1693
1694(define_expand "vec_pack_trunc_<mode>"
1695 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1696 (vec_concat:<VNARROWQ2>
1697 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1698 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1699 "TARGET_SIMD"
1700 {
1701 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1702 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1703 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1704
1705 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1706
1707 if (BYTES_BIG_ENDIAN)
1708 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1709 operands[hi]));
1710 else
1711 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1712 operands[hi]));
1713 DONE;
1714 }
1715)
1716
fdb904a1
KT
1717(define_insn "aarch64_shrn<mode>_insn_le"
1718 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1719 (vec_concat:<VNARROWQ2>
1720 (truncate:<VNARROWQ>
1721 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
0d9a70ea 1722 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
fdb904a1
KT
1723 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1724 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1725 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1726 [(set_attr "type" "neon_shift_imm_narrow_q")]
1727)
1728
1729(define_insn "aarch64_shrn<mode>_insn_be"
1730 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1731 (vec_concat:<VNARROWQ2>
1732 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1733 (truncate:<VNARROWQ>
1734 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
0d9a70ea 1735 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
fdb904a1
KT
1736 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1737 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1738 [(set_attr "type" "neon_shift_imm_narrow_q")]
1739)
1740
e33aef11
TC
1741(define_insn "*aarch64_<srn_op>shrn<mode>_vect"
1742 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1743 (truncate:<VNARROWQ>
1744 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1745 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
1746 "TARGET_SIMD"
1747 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1748 [(set_attr "type" "neon_shift_imm_narrow_q")]
1749)
1750
1751(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
1752 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1753 (vec_concat:<VNARROWQ2>
1754 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1755 (truncate:<VNARROWQ>
1756 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1757 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1758 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1759 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1760 [(set_attr "type" "neon_shift_imm_narrow_q")]
1761)
1762
1763(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
1764 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1765 (vec_concat:<VNARROWQ2>
1766 (truncate:<VNARROWQ>
1767 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1768 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1769 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1770 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1771 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1772 [(set_attr "type" "neon_shift_imm_narrow_q")]
1773)
1774
41812e5e
TC
1775(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1776 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1777 (vec_concat:<VNARROWQ2>
1778 (truncate:<VNARROWQ>
1779 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1780 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1781 (truncate:<VNARROWQ>
1782 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1783 (match_dup 2)))))]
1784 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1785 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1786 [(set_attr "type" "neon_permute<q>")]
1787)
1788
41812e5e
TC
1789(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1790 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1791 (vec_concat:<VNARROWQ2>
1792 (truncate:<VNARROWQ>
1793 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1794 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1795 (truncate:<VNARROWQ>
1796 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1797 (match_dup 2)))))]
1798 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1799 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1800 [(set_attr "type" "neon_permute<q>")]
1801)
1802
fdb904a1
KT
1803(define_expand "aarch64_shrn<mode>"
1804 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1805 (truncate:<VNARROWQ>
1806 (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
1807 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
1808 "TARGET_SIMD"
1809 {
1810 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1811 INTVAL (operands[2]));
1812 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1813 if (BYTES_BIG_ENDIAN)
1814 emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
1815 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1816 else
1817 emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
1818 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1819
1820 /* The intrinsic expects a narrow result, so emit a subreg that will get
1821 optimized away as appropriate. */
1822 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1823 <VNARROWQ2>mode));
1824 DONE;
1825 }
1826)
1827
850e5878
KT
1828(define_insn "aarch64_rshrn<mode>_insn_le"
1829 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1830 (vec_concat:<VNARROWQ2>
1831 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
0d9a70ea
KT
1832 (match_operand:VQN 2
1833 "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
850e5878
KT
1834 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1835 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1836 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1837 [(set_attr "type" "neon_shift_imm_narrow_q")]
1838)
1839
1840(define_insn "aarch64_rshrn<mode>_insn_be"
1841 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1842 (vec_concat:<VNARROWQ2>
1843 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1844 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
0d9a70ea
KT
1845 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1846 UNSPEC_RSHRN)))]
850e5878
KT
1847 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1848 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1849 [(set_attr "type" "neon_shift_imm_narrow_q")]
1850)
1851
1852(define_expand "aarch64_rshrn<mode>"
1853 [(match_operand:<VNARROWQ> 0 "register_operand")
1854 (match_operand:VQN 1 "register_operand")
1855 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1856 "TARGET_SIMD"
1857 {
9b8830b6
TC
1858 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
1859 {
1860 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
1861 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
1862 }
850e5878 1863 else
9b8830b6
TC
1864 {
1865 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1866 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1867 INTVAL (operands[2]));
1868 if (BYTES_BIG_ENDIAN)
1869 emit_insn (
1870 gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
1871 operands[2],
1872 CONST0_RTX (<VNARROWQ>mode)));
1873 else
1874 emit_insn (
1875 gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
1876 operands[2],
1877 CONST0_RTX (<VNARROWQ>mode)));
1878
1879 /* The intrinsic expects a narrow result, so emit a subreg that will
1880 get optimized away as appropriate. */
1881 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1882 <VNARROWQ2>mode));
1883 }
850e5878
KT
1884 DONE;
1885 }
1886)
1887
d61ca09e
KT
1888(define_insn "aarch64_shrn2<mode>_insn_le"
1889 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1890 (vec_concat:<VNARROWQ2>
1891 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1892 (truncate:<VNARROWQ>
1893 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
0d9a70ea 1894 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
d61ca09e
KT
1895 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1896 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1897 [(set_attr "type" "neon_shift_imm_narrow_q")]
1898)
1899
1900(define_insn "aarch64_shrn2<mode>_insn_be"
1901 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1902 (vec_concat:<VNARROWQ2>
1903 (truncate:<VNARROWQ>
1904 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
0d9a70ea
KT
1905 (match_operand:VQN 3
1906 "aarch64_simd_shift_imm_vec_<vn_mode>")))
d61ca09e
KT
1907 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1908 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1909 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1910 [(set_attr "type" "neon_shift_imm_narrow_q")]
1911)
1912
1913(define_expand "aarch64_shrn2<mode>"
1914 [(match_operand:<VNARROWQ2> 0 "register_operand")
1915 (match_operand:<VNARROWQ> 1 "register_operand")
1916 (match_operand:VQN 2 "register_operand")
1917 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1918 "TARGET_SIMD"
1919 {
1920 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1921 INTVAL (operands[3]));
1922 if (BYTES_BIG_ENDIAN)
1923 emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
1924 operands[2], operands[3]));
1925 else
1926 emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
1927 operands[2], operands[3]));
1928 DONE;
1929 }
1930)
1931
850e5878
KT
1932(define_insn "aarch64_rshrn2<mode>_insn_le"
1933 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1934 (vec_concat:<VNARROWQ2>
1935 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1936 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
0d9a70ea
KT
1937 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1938 UNSPEC_RSHRN)))]
850e5878
KT
1939 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1940 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1941 [(set_attr "type" "neon_shift_imm_narrow_q")]
1942)
1943
1944(define_insn "aarch64_rshrn2<mode>_insn_be"
1945 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1946 (vec_concat:<VNARROWQ2>
1947 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
0d9a70ea
KT
1948 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1949 UNSPEC_RSHRN)
850e5878
KT
1950 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1951 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1952 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1953 [(set_attr "type" "neon_shift_imm_narrow_q")]
1954)
1955
1956(define_expand "aarch64_rshrn2<mode>"
1957 [(match_operand:<VNARROWQ2> 0 "register_operand")
1958 (match_operand:<VNARROWQ> 1 "register_operand")
1959 (match_operand:VQN 2 "register_operand")
1960 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1961 "TARGET_SIMD"
1962 {
9b8830b6
TC
1963 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
1964 {
1965 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
1966 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
1967 operands[2], tmp));
1968 }
850e5878 1969 else
9b8830b6
TC
1970 {
1971 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1972 INTVAL (operands[3]));
1973 if (BYTES_BIG_ENDIAN)
1974 emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
1975 operands[1],
1976 operands[2],
1977 operands[3]));
1978 else
1979 emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
1980 operands[1],
1981 operands[2],
1982 operands[3]));
1983 }
850e5878
KT
1984 DONE;
1985 }
1986)
fdb904a1 1987
43e9d192
IB
1988;; Widening operations.
1989
1990(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1992 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1993 (match_operand:VQW 1 "register_operand" "w")
1994 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1995 )))]
1996 "TARGET_SIMD"
77b8fb05 1997 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
78ec3036 1998 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
1999)
2000
2001(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2002 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2003 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2004 (match_operand:VQW 1 "register_operand" "w")
2005 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2006 )))]
2007 "TARGET_SIMD"
77b8fb05 2008 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
78ec3036 2009 [(set_attr "type" "neon_shift_imm_long")]
43e9d192
IB
2010)
2011
2012(define_expand "vec_unpack<su>_hi_<mode>"
1bbffb87 2013 [(match_operand:<VWIDE> 0 "register_operand")
43e9d192
IB
2014 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2015 "TARGET_SIMD"
2016 {
f5cbabc1 2017 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
2018 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2019 operands[1], p));
2020 DONE;
2021 }
2022)
2023
2024(define_expand "vec_unpack<su>_lo_<mode>"
1bbffb87
DZ
2025 [(match_operand:<VWIDE> 0 "register_operand")
2026 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
43e9d192
IB
2027 "TARGET_SIMD"
2028 {
f5cbabc1 2029 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
43e9d192
IB
2030 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2031 operands[1], p));
2032 DONE;
2033 }
2034)
2035
2036;; Widening arithmetic.
2037
24244e4d
VP
2038(define_insn "*aarch64_<su>mlal_lo<mode>"
2039 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2040 (plus:<VWIDE>
2041 (mult:<VWIDE>
2042 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2043 (match_operand:VQW 2 "register_operand" "w")
2044 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2045 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2046 (match_operand:VQW 4 "register_operand" "w")
2047 (match_dup 3))))
2048 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2049 "TARGET_SIMD"
2050 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
78ec3036 2051 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2052)
2053
719877b0 2054(define_insn "aarch64_<su>mlal_hi<mode>_insn"
24244e4d
VP
2055 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2056 (plus:<VWIDE>
2057 (mult:<VWIDE>
2058 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2059 (match_operand:VQW 2 "register_operand" "w")
2060 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2061 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2062 (match_operand:VQW 4 "register_operand" "w")
2063 (match_dup 3))))
2064 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2065 "TARGET_SIMD"
2066 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
78ec3036 2067 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2068)
2069
719877b0
JW
2070(define_expand "aarch64_<su>mlal_hi<mode>"
2071 [(match_operand:<VWIDE> 0 "register_operand")
2072 (match_operand:<VWIDE> 1 "register_operand")
2073 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2074 (match_operand:VQW 3 "register_operand")]
2075 "TARGET_SIMD"
2076{
2077 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2078 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2079 operands[2], p, operands[3]));
2080 DONE;
2081}
2082)
2083
2dac6586
JW
2084(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2085 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a
JW
2086 (plus:<VWIDE>
2087 (mult:<VWIDE>
2088 (ANY_EXTEND:<VWIDE>
2089 (vec_select:<VHALF>
2090 (match_operand:VQ_HSI 2 "register_operand" "w")
2091 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2092 (vec_duplicate:<VWIDE>
2093 (ANY_EXTEND:<VWIDE_S>
2094 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2095 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2dac6586
JW
2096 "TARGET_SIMD"
2097 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2098 [(set_attr "type" "neon_mla_<Vetype>_long")]
2099)
2100
2101(define_expand "aarch64_<su>mlal_hi_n<mode>"
2102 [(match_operand:<VWIDE> 0 "register_operand")
2103 (match_operand:<VWIDE> 1 "register_operand")
2104 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2105 (match_operand:<VEL> 3 "register_operand")]
2106 "TARGET_SIMD"
2107{
2108 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2109 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2110 operands[1], operands[2], p, operands[3]));
2111 DONE;
2112}
2113)
2114
24244e4d
VP
2115(define_insn "*aarch64_<su>mlsl_lo<mode>"
2116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2117 (minus:<VWIDE>
2118 (match_operand:<VWIDE> 1 "register_operand" "0")
2119 (mult:<VWIDE>
2120 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2121 (match_operand:VQW 2 "register_operand" "w")
2122 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2123 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2124 (match_operand:VQW 4 "register_operand" "w")
2125 (match_dup 3))))))]
2126 "TARGET_SIMD"
2127 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
78ec3036 2128 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2129)
2130
d3959070 2131(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
24244e4d
VP
2132 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2133 (minus:<VWIDE>
2134 (match_operand:<VWIDE> 1 "register_operand" "0")
2135 (mult:<VWIDE>
2136 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2137 (match_operand:VQW 2 "register_operand" "w")
2138 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2139 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2140 (match_operand:VQW 4 "register_operand" "w")
2141 (match_dup 3))))))]
2142 "TARGET_SIMD"
2143 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
78ec3036 2144 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2145)
2146
d3959070
KT
2147(define_expand "aarch64_<su>mlsl_hi<mode>"
2148 [(match_operand:<VWIDE> 0 "register_operand")
2149 (match_operand:<VWIDE> 1 "register_operand")
2150 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2151 (match_operand:VQW 3 "register_operand")]
2152 "TARGET_SIMD"
2153{
2154 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2155 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2156 operands[2], p, operands[3]));
2157 DONE;
2158}
2159)
2160
b2c4cf7b
JW
2161(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2162 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a
JW
2163 (minus:<VWIDE>
2164 (match_operand:<VWIDE> 1 "register_operand" "0")
2165 (mult:<VWIDE>
2166 (ANY_EXTEND:<VWIDE>
2167 (vec_select:<VHALF>
2168 (match_operand:VQ_HSI 2 "register_operand" "w")
2169 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2170 (vec_duplicate:<VWIDE>
2171 (ANY_EXTEND:<VWIDE_S>
2172 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
b2c4cf7b
JW
2173 "TARGET_SIMD"
2174 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2175 [(set_attr "type" "neon_mla_<Vetype>_long")]
2176)
2177
2178(define_expand "aarch64_<su>mlsl_hi_n<mode>"
2179 [(match_operand:<VWIDE> 0 "register_operand")
2180 (match_operand:<VWIDE> 1 "register_operand")
2181 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2182 (match_operand:<VEL> 3 "register_operand")]
2183 "TARGET_SIMD"
2184{
2185 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2186 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2187 operands[1], operands[2], p, operands[3]));
2188 DONE;
2189}
2190)
2191
d53a4f9b 2192(define_insn "aarch64_<su>mlal<mode>"
24244e4d
VP
2193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194 (plus:<VWIDE>
2195 (mult:<VWIDE>
2196 (ANY_EXTEND:<VWIDE>
d53a4f9b 2197 (match_operand:VD_BHSI 2 "register_operand" "w"))
24244e4d 2198 (ANY_EXTEND:<VWIDE>
d53a4f9b
JW
2199 (match_operand:VD_BHSI 3 "register_operand" "w")))
2200 (match_operand:<VWIDE> 1 "register_operand" "0")))]
24244e4d 2201 "TARGET_SIMD"
d53a4f9b 2202 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 2203 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2204)
2205
87301e39
JW
2206(define_insn "aarch64_<su>mlal_n<mode>"
2207 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a
JW
2208 (plus:<VWIDE>
2209 (mult:<VWIDE>
2210 (ANY_EXTEND:<VWIDE>
2211 (match_operand:VD_HSI 2 "register_operand" "w"))
2212 (vec_duplicate:<VWIDE>
2213 (ANY_EXTEND:<VWIDE_S>
2214 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2215 (match_operand:<VWIDE> 1 "register_operand" "0")))]
87301e39
JW
2216 "TARGET_SIMD"
2217 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2218 [(set_attr "type" "neon_mla_<Vetype>_long")]
2219)
2220
d3959070 2221(define_insn "aarch64_<su>mlsl<mode>"
24244e4d
VP
2222 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2223 (minus:<VWIDE>
2224 (match_operand:<VWIDE> 1 "register_operand" "0")
2225 (mult:<VWIDE>
2226 (ANY_EXTEND:<VWIDE>
a844a695 2227 (match_operand:VD_BHSI 2 "register_operand" "w"))
24244e4d 2228 (ANY_EXTEND:<VWIDE>
a844a695 2229 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
24244e4d
VP
2230 "TARGET_SIMD"
2231 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 2232 [(set_attr "type" "neon_mla_<Vetype>_long")]
24244e4d
VP
2233)
2234
8a8e515c
JW
2235(define_insn "aarch64_<su>mlsl_n<mode>"
2236 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a
JW
2237 (minus:<VWIDE>
2238 (match_operand:<VWIDE> 1 "register_operand" "0")
2239 (mult:<VWIDE>
2240 (ANY_EXTEND:<VWIDE>
2241 (match_operand:VD_HSI 2 "register_operand" "w"))
2242 (vec_duplicate:<VWIDE>
2243 (ANY_EXTEND:<VWIDE_S>
2244 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
8a8e515c
JW
2245 "TARGET_SIMD"
2246 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2247 [(set_attr "type" "neon_mla_<Vetype>_long")]
2248)
2249
43e9d192
IB
2250(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2251 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2252 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2253 (match_operand:VQW 1 "register_operand" "w")
2254 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2255 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2256 (match_operand:VQW 2 "register_operand" "w")
2257 (match_dup 3)))))]
2258 "TARGET_SIMD"
a02ad1aa 2259 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
78ec3036 2260 [(set_attr "type" "neon_mul_<Vetype>_long")]
43e9d192
IB
2261)
2262
71c82d0e
JG
2263(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2264 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2265 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2266 (match_operand:VD_BHSI 1 "register_operand" "w"))
2267 (ANY_EXTEND:<VWIDE>
2268 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2269 "TARGET_SIMD"
2270 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2271 [(set_attr "type" "neon_mul_<Vetype>_long")]
2272)
2273
43e9d192 2274(define_expand "vec_widen_<su>mult_lo_<mode>"
1bbffb87
DZ
2275 [(match_operand:<VWIDE> 0 "register_operand")
2276 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2277 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
43e9d192
IB
2278 "TARGET_SIMD"
2279 {
f5cbabc1 2280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
43e9d192
IB
2281 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2282 operands[1],
2283 operands[2], p));
2284 DONE;
2285 }
2286)
2287
2288(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2289 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2290 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2291 (match_operand:VQW 1 "register_operand" "w")
2292 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2293 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2294 (match_operand:VQW 2 "register_operand" "w")
2295 (match_dup 3)))))]
2296 "TARGET_SIMD"
a02ad1aa 2297 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 2298 [(set_attr "type" "neon_mul_<Vetype>_long")]
43e9d192
IB
2299)
2300
2301(define_expand "vec_widen_<su>mult_hi_<mode>"
1bbffb87
DZ
2302 [(match_operand:<VWIDE> 0 "register_operand")
2303 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2304 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
43e9d192
IB
2305 "TARGET_SIMD"
2306 {
f5cbabc1 2307 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
2308 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2309 operands[1],
2310 operands[2], p));
2311 DONE;
2312
2313 }
2314)
2315
0b839322
WD
2316;; vmull_lane_s16 intrinsics
2317(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2319 (mult:<VWIDE>
2320 (ANY_EXTEND:<VWIDE>
2321 (match_operand:<VCOND> 1 "register_operand" "w"))
3bc9db6a
JW
2322 (vec_duplicate:<VWIDE>
2323 (ANY_EXTEND:<VWIDE_S>
0b839322
WD
2324 (vec_select:<VEL>
2325 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2326 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2327 "TARGET_SIMD"
2328 {
2329 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2330 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2331 }
2332 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
ee4c4fe2
JW
2333)
2334
aa652fb2
JW
2335(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2337 (mult:<VWIDE>
3bc9db6a
JW
2338 (ANY_EXTEND:<VWIDE>
2339 (vec_select:<VHALF>
2340 (match_operand:VQ_HSI 1 "register_operand" "w")
2341 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2342 (vec_duplicate:<VWIDE>
2343 (ANY_EXTEND:<VWIDE_S>
2344 (vec_select:<VEL>
2345 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2346 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
aa652fb2
JW
2347 "TARGET_SIMD"
2348 {
2349 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2350 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2351 }
2352 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2353)
2354
2355(define_expand "aarch64_<su>mull_hi_lane<mode>"
2356 [(match_operand:<VWIDE> 0 "register_operand")
2357 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2358 (match_operand:<VCOND> 2 "register_operand")
2359 (match_operand:SI 3 "immediate_operand")]
2360 "TARGET_SIMD"
2361{
2362 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2363 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2364 operands[1], p, operands[2], operands[3]));
2365 DONE;
2366}
2367)
2368
2369(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2371 (mult:<VWIDE>
3bc9db6a
JW
2372 (ANY_EXTEND:<VWIDE>
2373 (vec_select:<VHALF>
2374 (match_operand:VQ_HSI 1 "register_operand" "w")
2375 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2376 (vec_duplicate:<VWIDE>
2377 (ANY_EXTEND:<VWIDE_S>
2378 (vec_select:<VEL>
2379 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2380 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
aa652fb2
JW
2381 "TARGET_SIMD"
2382 {
2383 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2384 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2385 }
2386 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2387)
2388
2389(define_expand "aarch64_<su>mull_hi_laneq<mode>"
2390 [(match_operand:<VWIDE> 0 "register_operand")
2391 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2392 (match_operand:<VCONQ> 2 "register_operand")
2393 (match_operand:SI 3 "immediate_operand")]
2394 "TARGET_SIMD"
2395{
2396 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2397 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2398 operands[1], p, operands[2], operands[3]));
2399 DONE;
2400}
2401)
2402
ee4c4fe2
JW
2403(define_insn "aarch64_<su>mull_n<mode>"
2404 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a
JW
2405 (mult:<VWIDE>
2406 (ANY_EXTEND:<VWIDE>
2407 (match_operand:VD_HSI 1 "register_operand" "w"))
2408 (vec_duplicate:<VWIDE>
2409 (ANY_EXTEND:<VWIDE_S>
0a3eccb6 2410 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
ee4c4fe2
JW
2411 "TARGET_SIMD"
2412 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2413 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
0b839322
WD
2414)
2415
1d622845
JW
2416(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2417 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2418 (mult:<VWIDE>
1d622845 2419 (ANY_EXTEND:<VWIDE>
3bc9db6a
JW
2420 (vec_select:<VHALF>
2421 (match_operand:VQ_HSI 1 "register_operand" "w")
2422 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2423 (vec_duplicate:<VWIDE>
2424 (ANY_EXTEND:<VWIDE_S>
1d622845
JW
2425 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2426 "TARGET_SIMD"
2427 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2428 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2429)
2430
2431(define_expand "aarch64_<su>mull_hi_n<mode>"
2432 [(match_operand:<VWIDE> 0 "register_operand")
2433 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2434 (match_operand:<VEL> 2 "register_operand")]
2435 "TARGET_SIMD"
2436 {
2437 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2438 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2439 operands[2], p));
2440 DONE;
2441 }
2442)
2443
0b839322
WD
2444;; vmlal_lane_s16 intrinsics
2445(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2446 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2447 (plus:<VWIDE>
2448 (mult:<VWIDE>
2449 (ANY_EXTEND:<VWIDE>
2450 (match_operand:<VCOND> 2 "register_operand" "w"))
3bc9db6a
JW
2451 (vec_duplicate:<VWIDE>
2452 (ANY_EXTEND:<VWIDE_S>
0b839322
WD
2453 (vec_select:<VEL>
2454 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2455 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2456 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2457 "TARGET_SIMD"
2458 {
2459 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2460 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2461 }
2462 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2463)
2464
9a00ff96
JW
2465(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2466 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2467 (plus:<VWIDE>
2468 (mult:<VWIDE>
3bc9db6a
JW
2469 (ANY_EXTEND:<VWIDE>
2470 (vec_select:<VHALF>
2471 (match_operand:VQ_HSI 2 "register_operand" "w")
2472 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2473 (vec_duplicate:<VWIDE>
2474 (ANY_EXTEND:<VWIDE_S>
2475 (vec_select:<VEL>
2476 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2477 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
9a00ff96
JW
2478 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2479 "TARGET_SIMD"
2480 {
2481 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2482 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2483 }
2484 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2485)
2486
2487(define_expand "aarch64_<su>mlal_hi_lane<mode>"
2488 [(match_operand:<VWIDE> 0 "register_operand")
2489 (match_operand:<VWIDE> 1 "register_operand")
2490 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2491 (match_operand:<VCOND> 3 "register_operand")
2492 (match_operand:SI 4 "immediate_operand")]
2493 "TARGET_SIMD"
2494{
2495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2496 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2497 operands[1], operands[2], p, operands[3], operands[4]));
2498 DONE;
2499}
2500)
2501
2502(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2503 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2504 (plus:<VWIDE>
2505 (mult:<VWIDE>
3bc9db6a
JW
2506 (ANY_EXTEND:<VWIDE>
2507 (vec_select:<VHALF>
2508 (match_operand:VQ_HSI 2 "register_operand" "w")
2509 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2510 (vec_duplicate:<VWIDE>
2511 (ANY_EXTEND:<VWIDE_S>
2512 (vec_select:<VEL>
2513 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2514 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
9a00ff96
JW
2515 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2516 "TARGET_SIMD"
2517 {
2518 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2519 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2520 }
2521 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2522)
2523
2524(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2525 [(match_operand:<VWIDE> 0 "register_operand")
2526 (match_operand:<VWIDE> 1 "register_operand")
2527 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2528 (match_operand:<VCONQ> 3 "register_operand")
2529 (match_operand:SI 4 "immediate_operand")]
2530 "TARGET_SIMD"
2531{
2532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2533 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2534 operands[1], operands[2], p, operands[3], operands[4]));
2535 DONE;
2536}
2537)
2538
e053f96a
JW
2539(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2540 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2541 (minus:<VWIDE>
2542 (match_operand:<VWIDE> 1 "register_operand" "0")
2543 (mult:<VWIDE>
2544 (ANY_EXTEND:<VWIDE>
2545 (match_operand:<VCOND> 2 "register_operand" "w"))
3bc9db6a
JW
2546 (vec_duplicate:<VWIDE>
2547 (ANY_EXTEND:<VWIDE_S>
e053f96a
JW
2548 (vec_select:<VEL>
2549 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2550 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2551 "TARGET_SIMD"
2552 {
2553 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2554 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2555 }
2556 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2557)
2558
9633e579
JW
2559(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2560 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2561 (minus:<VWIDE>
2562 (match_operand:<VWIDE> 1 "register_operand" "0")
2563 (mult:<VWIDE>
3bc9db6a
JW
2564 (ANY_EXTEND:<VWIDE>
2565 (vec_select:<VHALF>
2566 (match_operand:VQ_HSI 2 "register_operand" "w")
2567 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2568 (vec_duplicate:<VWIDE>
2569 (ANY_EXTEND:<VWIDE_S>
2570 (vec_select:<VEL>
2571 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2572 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
9633e579
JW
2573 )))]
2574 "TARGET_SIMD"
2575 {
2576 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2577 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2578 }
2579 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2580)
2581
2582(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2583 [(match_operand:<VWIDE> 0 "register_operand")
2584 (match_operand:<VWIDE> 1 "register_operand")
2585 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2586 (match_operand:<VCOND> 3 "register_operand")
2587 (match_operand:SI 4 "immediate_operand")]
2588 "TARGET_SIMD"
2589{
2590 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2591 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2592 operands[1], operands[2], p, operands[3], operands[4]));
2593 DONE;
2594}
2595)
2596
2597(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2598 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2599 (minus:<VWIDE>
2600 (match_operand:<VWIDE> 1 "register_operand" "0")
2601 (mult:<VWIDE>
3bc9db6a
JW
2602 (ANY_EXTEND:<VWIDE>
2603 (vec_select:<VHALF>
2604 (match_operand:VQ_HSI 2 "register_operand" "w")
2605 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2606 (vec_duplicate:<VWIDE>
2607 (ANY_EXTEND:<VWIDE_S>
2608 (vec_select:<VEL>
2609 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2610 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
9633e579
JW
2611 )))]
2612 "TARGET_SIMD"
2613 {
2614 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2615 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2616 }
2617 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2618)
2619
2620(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2621 [(match_operand:<VWIDE> 0 "register_operand")
2622 (match_operand:<VWIDE> 1 "register_operand")
2623 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2624 (match_operand:<VCONQ> 3 "register_operand")
2625 (match_operand:SI 4 "immediate_operand")]
2626 "TARGET_SIMD"
2627{
2628 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2629 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2630 operands[1], operands[2], p, operands[3], operands[4]));
2631 DONE;
2632}
2633)
2634
43e9d192
IB
2635;; FP vector operations.
2636;; AArch64 AdvSIMD supports single-precision (32-bit) and
2637;; double-precision (64-bit) floating-point data types and arithmetic as
2638;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2639;; without the need for -ffast-math or -funsafe-math-optimizations.
2640;;
2641;; Floating-point operations can raise an exception. Vectorizing such
2642;; operations are safe because of reasons explained below.
2643;;
2644;; ARMv8 permits an extension to enable trapped floating-point
2645;; exception handling, however this is an optional feature. In the
2646;; event of a floating-point exception being raised by vectorised
2647;; code then:
2648;; 1. If trapped floating-point exceptions are available, then a trap
2649;; will be taken when any lane raises an enabled exception. A trap
2650;; handler may determine which lane raised the exception.
2651;; 2. Alternatively a sticky exception flag is set in the
2652;; floating-point status register (FPSR). Software may explicitly
2653;; test the exception flags, in which case the tests will either
2654;; prevent vectorisation, allowing precise identification of the
2655;; failing operation, or if tested outside of vectorisable regions
2656;; then the specific operation and lane are not of interest.
2657
2658;; FP arithmetic operations.
2659
2660(define_insn "add<mode>3"
33d72b63
JW
2661 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2662 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2663 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
2664 "TARGET_SIMD"
2665 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2666 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
43e9d192
IB
2667)
2668
2669(define_insn "sub<mode>3"
33d72b63
JW
2670 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2671 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2672 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
2673 "TARGET_SIMD"
2674 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2675 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
43e9d192
IB
2676)
2677
2678(define_insn "mul<mode>3"
33d72b63
JW
2679 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2680 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2681 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
2682 "TARGET_SIMD"
2683 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2684 [(set_attr "type" "neon_fp_mul_<stype><q>")]
43e9d192
IB
2685)
2686
79a2bc2d 2687(define_expand "div<mode>3"
1bbffb87
DZ
2688 [(set (match_operand:VHSDF 0 "register_operand")
2689 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2690 (match_operand:VHSDF 2 "register_operand")))]
79a2bc2d
EM
2691 "TARGET_SIMD"
2692{
2693 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2694 DONE;
2695
2696 operands[1] = force_reg (<MODE>mode, operands[1]);
2697})
2698
2699(define_insn "*div<mode>3"
33d72b63
JW
2700 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2701 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2702 (match_operand:VHSDF 2 "register_operand" "w")))]
43e9d192
IB
2703 "TARGET_SIMD"
2704 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
33d72b63 2705 [(set_attr "type" "neon_fp_div_<stype><q>")]
43e9d192
IB
2706)
2707
2708(define_insn "neg<mode>2"
daef0a8c
JW
2709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2710 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
2711 "TARGET_SIMD"
2712 "fneg\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 2713 [(set_attr "type" "neon_fp_neg_<stype><q>")]
43e9d192
IB
2714)
2715
2716(define_insn "abs<mode>2"
daef0a8c
JW
2717 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2718 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
2719 "TARGET_SIMD"
2720 "fabs\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 2721 [(set_attr "type" "neon_fp_abs_<stype><q>")]
43e9d192
IB
2722)
2723
b0d9aac8
JW
2724(define_expand "aarch64_float_mla<mode>"
2725 [(set (match_operand:VDQF_DF 0 "register_operand")
2726 (plus:VDQF_DF
2727 (mult:VDQF_DF
2728 (match_operand:VDQF_DF 2 "register_operand")
2729 (match_operand:VDQF_DF 3 "register_operand"))
2730 (match_operand:VDQF_DF 1 "register_operand")))]
2731 "TARGET_SIMD"
2732 {
2733 rtx scratch = gen_reg_rtx (<MODE>mode);
2734 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2735 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2736 DONE;
2737 }
2738)
2739
2740(define_expand "aarch64_float_mls<mode>"
2741 [(set (match_operand:VDQF_DF 0 "register_operand")
2742 (minus:VDQF_DF
2743 (match_operand:VDQF_DF 1 "register_operand")
2744 (mult:VDQF_DF
2745 (match_operand:VDQF_DF 2 "register_operand")
2746 (match_operand:VDQF_DF 3 "register_operand"))))]
2747 "TARGET_SIMD"
2748 {
2749 rtx scratch = gen_reg_rtx (<MODE>mode);
2750 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2751 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2752 DONE;
2753 }
2754)
2755
60518e64
JW
2756(define_expand "aarch64_float_mla_n<mode>"
2757 [(set (match_operand:VDQSF 0 "register_operand")
2758 (plus:VDQSF
2759 (mult:VDQSF
2760 (vec_duplicate:VDQSF
2761 (match_operand:<VEL> 3 "register_operand"))
2762 (match_operand:VDQSF 2 "register_operand"))
2763 (match_operand:VDQSF 1 "register_operand")))]
2764 "TARGET_SIMD"
2765 {
2766 rtx scratch = gen_reg_rtx (<MODE>mode);
2767 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2768 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2769 DONE;
2770 }
2771)
2772
2773(define_expand "aarch64_float_mls_n<mode>"
2774 [(set (match_operand:VDQSF 0 "register_operand")
2775 (minus:VDQSF
2776 (match_operand:VDQSF 1 "register_operand")
2777 (mult:VDQSF
2778 (vec_duplicate:VDQSF
2779 (match_operand:<VEL> 3 "register_operand"))
2780 (match_operand:VDQSF 2 "register_operand"))))]
2781 "TARGET_SIMD"
2782 {
2783 rtx scratch = gen_reg_rtx (<MODE>mode);
2784 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2785 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2786 DONE;
2787 }
2788)
2789
1baf4ed8
JW
2790(define_expand "aarch64_float_mla_lane<mode>"
2791 [(set (match_operand:VDQSF 0 "register_operand")
2792 (plus:VDQSF
2793 (mult:VDQSF
2794 (vec_duplicate:VDQSF
2795 (vec_select:<VEL>
2796 (match_operand:V2SF 3 "register_operand")
2797 (parallel [(match_operand:SI 4 "immediate_operand")])))
2798 (match_operand:VDQSF 2 "register_operand"))
2799 (match_operand:VDQSF 1 "register_operand")))]
2800 "TARGET_SIMD"
2801 {
2802 rtx scratch = gen_reg_rtx (<MODE>mode);
2803 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2804 operands[3], operands[4]));
2805 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2806 DONE;
2807 }
2808)
2809
2810(define_expand "aarch64_float_mls_lane<mode>"
2811 [(set (match_operand:VDQSF 0 "register_operand")
2812 (minus:VDQSF
2813 (match_operand:VDQSF 1 "register_operand")
2814 (mult:VDQSF
2815 (vec_duplicate:VDQSF
2816 (vec_select:<VEL>
2817 (match_operand:V2SF 3 "register_operand")
2818 (parallel [(match_operand:SI 4 "immediate_operand")])))
2819 (match_operand:VDQSF 2 "register_operand"))))]
2820 "TARGET_SIMD"
2821 {
2822 rtx scratch = gen_reg_rtx (<MODE>mode);
2823 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2824 operands[3], operands[4]));
2825 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2826 DONE;
2827 }
2828)
2829
d388179a
JW
2830(define_expand "aarch64_float_mla_laneq<mode>"
2831 [(set (match_operand:VDQSF 0 "register_operand")
2832 (plus:VDQSF
2833 (mult:VDQSF
2834 (vec_duplicate:VDQSF
2835 (vec_select:<VEL>
2836 (match_operand:V4SF 3 "register_operand")
2837 (parallel [(match_operand:SI 4 "immediate_operand")])))
2838 (match_operand:VDQSF 2 "register_operand"))
2839 (match_operand:VDQSF 1 "register_operand")))]
2840 "TARGET_SIMD"
2841 {
2842 rtx scratch = gen_reg_rtx (<MODE>mode);
2843 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2844 operands[3], operands[4]));
2845 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2846 DONE;
2847 }
2848)
2849
2850(define_expand "aarch64_float_mls_laneq<mode>"
2851 [(set (match_operand:VDQSF 0 "register_operand")
2852 (minus:VDQSF
2853 (match_operand:VDQSF 1 "register_operand")
2854 (mult:VDQSF
2855 (vec_duplicate:VDQSF
2856 (vec_select:<VEL>
2857 (match_operand:V4SF 3 "register_operand")
2858 (parallel [(match_operand:SI 4 "immediate_operand")])))
2859 (match_operand:VDQSF 2 "register_operand"))))]
2860 "TARGET_SIMD"
2861 {
2862 rtx scratch = gen_reg_rtx (<MODE>mode);
2863 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2864 operands[3], operands[4]));
2865 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2866 DONE;
2867 }
2868)
2869
43e9d192 2870(define_insn "fma<mode>4"
89ed6d5f
JW
2871 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2872 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2873 (match_operand:VHSDF 2 "register_operand" "w")
2874 (match_operand:VHSDF 3 "register_operand" "0")))]
43e9d192
IB
2875 "TARGET_SIMD"
2876 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
89ed6d5f 2877 [(set_attr "type" "neon_fp_mla_<stype><q>")]
43e9d192
IB
2878)
2879
828e70c1
JG
2880(define_insn "*aarch64_fma4_elt<mode>"
2881 [(set (match_operand:VDQF 0 "register_operand" "=w")
2882 (fma:VDQF
2883 (vec_duplicate:VDQF
2884 (vec_select:<VEL>
2885 (match_operand:VDQF 1 "register_operand" "<h_con>")
2886 (parallel [(match_operand:SI 2 "immediate_operand")])))
2887 (match_operand:VDQF 3 "register_operand" "w")
2888 (match_operand:VDQF 4 "register_operand" "0")))]
2889 "TARGET_SIMD"
1dd055a2 2890 {
7ac29c0f 2891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3e5c062e 2892 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 2893 }
78ec3036 2894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
2895)
2896
2897(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2898 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2899 (fma:VDQSF
2900 (vec_duplicate:VDQSF
2901 (vec_select:<VEL>
2902 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2903 (parallel [(match_operand:SI 2 "immediate_operand")])))
2904 (match_operand:VDQSF 3 "register_operand" "w")
2905 (match_operand:VDQSF 4 "register_operand" "0")))]
2906 "TARGET_SIMD"
1dd055a2 2907 {
7ac29c0f 2908 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3e5c062e 2909 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 2910 }
78ec3036 2911 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
2912)
2913
98b3a5f2
JW
2914(define_insn "*aarch64_fma4_elt_from_dup<mode>"
2915 [(set (match_operand:VMUL 0 "register_operand" "=w")
2916 (fma:VMUL
2917 (vec_duplicate:VMUL
6d06971d 2918 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
98b3a5f2
JW
2919 (match_operand:VMUL 2 "register_operand" "w")
2920 (match_operand:VMUL 3 "register_operand" "0")))]
828e70c1 2921 "TARGET_SIMD"
98b3a5f2 2922 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
ab2e8f01 2923 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
828e70c1
JG
2924)
2925
2926(define_insn "*aarch64_fma4_elt_to_64v2df"
2927 [(set (match_operand:DF 0 "register_operand" "=w")
2928 (fma:DF
2929 (vec_select:DF
2930 (match_operand:V2DF 1 "register_operand" "w")
2931 (parallel [(match_operand:SI 2 "immediate_operand")]))
2932 (match_operand:DF 3 "register_operand" "w")
2933 (match_operand:DF 4 "register_operand" "0")))]
2934 "TARGET_SIMD"
1dd055a2 2935 {
7ac29c0f 2936 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3e5c062e 2937 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
1dd055a2 2938 }
78ec3036 2939 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
828e70c1
JG
2940)
2941
2942(define_insn "fnma<mode>4"
89ed6d5f
JW
2943 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2944 (fma:VHSDF
a7fef189
SE
2945 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2946 (match_operand:VHSDF 2 "register_operand" "w")
89ed6d5f 2947 (match_operand:VHSDF 3 "register_operand" "0")))]
828e70c1 2948 "TARGET_SIMD"
89ed6d5f
JW
2949 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2950 [(set_attr "type" "neon_fp_mla_<stype><q>")]
828e70c1
JG
2951)
2952
2953(define_insn "*aarch64_fnma4_elt<mode>"
2954 [(set (match_operand:VDQF 0 "register_operand" "=w")
2955 (fma:VDQF
2956 (neg:VDQF
2957 (match_operand:VDQF 3 "register_operand" "w"))
2958 (vec_duplicate:VDQF
2959 (vec_select:<VEL>
2960 (match_operand:VDQF 1 "register_operand" "<h_con>")
2961 (parallel [(match_operand:SI 2 "immediate_operand")])))
2962 (match_operand:VDQF 4 "register_operand" "0")))]
2963 "TARGET_SIMD"
1dd055a2 2964 {
7ac29c0f 2965 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3e5c062e 2966 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 2967 }
78ec3036 2968 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
2969)
2970
2971(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2972 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2973 (fma:VDQSF
2974 (neg:VDQSF
2975 (match_operand:VDQSF 3 "register_operand" "w"))
2976 (vec_duplicate:VDQSF
2977 (vec_select:<VEL>
2978 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2979 (parallel [(match_operand:SI 2 "immediate_operand")])))
2980 (match_operand:VDQSF 4 "register_operand" "0")))]
2981 "TARGET_SIMD"
1dd055a2 2982 {
7ac29c0f 2983 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3e5c062e 2984 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1dd055a2 2985 }
78ec3036 2986 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
828e70c1
JG
2987)
2988
98b3a5f2
JW
2989(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2990 [(set (match_operand:VMUL 0 "register_operand" "=w")
2991 (fma:VMUL
2992 (neg:VMUL
2993 (match_operand:VMUL 2 "register_operand" "w"))
2994 (vec_duplicate:VMUL
6d06971d 2995 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
98b3a5f2 2996 (match_operand:VMUL 3 "register_operand" "0")))]
828e70c1 2997 "TARGET_SIMD"
98b3a5f2 2998 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
ab2e8f01 2999 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
828e70c1
JG
3000)
3001
3002(define_insn "*aarch64_fnma4_elt_to_64v2df"
3003 [(set (match_operand:DF 0 "register_operand" "=w")
3004 (fma:DF
3005 (vec_select:DF
3006 (match_operand:V2DF 1 "register_operand" "w")
3007 (parallel [(match_operand:SI 2 "immediate_operand")]))
3008 (neg:DF
3009 (match_operand:DF 3 "register_operand" "w"))
3010 (match_operand:DF 4 "register_operand" "0")))]
3011 "TARGET_SIMD"
1dd055a2 3012 {
7ac29c0f 3013 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3e5c062e 3014 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
1dd055a2 3015 }
78ec3036 3016 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
828e70c1
JG
3017)
3018
0659ce6f 3019;; Vector versions of the floating-point frint patterns.
74dc11ed 3020;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
0659ce6f 3021(define_insn "<frint_pattern><mode>2"
daef0a8c
JW
3022 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3023 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3024 FRINT))]
42fc9a7f
JG
3025 "TARGET_SIMD"
3026 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 3027 [(set_attr "type" "neon_fp_round_<stype><q>")]
42fc9a7f
JG
3028)
3029
ce966824
JG
3030;; Vector versions of the fcvt standard patterns.
3031;; Expands to lbtrunc, lround, lceil, lfloor
daef0a8c 3032(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
42fc9a7f
JG
3033 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3034 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c 3035 [(match_operand:VHSDF 1 "register_operand" "w")]
42fc9a7f
JG
3036 FCVT)))]
3037 "TARGET_SIMD"
3038 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 3039 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
42fc9a7f
JG
3040)
3041
d7f33f07
JW
3042;; HF Scalar variants of related SIMD instructions.
3043(define_insn "l<fcvt_pattern><su_optab>hfhi2"
3044 [(set (match_operand:HI 0 "register_operand" "=w")
3045 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3046 FCVT)))]
3047 "TARGET_SIMD_F16INST"
3048 "fcvt<frint_suffix><su>\t%h0, %h1"
3049 [(set_attr "type" "neon_fp_to_int_s")]
3050)
3051
3052(define_insn "<optab>_trunchfhi2"
3053 [(set (match_operand:HI 0 "register_operand" "=w")
3054 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3055 "TARGET_SIMD_F16INST"
3056 "fcvtz<su>\t%h0, %h1"
3057 [(set_attr "type" "neon_fp_to_int_s")]
3058)
3059
3060(define_insn "<optab>hihf2"
3061 [(set (match_operand:HF 0 "register_operand" "=w")
3062 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3063 "TARGET_SIMD_F16INST"
3064 "<su_optab>cvtf\t%h0, %h1"
3065 [(set_attr "type" "neon_int_to_fp_s")]
3066)
3067
39252973
KT
3068(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3069 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3070 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3071 [(mult:VDQF
3072 (match_operand:VDQF 1 "register_operand" "w")
3073 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3074 UNSPEC_FRINTZ)))]
3075 "TARGET_SIMD
3076 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3077 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3078 {
3079 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3080 char buf[64];
3081 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3082 output_asm_insn (buf, operands);
3083 return "";
3084 }
3085 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3086)
3087
daef0a8c 3088(define_expand "<optab><VHSDF:mode><fcvt_target>2"
384be29f
JG
3089 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3090 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c
JW
3091 [(match_operand:VHSDF 1 "register_operand")]
3092 UNSPEC_FRINTZ)))]
384be29f
JG
3093 "TARGET_SIMD"
3094 {})
3095
daef0a8c 3096(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
384be29f
JG
3097 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3098 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
daef0a8c
JW
3099 [(match_operand:VHSDF 1 "register_operand")]
3100 UNSPEC_FRINTZ)))]
384be29f
JG
3101 "TARGET_SIMD"
3102 {})
3103
daef0a8c
JW
3104(define_expand "ftrunc<VHSDF:mode>2"
3105 [(set (match_operand:VHSDF 0 "register_operand")
3106 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3107 UNSPEC_FRINTZ))]
384be29f
JG
3108 "TARGET_SIMD"
3109 {})
3110
daef0a8c
JW
3111(define_insn "<optab><fcvt_target><VHSDF:mode>2"
3112 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3113 (FLOATUORS:VHSDF
1709ff9b
JG
3114 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3115 "TARGET_SIMD"
3116 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 3117 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1709ff9b
JG
3118)
3119
4c871069
JG
3120;; Conversions between vectors of floats and doubles.
3121;; Contains a mix of patterns to match standard pattern names
3122;; and those for intrinsics.
3123
3124;; Float widening operations.
3125
03873eb9
AL
3126(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3128 (float_extend:<VWIDE> (vec_select:<VHALF>
3129 (match_operand:VQ_HSF 1 "register_operand" "w")
3130 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3131 )))]
4c871069 3132 "TARGET_SIMD"
03873eb9 3133 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
78ec3036 3134 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
3135)
3136
2644d4d9
JW
3137;; Convert between fixed-point and floating-point (vector modes)
3138
33d72b63
JW
3139(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3140 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3141 (unspec:<VHSDF:FCVT_TARGET>
3142 [(match_operand:VHSDF 1 "register_operand" "w")
3143 (match_operand:SI 2 "immediate_operand" "i")]
2644d4d9
JW
3144 FCVT_F2FIXED))]
3145 "TARGET_SIMD"
3146 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
33d72b63 3147 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2644d4d9
JW
3148)
3149
33d72b63
JW
3150(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3151 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3152 (unspec:<VDQ_HSDI:FCVT_TARGET>
3153 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3154 (match_operand:SI 2 "immediate_operand" "i")]
2644d4d9
JW
3155 FCVT_FIXED2F))]
3156 "TARGET_SIMD"
3157 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
33d72b63 3158 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2644d4d9
JW
3159)
3160
d5d27976
JG
3161;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3162;; is inconsistent with vector ordering elsewhere in the compiler, in that
3163;; the meaning of HI and LO changes depending on the target endianness.
3164;; While elsewhere we map the higher numbered elements of a vector to
3165;; the lower architectural lanes of the vector, for these patterns we want
3166;; to always treat "hi" as referring to the higher architectural lanes.
3167;; Consequently, while the patterns below look inconsistent with our
9c582551 3168;; other big-endian patterns their behavior is as required.
d5d27976 3169
03873eb9 3170(define_expand "vec_unpacks_lo_<mode>"
1bbffb87
DZ
3171 [(match_operand:<VWIDE> 0 "register_operand")
3172 (match_operand:VQ_HSF 1 "register_operand")]
4c871069 3173 "TARGET_SIMD"
03873eb9 3174 {
f5cbabc1 3175 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
03873eb9
AL
3176 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3177 operands[1], p));
3178 DONE;
3179 }
3180)
3181
3182(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3183 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3184 (float_extend:<VWIDE> (vec_select:<VHALF>
3185 (match_operand:VQ_HSF 1 "register_operand" "w")
3186 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3187 )))]
3188 "TARGET_SIMD"
3189 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
78ec3036 3190 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
3191)
3192
03873eb9 3193(define_expand "vec_unpacks_hi_<mode>"
1bbffb87
DZ
3194 [(match_operand:<VWIDE> 0 "register_operand")
3195 (match_operand:VQ_HSF 1 "register_operand")]
03873eb9
AL
3196 "TARGET_SIMD"
3197 {
f5cbabc1 3198 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
03873eb9
AL
3199 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3200 operands[1], p));
3201 DONE;
3202 }
3203)
3204(define_insn "aarch64_float_extend_lo_<Vwide>"
3205 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3206 (float_extend:<VWIDE>
3207 (match_operand:VDF 1 "register_operand" "w")))]
4c871069 3208 "TARGET_SIMD"
03873eb9 3209 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
78ec3036 3210 [(set_attr "type" "neon_fp_cvt_widen_s")]
4c871069
JG
3211)
3212
3213;; Float narrowing operations.
3214
8456a4cd
JW
3215(define_insn "aarch64_float_trunc_rodd_df"
3216 [(set (match_operand:SF 0 "register_operand" "=w")
3217 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3218 UNSPEC_FCVTXN))]
3219 "TARGET_SIMD"
3220 "fcvtxn\\t%s0, %d1"
3221 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3222)
3223
3224(define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3225 [(set (match_operand:V2SF 0 "register_operand" "=w")
3226 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3227 UNSPEC_FCVTXN))]
3228 "TARGET_SIMD"
3229 "fcvtxn\\t%0.2s, %1.2d"
3230 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3231)
3232
3233(define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3234 [(set (match_operand:V4SF 0 "register_operand" "=w")
3235 (vec_concat:V4SF
3236 (match_operand:V2SF 1 "register_operand" "0")
3237 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3238 UNSPEC_FCVTXN)))]
3239 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3240 "fcvtxn2\\t%0.4s, %2.2d"
3241 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3242)
3243
3244(define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3245 [(set (match_operand:V4SF 0 "register_operand" "=w")
3246 (vec_concat:V4SF
3247 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3248 UNSPEC_FCVTXN)
3249 (match_operand:V2SF 1 "register_operand" "0")))]
3250 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3251 "fcvtxn2\\t%0.4s, %2.2d"
3252 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3253)
3254
3255(define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3256 [(match_operand:V4SF 0 "register_operand")
3257 (match_operand:V2SF 1 "register_operand")
3258 (match_operand:V2DF 2 "register_operand")]
3259 "TARGET_SIMD"
3260{
3261 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3262 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3263 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3264 emit_insn (gen (operands[0], operands[1], operands[2]));
3265 DONE;
3266}
3267)
3268
922f9c25
AL
3269(define_insn "aarch64_float_truncate_lo_<mode>"
3270 [(set (match_operand:VDF 0 "register_operand" "=w")
3271 (float_truncate:VDF
3272 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4c871069 3273 "TARGET_SIMD"
922f9c25 3274 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
78ec3036 3275 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
4c871069
JG
3276)
3277
d5d27976 3278(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
922f9c25
AL
3279 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3280 (vec_concat:<VDBL>
3281 (match_operand:VDF 1 "register_operand" "0")
3282 (float_truncate:VDF
3283 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
d5d27976 3284 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
922f9c25 3285 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
78ec3036 3286 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
4c871069
JG
3287)
3288
;; FCVTN2, big-endian: same instruction, but the vec_concat halves are
;; swapped relative to the little-endian pattern.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
3299
;; Endianness dispatcher for the FCVTN2 patterns above.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_float_truncate_hi_<Vdbl>_be (operands[0],
							operands[1],
							operands[2]));
  else
    emit_insn (gen_aarch64_float_truncate_hi_<Vdbl>_le (operands[0],
							operands[1],
							operands[2]));
  DONE;
}
)
3313
;; Pack two V2DF vectors into one V4SF: narrow one source into the low half
;; (FCVTN) and the other into the high half (FCVTN2).  Which source feeds
;; which half depends on endianness.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	  (float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	))]
  "TARGET_SIMD"
  {
    rtx lo_half = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (lo_half, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   lo_half, operands[hi]));
    DONE;
  }
)
3334
;; Pack two scalar DFs into a V2SF: concatenate the scalars into a V2DF
;; temporary, then narrow it with FCVTN.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF (match_operand:DF 1 "general_operand"))
	  (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
  "TARGET_SIMD"
  {
    /* The temporary holds the concatenation of two DF values, so it must
       be V2DF (the <VWIDE> input of float_truncate_lo_v2sf), not V2SF.  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
3348
43e9d192
IB
3349;; FP Max/Min
3350;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3351;; expression like:
3352;; a = (b < c) ? b : c;
7a335530
IB
3353;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3354;; -fno-signed-zeros are enabled either explicitly or indirectly via
3355;; -ffast-math.
43e9d192
IB
3356;;
3357;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3358;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3359;; operand will be returned when both operands are zero (i.e. they may not
3360;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3361;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3362;; NaNs.
3363
;; smax/smin standard names; these map to FMAXNM/FMINNM which give the
;; IEEE-754:2008 NaN-propagation semantics GCC expects in fast-math mode.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
3372
;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
3385
;; 'across lanes' add.

;; Pairwise FP add (FADDP); building block of the V4SF sum reduction below.
(define_insn "aarch64_faddp<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
3397
;; Integer sum across all lanes (ADDV / ADDP) producing a scalar.
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
		      UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
3406
;; V2SI has no ADDV form; a single pairwise ADDP gives the reduction.
(define_insn "reduc_plus_scal_v2si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
		   UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
3415
;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
	(zero_extend:GPI
	  (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
			       UNSPEC_ADDV)))]
  "TARGET_SIMD"
  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)
3426
;; Two-element FP vectors reduce with a single pairwise FADDP.
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
3435
;; V4SF sum reduction: two rounds of pairwise FADDP, then extract lane 0
;; (endian-corrected) as the scalar result.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:SF [(match_operand:V4SF 1 "register_operand")]
		  UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx lane0 = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx acc = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (acc, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (acc, acc, acc));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], acc, lane0));
  DONE;
})
3449
;; Widening sum across all lanes (SADDLV/UADDLV).
(define_insn "aarch64_<su>addlv<mode>"
 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
       (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
			 USADDLV))]
 "TARGET_SIMD"
 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
3458
;; Widening pairwise add (SADDLP/UADDLP): adjacent lane pairs summed into
;; double-width lanes.
(define_insn "aarch64_<su>addlp<mode>"
 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
       (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
		       USADDLP))]
 "TARGET_SIMD"
 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
3467
;; Count leading redundant sign bits (CLS).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
3475
;; Count leading zeros per lane (CLZ).
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
3483
;; Per-byte population count (CNT).
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
3491
;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin.)
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINV)]
  "TARGET_SIMD"
  {
    /* Reduce into a vector scratch, then read the (endian-corrected)
       lane 0 back as the scalar result.  */
    rtx lane0 = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, lane0));
    DONE;
  }
)
3510
;; IEEE fmaxnm/fminnm reductions forward to the smax/smin reduction above.
(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINNMV)]
  "TARGET_SIMD"
  {
    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
    DONE;
  }
)
3521
;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    /* Same shape as the FP reduction: reduce, then extract lane 0.  */
    rtx lane0 = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, lane0));
    DONE;
  }
)
3537
;; Integer SMAXV/SMINV/UMAXV/UMINV across-lanes reduction.
(define_insn "aarch64_reduc_<optab>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		      MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
3546
;; V2SI has no across-lanes form; one pairwise SMAXP/SMINP/UMAXP/UMINP does it.
(define_insn "aarch64_reduc_<optab>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
3555
;; FP FMAXV/FMINV/FMAXNMV/FMINNMV across-lanes reduction.
(define_insn "aarch64_reduc_<optab>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		     FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
3564
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; to select.
;;
;; Thus our BSL is of the form:
;;   op0 = bsl (mask, op2, op3)
;; We can use any of:
;;
;;   if (op0 = mask)
;;     bsl mask, op1, op2
;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
;;     bif op0, op1, mask
;;
;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.

(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
3602
;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.

(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
3625
;; DImode is special, we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers.  If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again.  However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.

(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
3677
;; Commuted-XOR variant of aarch64_simd_bsldi_internal (see the _alt
;; pattern above for why both forms are needed).
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
3719
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx dest = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      /* Do the selection in the integer-equivalent mode and move the
	 result back at the end.  */
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      dest = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (dest,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (dest != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, dest));

  DONE;
})
3745
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      /* General case: force both values into registers and use BSL.  */
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
3774
;; Patterns comparing two vectors to produce a mask.

(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* The signed comparisons have zero-immediate forms; everything else
     needs operand 3 in a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
3856
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
4029
;; Unsigned compares share the integer vec_cmp expander, which keys the
;; emitted instruction off the rtx comparison code (LTU/GEU/...).
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
4041
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (pred, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));

  DONE;
})
4071
;; Mixed-mode vcond: FP compare selecting between integer vectors of the
;; same width.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (pred, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], pred));

  DONE;
})
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx pred = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (pred, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));
  DONE;
})
4131
;; Unsigned integer compare selecting between FP vectors of the same width.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  pred, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));
  DONE;
})
4161
;; Patterns for AArch64 SIMD Intrinsics.

;; Lane extraction with sign extension to general purpose register.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian at output time.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)
4179
4180(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4181 [(set (match_operand:GPI 0 "register_operand" "=r")
4182 (zero_extend:GPI
4183 (vec_select:<VDQQH:VEL>
4184 (match_operand:VDQQH 1 "register_operand" "w")
4185 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4186 "TARGET_SIMD"
4187 {
4188 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4189 INTVAL (operands[2]));
4190 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4191 }
4192 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
43e9d192
IB
4193)
4194
66adb8eb
JG
4195;; Lane extraction of a value, neither sign nor zero extension
4196;; is guaranteed so upper bits should be considered undefined.
ed225d0c 4197;; RTL uses GCC vector extension indices throughout so flip only for assembly.
e140f5fd
KT
4198;; Extracting lane zero is split into a simple move when it is between SIMD
4199;; registers or a store.
4200(define_insn_and_split "aarch64_get_lane<mode>"
2eb2847e 4201 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
43e9d192 4202 (vec_select:<VEL>
71a11456 4203 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
e58bf20a 4204 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
43e9d192 4205 "TARGET_SIMD"
e58bf20a 4206 {
7ac29c0f 4207 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
e58bf20a
TB
4208 switch (which_alternative)
4209 {
4210 case 0:
4211 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4212 case 1:
4213 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4214 case 2:
4215 return "st1\\t{%1.<Vetype>}[%2], %0";
4216 default:
4217 gcc_unreachable ();
4218 }
4219 }
e140f5fd
KT
4220 "&& reload_completed
4221 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4222 [(set (match_dup 0) (match_dup 1))]
4223 {
4224 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4225 }
e58bf20a 4226 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
43e9d192
IB
4227)
4228
40757a25
KT
4229(define_insn "load_pair_lanes<mode>"
4230 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4231 (vec_concat:<VDBL>
83d7e720
RS
4232 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4233 (match_operand:VDCSIF 2 "memory_operand" "m")))]
958448a9
RS
4234 "TARGET_SIMD
4235 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
83d7e720
RS
4236 "ldr\\t%<single_dtype>0, %1"
4237 [(set_attr "type" "neon_load1_1reg<dblq>")]
40757a25
KT
4238)
4239
85ac2fe4
RS
4240;; This STP pattern is a partial duplicate of the general vec_concat patterns
4241;; below. The reason for having both of them is that the alternatives of
4242;; the later patterns do not have consistent register preferences: the STP
4243;; alternatives have no preference between GPRs and FPRs (and if anything,
4244;; the GPR form is more natural for scalar integers) whereas the other
4245;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4246;;
4247;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4248;; which the destination was always memory. On the other hand, expressing
4249;; the true preferences makes GPRs seem more palatable than they really are
4250;; for register destinations.
4251;;
4252;; Despite that, we do still want the general form to have STP alternatives,
4253;; in order to handle cases where a register destination is spilled.
4254;;
4255;; The best compromise therefore seemed to be to have a dedicated STP
4256;; pattern to catch cases in which the destination was always memory.
4257;; This dedicated pattern must come first.
4258
7692ce17 4259(define_insn "store_pair_lanes<mode>"
a25831ac 4260 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
7692ce17 4261 (vec_concat:<VDBL>
83d7e720
RS
4262 (match_operand:VDCSIF 1 "register_operand" "w, r")
4263 (match_operand:VDCSIF 2 "register_operand" "w, r")))]
7692ce17
KT
4264 "TARGET_SIMD"
4265 "@
83d7e720
RS
4266 stp\t%<single_type>1, %<single_type>2, %y0
4267 stp\t%<single_wx>1, %<single_wx>2, %y0"
7692ce17
KT
4268 [(set_attr "type" "neon_stp, store_16")]
4269)
4270
85ac2fe4
RS
4271;; Form a vector whose least significant half comes from operand 1 and whose
4272;; most significant half comes from operand 2. The register alternatives
4273;; tie the least significant half to the same register as the destination,
4274;; so that only the other half needs to be handled explicitly. For the
4275;; reasons given above, the STP alternatives use ? for constraints that
4276;; the register alternatives either don't accept or themselves disparage.
4277
4278(define_insn "*aarch64_combine_internal<mode>"
4279 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
4280 (vec_concat:<VDBL>
83d7e720
RS
4281 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
4282 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
85ac2fe4
RS
4283 "TARGET_SIMD
4284 && !BYTES_BIG_ENDIAN
4285 && (register_operand (operands[0], <VDBL>mode)
4286 || register_operand (operands[2], <MODE>mode))"
4287 "@
83d7e720
RS
4288 ins\t%0.<single_type>[1], %2.<single_type>[0]
4289 ins\t%0.<single_type>[1], %<single_wx>2
4290 ld1\t{%0.<single_type>}[1], %2
4291 stp\t%<single_type>1, %<single_type>2, %y0
4292 stp\t%<single_wx>1, %<single_wx>2, %y0"
4293 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
85ac2fe4
RS
4294)
4295
4296(define_insn "*aarch64_combine_internal_be<mode>"
4297 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
4298 (vec_concat:<VDBL>
83d7e720
RS
4299 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
4300 (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
85ac2fe4
RS
4301 "TARGET_SIMD
4302 && BYTES_BIG_ENDIAN
4303 && (register_operand (operands[0], <VDBL>mode)
4304 || register_operand (operands[2], <MODE>mode))"
4305 "@
83d7e720
RS
4306 ins\t%0.<single_type>[1], %2.<single_type>[0]
4307 ins\t%0.<single_type>[1], %<single_wx>2
4308 ld1\t{%0.<single_type>}[1], %2
4309 stp\t%<single_type>2, %<single_type>1, %y0
4310 stp\t%<single_wx>2, %<single_wx>1, %y0"
4311 [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
85ac2fe4
RS
4312)
4313
43e9d192
IB
4314;; In this insn, operand 1 should be low, and operand 2 the high part of the
4315;; dest vector.
4316
4057266c 4317(define_insn "*aarch64_combinez<mode>"
c0233c78 4318 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
6432f025 4319 (vec_concat:<VDBL>
83d7e720
RS
4320 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
4321 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
5a908485 4322 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
c0233c78 4323 "@
83d7e720
RS
4324 fmov\\t%<single_type>0, %<single_type>1
4325 fmov\t%<single_type>0, %<single_wx>1
4326 ldr\\t%<single_type>0, %1"
c0233c78 4327 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
488461d8 4328 (set_attr "arch" "simd,fp,simd")]
5a908485
JG
4329)
4330
4057266c 4331(define_insn "*aarch64_combinez_be<mode>"
c0233c78 4332 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
5a908485 4333 (vec_concat:<VDBL>
83d7e720
RS
4334 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4335 (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
5a908485 4336 "TARGET_SIMD && BYTES_BIG_ENDIAN"
c0233c78 4337 "@
83d7e720
RS
4338 fmov\\t%<single_type>0, %<single_type>1
4339 fmov\t%<single_type>0, %<single_wx>1
4340 ldr\\t%<single_type>0, %1"
c0233c78 4341 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
488461d8 4342 (set_attr "arch" "simd,fp,simd")]
43e9d192
IB
4343)
4344
4057266c
RS
4345;; Form a vector whose first half (in array order) comes from operand 1
4346;; and whose second half (in array order) comes from operand 2.
4347;; This operand order follows the RTL vec_concat operation.
4348(define_expand "@aarch64_vec_concat<mode>"
4349 [(set (match_operand:<VDBL> 0 "register_operand")
4350 (vec_concat:<VDBL>
83d7e720
RS
4351 (match_operand:VDCSIF 1 "general_operand")
4352 (match_operand:VDCSIF 2 "general_operand")))]
5a908485
JG
4353 "TARGET_SIMD"
4354{
4057266c
RS
4355 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4356 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4357
4358 if (MEM_P (operands[1])
4359 && MEM_P (operands[2])
4360 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4361 /* Use load_pair_lanes<mode>. */
4362 ;
4363 else if (operands[hi] == CONST0_RTX (<MODE>mode))
b41e6dd5 4364 {
4057266c
RS
4365 /* Use *aarch64_combinez<mode>. */
4366 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4367 operands[lo] = force_reg (<MODE>mode, operands[lo]);
b41e6dd5
RS
4368 }
4369 else
4057266c
RS
4370 {
4371 /* Use *aarch64_combine_general<mode>. */
4372 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4373 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4374 {
4375 if (MEM_P (operands[hi]))
4376 {
4377 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4378 operands[hi] = replace_equiv_address (operands[hi], addr);
4379 }
4380 else
4381 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4382 }
4383 }
4384})
8b033a8a 4385
4057266c
RS
4386;; Form a vector whose least significant half comes from operand 1 and whose
4387;; most significant half comes from operand 2. This operand order follows
4388;; arm_neon.h vcombine* intrinsics.
4389(define_expand "aarch64_combine<mode>"
5a908485 4390 [(match_operand:<VDBL> 0 "register_operand")
4057266c
RS
4391 (match_operand:VDC 1 "general_operand")
4392 (match_operand:VDC 2 "general_operand")]
8b033a8a 4393 "TARGET_SIMD"
4057266c
RS
4394{
4395 if (BYTES_BIG_ENDIAN)
4396 std::swap (operands[1], operands[2]);
4397 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4398 operands[2]));
4399 DONE;
4400}
0f686aa9 4401)
43e9d192
IB
4402
4403;; <su><addsub>l<q>.
4404
8da00d65 4405(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
43e9d192
IB
4406 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4407 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4408 (match_operand:VQW 1 "register_operand" "w")
4409 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4410 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4411 (match_operand:VQW 2 "register_operand" "w")
4412 (match_dup 3)))))]
4413 "TARGET_SIMD"
8da00d65 4414 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 4415 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
43e9d192
IB
4416)
4417
8da00d65
VP
4418(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4419 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4420 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4421 (match_operand:VQW 1 "register_operand" "w")
4422 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4423 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4424 (match_operand:VQW 2 "register_operand" "w")
4425 (match_dup 3)))))]
4426 "TARGET_SIMD"
4427 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
78ec3036 4428 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
8da00d65
VP
4429)
4430
ec46904e
JH
4431(define_expand "vec_widen_<su>addl_lo_<mode>"
4432 [(match_operand:<VWIDE> 0 "register_operand")
4433 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4434 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4435 "TARGET_SIMD"
4436{
4437 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4438 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4439 operands[2], p));
4440 DONE;
4441})
4442
4443(define_expand "vec_widen_<su>addl_hi_<mode>"
4444 [(match_operand:<VWIDE> 0 "register_operand")
4445 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4446 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4447 "TARGET_SIMD"
4448{
4449 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4450 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4451 operands[2], p));
4452 DONE;
4453})
4454
4455(define_expand "vec_widen_<su>subl_lo_<mode>"
4456 [(match_operand:<VWIDE> 0 "register_operand")
4457 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4458 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4459 "TARGET_SIMD"
4460{
4461 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4462 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4463 operands[2], p));
4464 DONE;
4465})
4466
4467(define_expand "vec_widen_<su>subl_hi_<mode>"
4468 [(match_operand:<VWIDE> 0 "register_operand")
4469 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4470 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4471 "TARGET_SIMD"
4472{
4473 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4474 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4475 operands[2], p));
4476 DONE;
4477})
8da00d65 4478
43e9d192 4479(define_expand "aarch64_saddl2<mode>"
1bbffb87
DZ
4480 [(match_operand:<VWIDE> 0 "register_operand")
4481 (match_operand:VQW 1 "register_operand")
4482 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4483 "TARGET_SIMD"
4484{
f5cbabc1 4485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65
VP
4486 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4487 operands[2], p));
43e9d192
IB
4488 DONE;
4489})
4490
4491(define_expand "aarch64_uaddl2<mode>"
1bbffb87
DZ
4492 [(match_operand:<VWIDE> 0 "register_operand")
4493 (match_operand:VQW 1 "register_operand")
4494 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4495 "TARGET_SIMD"
4496{
f5cbabc1 4497 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65
VP
4498 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4499 operands[2], p));
43e9d192
IB
4500 DONE;
4501})
4502
4503(define_expand "aarch64_ssubl2<mode>"
1bbffb87
DZ
4504 [(match_operand:<VWIDE> 0 "register_operand")
4505 (match_operand:VQW 1 "register_operand")
4506 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4507 "TARGET_SIMD"
4508{
f5cbabc1 4509 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65 4510 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
43e9d192
IB
4511 operands[2], p));
4512 DONE;
4513})
4514
4515(define_expand "aarch64_usubl2<mode>"
1bbffb87
DZ
4516 [(match_operand:<VWIDE> 0 "register_operand")
4517 (match_operand:VQW 1 "register_operand")
4518 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4519 "TARGET_SIMD"
4520{
f5cbabc1 4521 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
8da00d65 4522 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
43e9d192
IB
4523 operands[2], p));
4524 DONE;
4525})
4526
4527(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4528 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4529 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
a844a695 4530 (match_operand:VD_BHSI 1 "register_operand" "w"))
43e9d192 4531 (ANY_EXTEND:<VWIDE>
a844a695 4532 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
43e9d192 4533 "TARGET_SIMD"
130ee2eb 4534 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 4535 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
43e9d192
IB
4536)
4537
4538;; <su><addsub>w<q>.
4539
b1b49824 4540(define_expand "widen_ssum<mode>3"
1bbffb87 4541 [(set (match_operand:<VDBLW> 0 "register_operand")
b1b49824 4542 (plus:<VDBLW> (sign_extend:<VDBLW>
1bbffb87
DZ
4543 (match_operand:VQW 1 "register_operand"))
4544 (match_operand:<VDBLW> 2 "register_operand")))]
b1b49824
MC
4545 "TARGET_SIMD"
4546 {
f5cbabc1 4547 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
b1b49824
MC
4548 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4549
4550 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4551 operands[1], p));
4552 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4553 DONE;
4554 }
4555)
4556
4557(define_expand "widen_ssum<mode>3"
1bbffb87 4558 [(set (match_operand:<VWIDE> 0 "register_operand")
b1b49824 4559 (plus:<VWIDE> (sign_extend:<VWIDE>
1bbffb87
DZ
4560 (match_operand:VD_BHSI 1 "register_operand"))
4561 (match_operand:<VWIDE> 2 "register_operand")))]
b1b49824
MC
4562 "TARGET_SIMD"
4563{
4564 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4565 DONE;
4566})
4567
4568(define_expand "widen_usum<mode>3"
1bbffb87 4569 [(set (match_operand:<VDBLW> 0 "register_operand")
b1b49824 4570 (plus:<VDBLW> (zero_extend:<VDBLW>
1bbffb87
DZ
4571 (match_operand:VQW 1 "register_operand"))
4572 (match_operand:<VDBLW> 2 "register_operand")))]
b1b49824
MC
4573 "TARGET_SIMD"
4574 {
f5cbabc1 4575 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
b1b49824
MC
4576 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4577
4578 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4579 operands[1], p));
4580 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4581 DONE;
4582 }
4583)
4584
4585(define_expand "widen_usum<mode>3"
1bbffb87 4586 [(set (match_operand:<VWIDE> 0 "register_operand")
b1b49824 4587 (plus:<VWIDE> (zero_extend:<VWIDE>
1bbffb87
DZ
4588 (match_operand:VD_BHSI 1 "register_operand"))
4589 (match_operand:<VWIDE> 2 "register_operand")))]
b1b49824
MC
4590 "TARGET_SIMD"
4591{
4592 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4593 DONE;
4594})
4595
8da03df5 4596(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
43e9d192 4597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
4598 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4599 (ANY_EXTEND:<VWIDE>
4600 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
43e9d192 4601 "TARGET_SIMD"
8da03df5
MM
4602 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4603 [(set_attr "type" "neon_sub_widen")]
43e9d192
IB
4604)
4605
8da03df5 4606(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
b1b49824 4607 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
4608 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4609 (ANY_EXTEND:<VWIDE>
4610 (vec_select:<VHALF>
4611 (match_operand:VQW 2 "register_operand" "w")
4612 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
b1b49824 4613 "TARGET_SIMD"
8da03df5
MM
4614 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4615 [(set_attr "type" "neon_sub_widen")]
b1b49824
MC
4616)
4617
8da03df5 4618(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
43e9d192 4619 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8da03df5
MM
4620 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4621 (ANY_EXTEND:<VWIDE>
4622 (vec_select:<VHALF>
4623 (match_operand:VQW 2 "register_operand" "w")
4624 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
43e9d192 4625 "TARGET_SIMD"
8da03df5
MM
4626 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4627 [(set_attr "type" "neon_sub_widen")]
4628)
4629
4630(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4632 (plus:<VWIDE>
4633 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4634 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4635 "TARGET_SIMD"
4636 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4637 [(set_attr "type" "neon_add_widen")]
4638)
4639
4640(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4642 (plus:<VWIDE>
4643 (ANY_EXTEND:<VWIDE>
4644 (vec_select:<VHALF>
4645 (match_operand:VQW 2 "register_operand" "w")
4646 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4647 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4648 "TARGET_SIMD"
4649 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4650 [(set_attr "type" "neon_add_widen")]
4651)
4652
4653(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4654 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4655 (plus:<VWIDE>
4656 (ANY_EXTEND:<VWIDE>
4657 (vec_select:<VHALF>
4658 (match_operand:VQW 2 "register_operand" "w")
4659 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4660 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4661 "TARGET_SIMD"
4662 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4663 [(set_attr "type" "neon_add_widen")]
43e9d192
IB
4664)
4665
4666(define_expand "aarch64_saddw2<mode>"
1bbffb87
DZ
4667 [(match_operand:<VWIDE> 0 "register_operand")
4668 (match_operand:<VWIDE> 1 "register_operand")
4669 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4670 "TARGET_SIMD"
4671{
f5cbabc1 4672 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4673 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
4674 operands[2], p));
4675 DONE;
4676})
4677
4678(define_expand "aarch64_uaddw2<mode>"
1bbffb87
DZ
4679 [(match_operand:<VWIDE> 0 "register_operand")
4680 (match_operand:<VWIDE> 1 "register_operand")
4681 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4682 "TARGET_SIMD"
4683{
f5cbabc1 4684 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4685 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
4686 operands[2], p));
4687 DONE;
4688})
4689
4690
4691(define_expand "aarch64_ssubw2<mode>"
1bbffb87
DZ
4692 [(match_operand:<VWIDE> 0 "register_operand")
4693 (match_operand:<VWIDE> 1 "register_operand")
4694 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4695 "TARGET_SIMD"
4696{
f5cbabc1 4697 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4698 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
4699 operands[2], p));
4700 DONE;
4701})
4702
4703(define_expand "aarch64_usubw2<mode>"
1bbffb87
DZ
4704 [(match_operand:<VWIDE> 0 "register_operand")
4705 (match_operand:<VWIDE> 1 "register_operand")
4706 (match_operand:VQW 2 "register_operand")]
43e9d192
IB
4707 "TARGET_SIMD"
4708{
f5cbabc1 4709 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
43e9d192
IB
4710 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
4711 operands[2], p));
4712 DONE;
4713})
4714
4715;; <su><r>h<addsub>.
4716
42addb5a
RS
4717(define_expand "<u>avg<mode>3_floor"
4718 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4719 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4720 (match_operand:VDQ_BHSI 2 "register_operand")]
4721 HADD))]
4722 "TARGET_SIMD"
4723)
4724
4725(define_expand "<u>avg<mode>3_ceil"
4726 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4727 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4728 (match_operand:VDQ_BHSI 2 "register_operand")]
4729 RHADD))]
4730 "TARGET_SIMD"
4731)
4732
43e9d192 4733(define_insn "aarch64_<sur>h<addsub><mode>"
a844a695
AL
4734 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4735 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
4736 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
43e9d192
IB
4737 HADDSUB))]
4738 "TARGET_SIMD"
4739 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 4740 [(set_attr "type" "neon_<addsub>_halve<q>")]
43e9d192
IB
4741)
4742
4743;; <r><addsub>hn<q>.
4744
dbfc149b
JW
4745(define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
4746 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4747 (vec_concat:<VNARROWQ2>
4748 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4749 (match_operand:VQN 2 "register_operand" "w")]
4750 ADDSUBHN)
4751 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
4752 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
43e9d192 4753 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 4754 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
43e9d192
IB
4755)
4756
dbfc149b
JW
4757(define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
4758 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4759 (vec_concat:<VNARROWQ2>
4760 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
4761 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4762 (match_operand:VQN 2 "register_operand" "w")]
4763 ADDSUBHN)))]
4764 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4765 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4766 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4767)
4768
4769(define_expand "aarch64_<sur><addsub>hn<mode>"
4770 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4771 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
4772 (match_operand:VQN 2 "register_operand")]
4773 ADDSUBHN))]
4774 "TARGET_SIMD"
4775 {
4776 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
4777 if (BYTES_BIG_ENDIAN)
4778 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
4779 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4780 else
4781 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
4782 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4783
4784 /* The intrinsic expects a narrow result, so emit a subreg that will get
4785 optimized away as appropriate. */
4786 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
4787 <VNARROWQ2>mode));
4788 DONE;
4789 }
4790)
4791
3eddaad0 4792(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
43e9d192 4793 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3eddaad0
JW
4794 (vec_concat:<VNARROWQ2>
4795 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4796 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4797 (match_operand:VQN 3 "register_operand" "w")]
4798 ADDSUBHN)))]
4799 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4800 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4801 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4802)
4803
4804(define_insn "aarch64_<sur><addsub>hn2<mode>_insn_be"
4805 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4806 (vec_concat:<VNARROWQ2>
4807 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4808 (match_operand:VQN 3 "register_operand" "w")]
4809 ADDSUBHN)
4810 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4811 "TARGET_SIMD && BYTES_BIG_ENDIAN"
43e9d192 4812 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
78ec3036 4813 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
43e9d192
IB
4814)
4815
3eddaad0
JW
4816(define_expand "aarch64_<sur><addsub>hn2<mode>"
4817 [(match_operand:<VNARROWQ2> 0 "register_operand")
4818 (match_operand:<VNARROWQ> 1 "register_operand")
4819 (unspec [(match_operand:VQN 2 "register_operand")
4820 (match_operand:VQN 3 "register_operand")]
4821 ADDSUBHN)]
4822 "TARGET_SIMD"
4823 {
4824 if (BYTES_BIG_ENDIAN)
4825 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_be (operands[0],
4826 operands[1], operands[2], operands[3]));
4827 else
4828 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_le (operands[0],
4829 operands[1], operands[2], operands[3]));
4830 DONE;
4831 }
4832)
4833
43e9d192
IB
4834;; pmul.
4835
4836(define_insn "aarch64_pmul<mode>"
4837 [(set (match_operand:VB 0 "register_operand" "=w")
4838 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
4839 (match_operand:VB 2 "register_operand" "w")]
4840 UNSPEC_PMUL))]
4841 "TARGET_SIMD"
4842 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 4843 [(set_attr "type" "neon_mul_<Vetype><q>")]
43e9d192
IB
4844)
4845
f546e0d3
JW
4846(define_insn "aarch64_pmullv8qi"
4847 [(set (match_operand:V8HI 0 "register_operand" "=w")
4848 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
4849 (match_operand:V8QI 2 "register_operand" "w")]
4850 UNSPEC_PMULL))]
4851 "TARGET_SIMD"
4852 "pmull\\t%0.8h, %1.8b, %2.8b"
4853 [(set_attr "type" "neon_mul_b_long")]
4854)
4855
4856(define_insn "aarch64_pmull_hiv16qi_insn"
4857 [(set (match_operand:V8HI 0 "register_operand" "=w")
4858 (unspec:V8HI
4859 [(vec_select:V8QI
4860 (match_operand:V16QI 1 "register_operand" "w")
4861 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
4862 (vec_select:V8QI
4863 (match_operand:V16QI 2 "register_operand" "w")
4864 (match_dup 3))]
4865 UNSPEC_PMULL))]
4866 "TARGET_SIMD"
4867 "pmull2\\t%0.8h, %1.16b, %2.16b"
4868 [(set_attr "type" "neon_mul_b_long")]
4869)
4870
4871(define_expand "aarch64_pmull_hiv16qi"
4872 [(match_operand:V8HI 0 "register_operand")
4873 (match_operand:V16QI 1 "register_operand")
4874 (match_operand:V16QI 2 "register_operand")]
4875 "TARGET_SIMD"
4876 {
4877 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
4878 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
4879 operands[2], p));
4880 DONE;
4881 }
4882)
4883
496ea87d
BB
4884;; fmulx.
4885
4886(define_insn "aarch64_fmulx<mode>"
68ad28c3
JW
4887 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
4888 (unspec:VHSDF_HSDF
4889 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
4890 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
33d72b63 4891 UNSPEC_FMULX))]
496ea87d
BB
4892 "TARGET_SIMD"
4893 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 4894 [(set_attr "type" "neon_fp_mul_<stype>")]
496ea87d
BB
4895)
4896
9030a4d3
BB
4897;; vmulxq_lane_f32, and vmulx_laneq_f32
4898
4899(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
4900 [(set (match_operand:VDQSF 0 "register_operand" "=w")
4901 (unspec:VDQSF
4902 [(match_operand:VDQSF 1 "register_operand" "w")
4903 (vec_duplicate:VDQSF
4904 (vec_select:<VEL>
4905 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
4906 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4907 UNSPEC_FMULX))]
4908 "TARGET_SIMD"
4909 {
7ac29c0f 4910 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
9030a4d3
BB
4911 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4912 }
4913 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
4914)
4915
4916;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
4917
4918(define_insn "*aarch64_mulx_elt<mode>"
4919 [(set (match_operand:VDQF 0 "register_operand" "=w")
4920 (unspec:VDQF
4921 [(match_operand:VDQF 1 "register_operand" "w")
4922 (vec_duplicate:VDQF
4923 (vec_select:<VEL>
4924 (match_operand:VDQF 2 "register_operand" "w")
4925 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4926 UNSPEC_FMULX))]
4927 "TARGET_SIMD"
4928 {
7ac29c0f 4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
9030a4d3
BB
4930 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4931 }
4932 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
4933)
4934
ab2e8f01 4935;; vmulxq_lane
9030a4d3 4936
ab2e8f01
JW
4937(define_insn "*aarch64_mulx_elt_from_dup<mode>"
4938 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4939 (unspec:VHSDF
4940 [(match_operand:VHSDF 1 "register_operand" "w")
4941 (vec_duplicate:VHSDF
6d06971d 4942 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
9030a4d3
BB
4943 UNSPEC_FMULX))]
4944 "TARGET_SIMD"
ab2e8f01
JW
4945 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
4946 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
9030a4d3
BB
4947)
4948
4949;; vmulxs_lane_f32, vmulxs_laneq_f32
4950;; vmulxd_lane_f64 == vmulx_lane_f64
4951;; vmulxd_laneq_f64 == vmulx_laneq_f64
4952
4953(define_insn "*aarch64_vgetfmulx<mode>"
4954 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4955 (unspec:<VEL>
4956 [(match_operand:<VEL> 1 "register_operand" "w")
4957 (vec_select:<VEL>
88119b46 4958 (match_operand:VDQF 2 "register_operand" "w")
9030a4d3
BB
4959 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4960 UNSPEC_FMULX))]
4961 "TARGET_SIMD"
4962 {
7ac29c0f 4963 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
9030a4d3
BB
4964 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
4965 }
4966 [(set_attr "type" "fmul<Vetype>")]
4967)
43e9d192
IB
4968;; <su>q<addsub>
4969
694e6b19 4970(define_insn "aarch64_<su_optab>q<addsub><mode>"
43e9d192
IB
4971 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4972 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
694e6b19 4973 (match_operand:VSDQ_I 2 "register_operand" "w")))]
43e9d192 4974 "TARGET_SIMD"
694e6b19
RS
4975 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4976 [(set_attr "type" "neon_q<addsub><q>")]
43e9d192
IB
4977)
4978
4979;; suqadd and usqadd
4980
4981(define_insn "aarch64_<sur>qadd<mode>"
4982 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4983 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
4984 (match_operand:VSDQ_I 2 "register_operand" "w")]
4985 USSUQADD))]
4986 "TARGET_SIMD"
4987 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
78ec3036 4988 [(set_attr "type" "neon_qadd<q>")]
43e9d192
IB
4989)
4990
43e9d192
IB
4991;; sqmovn and uqmovn
4992
c99f3747 4993(define_insn "aarch64_<su>qmovn<mode>"
43e9d192 4994 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
c99f3747 4995 (SAT_TRUNC:<VNARROWQ>
d0889b5d 4996 (match_operand:SD_HSDI 1 "register_operand" "w")))]
43e9d192 4997 "TARGET_SIMD"
c99f3747 4998 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
d0889b5d
JW
4999 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5000)
5001
5002(define_insn "aarch64_<su>qmovn<mode>_insn_le"
5003 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5004 (vec_concat:<VNARROWQ2>
5005 (SAT_TRUNC:<VNARROWQ>
5006 (match_operand:VQN 1 "register_operand" "w"))
5007 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5008 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5009 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5010 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5011)
5012
5013(define_insn "aarch64_<su>qmovn<mode>_insn_be"
5014 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5015 (vec_concat:<VNARROWQ2>
5016 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5017 (SAT_TRUNC:<VNARROWQ>
5018 (match_operand:VQN 1 "register_operand" "w"))))]
5019 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5020 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5021 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5022)
5023
5024(define_expand "aarch64_<su>qmovn<mode>"
5025 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5026 (SAT_TRUNC:<VNARROWQ>
5027 (match_operand:VQN 1 "register_operand")))]
5028 "TARGET_SIMD"
5029 {
5030 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5031 if (BYTES_BIG_ENDIAN)
5032 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
5033 CONST0_RTX (<VNARROWQ>mode)));
5034 else
5035 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
5036 CONST0_RTX (<VNARROWQ>mode)));
5037
5038 /* The intrinsic expects a narrow result, so emit a subreg that will get
5039 optimized away as appropriate. */
5040 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5041 <VNARROWQ2>mode));
5042 DONE;
5043 }
57b26d65 5044)
43e9d192 5045
52cd1cd1
KT
5046(define_insn "aarch64_<su>qxtn2<mode>_le"
5047 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5048 (vec_concat:<VNARROWQ2>
5049 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5050 (SAT_TRUNC:<VNARROWQ>
5051 (match_operand:VQN 2 "register_operand" "w"))))]
5052 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5053 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5054 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5055)
5056
5057(define_insn "aarch64_<su>qxtn2<mode>_be"
5058 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5059 (vec_concat:<VNARROWQ2>
5060 (SAT_TRUNC:<VNARROWQ>
5061 (match_operand:VQN 2 "register_operand" "w"))
5062 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5063 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5064 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5065 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5066)
5067
5068(define_expand "aarch64_<su>qxtn2<mode>"
5069 [(match_operand:<VNARROWQ2> 0 "register_operand")
5070 (match_operand:<VNARROWQ> 1 "register_operand")
5071 (SAT_TRUNC:<VNARROWQ>
5072 (match_operand:VQN 2 "register_operand"))]
5073 "TARGET_SIMD"
5074 {
5075 if (BYTES_BIG_ENDIAN)
5076 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5077 operands[2]));
5078 else
5079 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5080 operands[2]));
5081 DONE;
5082 }
5083)
5084
c86a3039
JW
5085;; sqmovun
5086
5087(define_insn "aarch64_sqmovun<mode>"
5088 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5089 (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
5090 UNSPEC_SQXTUN))]
5091 "TARGET_SIMD"
5092 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5093 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5094)
5095
5096(define_insn "aarch64_sqmovun<mode>_insn_le"
5097 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5098 (vec_concat:<VNARROWQ2>
5099 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5100 UNSPEC_SQXTUN)
5101 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5102 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5103 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5104 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5105)
5106
5107(define_insn "aarch64_sqmovun<mode>_insn_be"
5108 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5109 (vec_concat:<VNARROWQ2>
5110 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5111 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5112 UNSPEC_SQXTUN)))]
5113 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5114 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5115 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5116)
5117
5118(define_expand "aarch64_sqmovun<mode>"
5119 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5120 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
5121 UNSPEC_SQXTUN))]
5122 "TARGET_SIMD"
5123 {
5124 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5125 if (BYTES_BIG_ENDIAN)
5126 emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
5127 CONST0_RTX (<VNARROWQ>mode)));
5128 else
5129 emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
5130 CONST0_RTX (<VNARROWQ>mode)));
5131
5132 /* The intrinsic expects a narrow result, so emit a subreg that will get
5133 optimized away as appropriate. */
5134 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5135 <VNARROWQ2>mode));
5136 DONE;
5137 }
5138)
5139
8fdfd0cf
KT
5140(define_insn "aarch64_sqxtun2<mode>_le"
5141 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5142 (vec_concat:<VNARROWQ2>
5143 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5144 (unspec:<VNARROWQ>
778ac63f 5145 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
8fdfd0cf
KT
5146 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5147 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5148 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5149)
5150
5151(define_insn "aarch64_sqxtun2<mode>_be"
5152 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5153 (vec_concat:<VNARROWQ2>
5154 (unspec:<VNARROWQ>
778ac63f 5155 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
8fdfd0cf
KT
5156 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5157 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5158 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5159 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5160)
5161
5162(define_expand "aarch64_sqxtun2<mode>"
5163 [(match_operand:<VNARROWQ2> 0 "register_operand")
5164 (match_operand:<VNARROWQ> 1 "register_operand")
5165 (unspec:<VNARROWQ>
778ac63f 5166 [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
8fdfd0cf
KT
5167 "TARGET_SIMD"
5168 {
5169 if (BYTES_BIG_ENDIAN)
5170 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5171 operands[2]));
5172 else
5173 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5174 operands[2]));
5175 DONE;
5176 }
5177)
5178
43e9d192
IB
5179;; <su>q<absneg>
5180
5181(define_insn "aarch64_s<optab><mode>"
9551c7ec
AV
5182 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5183 (UNQOPS:VSDQ_I
5184 (match_operand:VSDQ_I 1 "register_operand" "w")))]
43e9d192
IB
5185 "TARGET_SIMD"
5186 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
78ec3036 5187 [(set_attr "type" "neon_<optab><q>")]
43e9d192
IB
5188)
5189
5190;; sq<r>dmulh.
5191
5192(define_insn "aarch64_sq<r>dmulh<mode>"
5193 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5194 (unspec:VSDQ_HSI
5195 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5196 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5197 VQDMULH))]
5198 "TARGET_SIMD"
5199 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 5200 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
43e9d192
IB
5201)
5202
a53b8229
JW
5203(define_insn "aarch64_sq<r>dmulh_n<mode>"
5204 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5205 (unspec:VDQHS
5206 [(match_operand:VDQHS 1 "register_operand" "w")
5207 (vec_duplicate:VDQHS
5208 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5209 VQDMULH))]
5210 "TARGET_SIMD"
5211 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5212 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5213)
5214
43e9d192
IB
5215;; sq<r>dmulh_lane
5216
2a49c16d 5217(define_insn "aarch64_sq<r>dmulh_lane<mode>"
b7d7d917
TB
5218 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5219 (unspec:VDQHS
5220 [(match_operand:VDQHS 1 "register_operand" "w")
5221 (vec_select:<VEL>
5222 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5223 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5224 VQDMULH))]
5225 "TARGET_SIMD"
5226 "*
7ac29c0f 5227 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
b7d7d917 5228 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
78ec3036 5229 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
b7d7d917
TB
5230)
5231
2a49c16d 5232(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
b7d7d917
TB
5233 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5234 (unspec:VDQHS
5235 [(match_operand:VDQHS 1 "register_operand" "w")
5236 (vec_select:<VEL>
5237 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5238 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5239 VQDMULH))]
5240 "TARGET_SIMD"
5241 "*
7ac29c0f 5242 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
b7d7d917 5243 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
78ec3036 5244 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
b7d7d917
TB
5245)
5246
2a49c16d 5247(define_insn "aarch64_sq<r>dmulh_lane<mode>"
b7d7d917
TB
5248 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5249 (unspec:SD_HSI
5250 [(match_operand:SD_HSI 1 "register_operand" "w")
43e9d192 5251 (vec_select:<VEL>
278821f2 5252 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
43e9d192
IB
5253 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5254 VQDMULH))]
5255 "TARGET_SIMD"
5256 "*
7ac29c0f 5257 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
b7d7d917 5258 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
78ec3036 5259 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
43e9d192
IB
5260)
5261
2a49c16d 5262(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
d2937a2e
KT
5263 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5264 (unspec:SD_HSI
5265 [(match_operand:SD_HSI 1 "register_operand" "w")
5266 (vec_select:<VEL>
5267 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5268 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5269 VQDMULH))]
5270 "TARGET_SIMD"
5271 "*
7ac29c0f 5272 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
d2937a2e
KT
5273 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5274 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5275)
5276
57b26d65
MW
5277;; sqrdml[as]h.
5278
5279(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
5280 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5281 (unspec:VSDQ_HSI
5282 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5283 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5284 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5285 SQRDMLH_AS))]
5286 "TARGET_SIMD_RDMA"
5287 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5288 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5289)
5290
5291;; sqrdml[as]h_lane.
5292
5293(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5294 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5295 (unspec:VDQHS
5296 [(match_operand:VDQHS 1 "register_operand" "0")
5297 (match_operand:VDQHS 2 "register_operand" "w")
5298 (vec_select:<VEL>
51b3f077 5299 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
57b26d65
MW
5300 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5301 SQRDMLH_AS))]
5302 "TARGET_SIMD_RDMA"
5303 {
7ac29c0f 5304 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
57b26d65
MW
5305 return
5306 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5307 }
5308 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5309)
5310
5311(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5312 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5313 (unspec:SD_HSI
5314 [(match_operand:SD_HSI 1 "register_operand" "0")
5315 (match_operand:SD_HSI 2 "register_operand" "w")
5316 (vec_select:<VEL>
51b3f077 5317 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
57b26d65
MW
5318 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5319 SQRDMLH_AS))]
5320 "TARGET_SIMD_RDMA"
5321 {
7ac29c0f 5322 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
57b26d65
MW
5323 return
5324 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5325 }
5326 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5327)
5328
5329;; sqrdml[as]h_laneq.
5330
5331(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5332 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5333 (unspec:VDQHS
5334 [(match_operand:VDQHS 1 "register_operand" "0")
5335 (match_operand:VDQHS 2 "register_operand" "w")
5336 (vec_select:<VEL>
51b3f077 5337 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
57b26d65
MW
5338 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5339 SQRDMLH_AS))]
5340 "TARGET_SIMD_RDMA"
5341 {
7ac29c0f 5342 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
57b26d65
MW
5343 return
5344 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5345 }
5346 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5347)
5348
5349(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5350 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5351 (unspec:SD_HSI
5352 [(match_operand:SD_HSI 1 "register_operand" "0")
5353 (match_operand:SD_HSI 2 "register_operand" "w")
5354 (vec_select:<VEL>
51b3f077 5355 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
57b26d65
MW
5356 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5357 SQRDMLH_AS))]
5358 "TARGET_SIMD_RDMA"
5359 {
7ac29c0f 5360 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
57b26d65
MW
5361 return
5362 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5363 }
5364 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5365)
5366
43e9d192
IB
5367;; vqdml[sa]l
5368
43705f3f 5369(define_insn "aarch64_sqdmlal<mode>"
43e9d192 5370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f
KT
5371 (ss_plus:<VWIDE>
5372 (ss_ashift:<VWIDE>
5373 (mult:<VWIDE>
5374 (sign_extend:<VWIDE>
5375 (match_operand:VSD_HSI 2 "register_operand" "w"))
5376 (sign_extend:<VWIDE>
5377 (match_operand:VSD_HSI 3 "register_operand" "w")))
5378 (const_int 1))
5379 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5380 "TARGET_SIMD"
5381 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5382 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5383)
5384
5385(define_insn "aarch64_sqdmlsl<mode>"
5386 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5387 (ss_minus:<VWIDE>
43e9d192
IB
5388 (match_operand:<VWIDE> 1 "register_operand" "0")
5389 (ss_ashift:<VWIDE>
5390 (mult:<VWIDE>
5391 (sign_extend:<VWIDE>
5392 (match_operand:VSD_HSI 2 "register_operand" "w"))
5393 (sign_extend:<VWIDE>
5394 (match_operand:VSD_HSI 3 "register_operand" "w")))
5395 (const_int 1))))]
5396 "TARGET_SIMD"
43705f3f 5397 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
78ec3036 5398 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
43e9d192
IB
5399)
5400
5401;; vqdml[sa]l_lane
5402
43705f3f
KT
5403(define_insn "aarch64_sqdmlal_lane<mode>"
5404 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5405 (ss_plus:<VWIDE>
5406 (ss_ashift:<VWIDE>
5407 (mult:<VWIDE>
5408 (sign_extend:<VWIDE>
5409 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5410 (vec_duplicate:<VWIDE>
5411 (sign_extend:<VWIDE_S>
43705f3f
KT
5412 (vec_select:<VEL>
5413 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5414 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3bc9db6a 5415 ))
43705f3f
KT
5416 (const_int 1))
5417 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5418 "TARGET_SIMD"
5419 {
5420 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5421 return
5422 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5423 }
5424 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5425)
5426
5427(define_insn "aarch64_sqdmlsl_lane<mode>"
43e9d192 5428 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f 5429 (ss_minus:<VWIDE>
43e9d192
IB
5430 (match_operand:<VWIDE> 1 "register_operand" "0")
5431 (ss_ashift:<VWIDE>
5432 (mult:<VWIDE>
5433 (sign_extend:<VWIDE>
5434 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5435 (vec_duplicate:<VWIDE>
5436 (sign_extend:<VWIDE_S>
43e9d192 5437 (vec_select:<VEL>
278821f2
KT
5438 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5439 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3bc9db6a 5440 ))
278821f2
KT
5441 (const_int 1))))]
5442 "TARGET_SIMD"
5443 {
7ac29c0f 5444 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2 5445 return
43705f3f 5446 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
278821f2
KT
5447 }
5448 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5449)
5450
43705f3f
KT
5451
5452(define_insn "aarch64_sqdmlsl_laneq<mode>"
278821f2 5453 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f 5454 (ss_minus:<VWIDE>
278821f2
KT
5455 (match_operand:<VWIDE> 1 "register_operand" "0")
5456 (ss_ashift:<VWIDE>
5457 (mult:<VWIDE>
5458 (sign_extend:<VWIDE>
5459 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5460 (vec_duplicate:<VWIDE>
5461 (sign_extend:<VWIDE_S>
278821f2
KT
5462 (vec_select:<VEL>
5463 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192 5464 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3bc9db6a 5465 ))
43e9d192
IB
5466 (const_int 1))))]
5467 "TARGET_SIMD"
1dd055a2 5468 {
7ac29c0f 5469 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2 5470 return
43705f3f 5471 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
1dd055a2 5472 }
78ec3036 5473 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
5474)
5475
43705f3f 5476(define_insn "aarch64_sqdmlal_laneq<mode>"
43e9d192 5477 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f
KT
5478 (ss_plus:<VWIDE>
5479 (ss_ashift:<VWIDE>
5480 (mult:<VWIDE>
5481 (sign_extend:<VWIDE>
5482 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5483 (vec_duplicate:<VWIDE>
5484 (sign_extend:<VWIDE_S>
43705f3f
KT
5485 (vec_select:<VEL>
5486 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5487 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3bc9db6a 5488 ))
43705f3f
KT
5489 (const_int 1))
5490 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5491 "TARGET_SIMD"
5492 {
5493 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5494 return
5495 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5496 }
5497 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5498)
5499
5500
5501(define_insn "aarch64_sqdmlal_lane<mode>"
5502 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5503 (ss_plus:<VWIDE>
5504 (ss_ashift:<VWIDE>
5505 (mult:<VWIDE>
5506 (sign_extend:<VWIDE>
5507 (match_operand:SD_HSI 2 "register_operand" "w"))
5508 (sign_extend:<VWIDE>
5509 (vec_select:<VEL>
5510 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5511 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5512 )
5513 (const_int 1))
5514 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5515 "TARGET_SIMD"
5516 {
5517 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5518 return
5519 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5520 }
5521 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5522)
5523
5524(define_insn "aarch64_sqdmlsl_lane<mode>"
5525 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5526 (ss_minus:<VWIDE>
43e9d192
IB
5527 (match_operand:<VWIDE> 1 "register_operand" "0")
5528 (ss_ashift:<VWIDE>
5529 (mult:<VWIDE>
5530 (sign_extend:<VWIDE>
5531 (match_operand:SD_HSI 2 "register_operand" "w"))
5532 (sign_extend:<VWIDE>
5533 (vec_select:<VEL>
278821f2
KT
5534 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5535 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5536 )
5537 (const_int 1))))]
5538 "TARGET_SIMD"
5539 {
7ac29c0f 5540 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2 5541 return
43705f3f
KT
5542 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5543 }
5544 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5545)
5546
5547
5548(define_insn "aarch64_sqdmlal_laneq<mode>"
5549 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5550 (ss_plus:<VWIDE>
5551 (ss_ashift:<VWIDE>
5552 (mult:<VWIDE>
5553 (sign_extend:<VWIDE>
5554 (match_operand:SD_HSI 2 "register_operand" "w"))
5555 (sign_extend:<VWIDE>
5556 (vec_select:<VEL>
5557 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5558 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5559 )
5560 (const_int 1))
5561 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5562 "TARGET_SIMD"
5563 {
5564 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5565 return
5566 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
278821f2
KT
5567 }
5568 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5569)
5570
43705f3f 5571(define_insn "aarch64_sqdmlsl_laneq<mode>"
278821f2 5572 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f 5573 (ss_minus:<VWIDE>
278821f2
KT
5574 (match_operand:<VWIDE> 1 "register_operand" "0")
5575 (ss_ashift:<VWIDE>
5576 (mult:<VWIDE>
5577 (sign_extend:<VWIDE>
5578 (match_operand:SD_HSI 2 "register_operand" "w"))
5579 (sign_extend:<VWIDE>
5580 (vec_select:<VEL>
5581 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
5582 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5583 )
5584 (const_int 1))))]
5585 "TARGET_SIMD"
1dd055a2 5586 {
7ac29c0f 5587 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2 5588 return
43705f3f 5589 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
1dd055a2 5590 }
78ec3036 5591 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
5592)
5593
43e9d192
IB
5594;; vqdml[sa]l_n
5595
43705f3f 5596(define_insn "aarch64_sqdmlsl_n<mode>"
43e9d192 5597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f 5598 (ss_minus:<VWIDE>
43e9d192
IB
5599 (match_operand:<VWIDE> 1 "register_operand" "0")
5600 (ss_ashift:<VWIDE>
5601 (mult:<VWIDE>
5602 (sign_extend:<VWIDE>
5603 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5604 (vec_duplicate:<VWIDE>
5605 (sign_extend:<VWIDE_S>
1c83b673 5606 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
43e9d192
IB
5607 (const_int 1))))]
5608 "TARGET_SIMD"
43705f3f 5609 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
78ec3036 5610 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
5611)
5612
43705f3f
KT
5613(define_insn "aarch64_sqdmlal_n<mode>"
5614 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5615 (ss_plus:<VWIDE>
5616 (ss_ashift:<VWIDE>
5617 (mult:<VWIDE>
5618 (sign_extend:<VWIDE>
5619 (match_operand:VD_HSI 2 "register_operand" "w"))
3bc9db6a
JW
5620 (vec_duplicate:<VWIDE>
5621 (sign_extend:<VWIDE_S>
43705f3f
KT
5622 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5623 (const_int 1))
5624 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5625 "TARGET_SIMD"
5626 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5627 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5628)
5629
5630
43e9d192
IB
5631;; sqdml[as]l2
5632
43705f3f
KT
5633(define_insn "aarch64_sqdmlal2<mode>_internal"
5634 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5635 (ss_plus:<VWIDE>
5636 (ss_ashift:<VWIDE>
5637 (mult:<VWIDE>
5638 (sign_extend:<VWIDE>
5639 (vec_select:<VHALF>
5640 (match_operand:VQ_HSI 2 "register_operand" "w")
5641 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5642 (sign_extend:<VWIDE>
5643 (vec_select:<VHALF>
5644 (match_operand:VQ_HSI 3 "register_operand" "w")
5645 (match_dup 4))))
5646 (const_int 1))
5647 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5648 "TARGET_SIMD"
5649 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5650 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5651)
5652
5653(define_insn "aarch64_sqdmlsl2<mode>_internal"
43e9d192 5654 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
43705f3f 5655 (ss_minus:<VWIDE>
43e9d192
IB
5656 (match_operand:<VWIDE> 1 "register_operand" "0")
5657 (ss_ashift:<VWIDE>
5658 (mult:<VWIDE>
5659 (sign_extend:<VWIDE>
5660 (vec_select:<VHALF>
5661 (match_operand:VQ_HSI 2 "register_operand" "w")
5662 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5663 (sign_extend:<VWIDE>
5664 (vec_select:<VHALF>
5665 (match_operand:VQ_HSI 3 "register_operand" "w")
5666 (match_dup 4))))
5667 (const_int 1))))]
5668 "TARGET_SIMD"
43705f3f 5669 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
78ec3036 5670 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
5671)
5672
543c0cbc 5673(define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
1bbffb87 5674 [(match_operand:<VWIDE> 0 "register_operand")
543c0cbc
KT
5675 (SBINQOPS:<VWIDE>
5676 (match_operand:<VWIDE> 1 "register_operand")
5677 (match_dup 1))
1bbffb87
DZ
5678 (match_operand:VQ_HSI 2 "register_operand")
5679 (match_operand:VQ_HSI 3 "register_operand")]
43e9d192
IB
5680 "TARGET_SIMD"
5681{
f5cbabc1 5682 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
543c0cbc
KT
5683 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5684 operands[1], operands[2],
5685 operands[3], p));
43e9d192
IB
5686 DONE;
5687})
5688
5689;; vqdml[sa]l2_lane
5690
ff3809b4 5691(define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
43e9d192 5692 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
ff3809b4 5693 (ss_minus:<VWIDE>
43e9d192
IB
5694 (match_operand:<VWIDE> 1 "register_operand" "0")
5695 (ss_ashift:<VWIDE>
5696 (mult:<VWIDE>
5697 (sign_extend:<VWIDE>
3bc9db6a
JW
5698 (vec_select:<VHALF>
5699 (match_operand:VQ_HSI 2 "register_operand" "w")
5700 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5701 (vec_duplicate:<VWIDE>
5702 (sign_extend:<VWIDE_S>
43e9d192 5703 (vec_select:<VEL>
278821f2
KT
5704 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5705 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5706 ))))
5707 (const_int 1))))]
5708 "TARGET_SIMD"
5709 {
7ac29c0f 5710 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
278821f2 5711 return
ff3809b4
KT
5712 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5713 }
5714 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5715)
5716
5717(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5718 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a 5719 (ss_plus:<VWIDE>
ff3809b4
KT
5720 (ss_ashift:<VWIDE>
5721 (mult:<VWIDE>
5722 (sign_extend:<VWIDE>
3bc9db6a
JW
5723 (vec_select:<VHALF>
5724 (match_operand:VQ_HSI 2 "register_operand" "w")
5725 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5726 (vec_duplicate:<VWIDE>
5727 (sign_extend:<VWIDE_S>
ff3809b4
KT
5728 (vec_select:<VEL>
5729 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5730 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5731 ))))
5732 (const_int 1))
5733 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5734 "TARGET_SIMD"
5735 {
5736 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5737 return
5738 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
278821f2
KT
5739 }
5740 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5741)
5742
ff3809b4 5743(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
278821f2 5744 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a 5745 (ss_minus:<VWIDE>
278821f2
KT
5746 (match_operand:<VWIDE> 1 "register_operand" "0")
5747 (ss_ashift:<VWIDE>
5748 (mult:<VWIDE>
5749 (sign_extend:<VWIDE>
3bc9db6a
JW
5750 (vec_select:<VHALF>
5751 (match_operand:VQ_HSI 2 "register_operand" "w")
5752 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5753 (vec_duplicate:<VWIDE>
5754 (sign_extend:<VWIDE_S>
278821f2
KT
5755 (vec_select:<VEL>
5756 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
43e9d192
IB
5757 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5758 ))))
5759 (const_int 1))))]
5760 "TARGET_SIMD"
1dd055a2 5761 {
7ac29c0f 5762 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
1dd055a2 5763 return
ff3809b4
KT
5764 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5765 }
5766 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5767)
5768
5769(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3bc9db6a 5771 (ss_plus:<VWIDE>
ff3809b4
KT
5772 (ss_ashift:<VWIDE>
5773 (mult:<VWIDE>
5774 (sign_extend:<VWIDE>
3bc9db6a
JW
5775 (vec_select:<VHALF>
5776 (match_operand:VQ_HSI 2 "register_operand" "w")
5777 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5778 (vec_duplicate:<VWIDE>
5779 (sign_extend:<VWIDE_S>
ff3809b4
KT
5780 (vec_select:<VEL>
5781 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5782 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5783 ))))
5784 (const_int 1))
5785 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5786 "TARGET_SIMD"
5787 {
5788 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5789 return
5790 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
1dd055a2 5791 }
78ec3036 5792 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
43e9d192
IB
5793)
5794
;; Expanders for the saturating doubling multiply-add/subtract long
;; high-half lane patterns: build the hi-half selection parallel and
;; forward to the matching _internal insn.
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
						operands[1], operands[2],
						operands[3], operands[4], p));
  DONE;
})

;; As above, but the multiplier lane comes from a 128-bit vector (laneq).
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
						operands[1], operands[2],
						operands[3], operands[4], p));
  DONE;
})
5828
;; SQDMLSL2/SQDMLAL2 with a scalar multiplier broadcast from element 0
;; of a vector register (the _n forms).  Operand 4 is the hi-half
;; selection parallel from the expander.
(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (vec_duplicate:<VWIDE>
		(sign_extend:<VWIDE_S>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_plus:<VWIDE>
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (vec_duplicate:<VWIDE>
		(sign_extend:<VWIDE_S>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
5866
;; Expander for the _n (broadcast scalar) high-half multiply-add/subtract
;; long forms; constructs the hi-half parallel and calls _internal.
(define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
						operands[1], operands[2],
						operands[3], p));
  DONE;
})
5882
;; vqdmull

;; Signed saturating doubling multiply long: widen both inputs,
;; multiply, then saturating-double (shift left by 1).
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:VSD_HSI 1 "register_operand" "w"))
	    (sign_extend:<VWIDE>
	      (match_operand:VSD_HSI 2 "register_operand" "w")))
	  (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
5898
;; vqdmull_lane

;; Vector-by-lane SQDMULL; lane index taken from a 64-bit vector (VCOND).
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:VD_HSI 1 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, but the lane comes from a 128-bit vector (VCONQ).
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:VD_HSI 1 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar-by-lane SQDMULL (SD_HSI source); no vec_duplicate needed.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:SD_HSI 1 "register_operand" "w"))
	    (sign_extend:<VWIDE>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar-by-lane SQDMULL with a 128-bit lane source.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:SD_HSI 1 "register_operand" "w"))
	    (sign_extend:<VWIDE>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
5982
;; vqdmull_n

;; SQDMULL with the multiplier broadcast from element 0 of a register.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:VD_HSI 1 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(match_operand:<VEL> 2 "register_operand" "<vwx>"))))
	  (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
6000
;; vqdmull2

;; High-half SQDMULL2: both inputs take their upper halves via the
;; vect_par_cnst_hi_half parallel in operand 3.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 1 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (sign_extend:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_dup 3))))
	  (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
6033
;; vqdmull2_lane

;; High-half SQDMULL2 by lane (64-bit lane source, VCOND).
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 1 "register_operand" "w")
		(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, 128-bit lane source (VCONQ).
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 1 "register_operand" "w")
		(match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
	  (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})
6109
;; vqdmull2_n

;; High-half SQDMULL2 with a broadcast scalar multiplier (element 0).
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 1 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (sign_extend:<VWIDE_S>
		(match_operand:<VEL> 2 "register_operand" "<vwx>"))))
	  (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
6141
;; vshl

;; Register-controlled (possibly rounding/saturating per VSHL iterator)
;; vector shift.  The stray ';' that used to follow the output template
;; started a dead line-comment and is removed for consistency with the
;; rest of the file.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
	   (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
	  VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
6154
6155
;; vqshl

;; Saturating (optionally rounding) register-controlled shift.
;; Removed the stray ';' after the output template (it began a dead
;; line-comment; no other pattern in the file carries one).
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
	   (match_operand:VSDQ_I 2 "register_operand" "w")]
	  VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
6168
;; Vectorizer entry points for widening shift-left of the low/high half.
;; Constraint strings are meaningless in a define_expand (genrecog ignores
;; them), so they are dropped for consistency with the other expanders here.
(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
			VSHLL))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
						     p, operands[2]));
    DONE;
  }
)

;; NOTE(review): this hi-half expander accepts any "immediate_operand"
;; while the _internal insn it emits requires
;; aarch64_simd_shift_imm_bitsize_<ve_mode> -- presumably the vectorizer
;; only ever supplies in-range shifts; confirm before tightening.
(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
			 (match_operand:SI 2
			   "immediate_operand")]
			VSHLL))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
						      p, operands[2]));
    DONE;
  }
)
6198
;; vshll_n

;; Widening shift-left of the low half.  SHLL is used for the special
;; case where the shift equals the element bit-size.
(define_insn "aarch64_<sur>shll<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
			 (match_operand:SI 3
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Widening shift-left of the high half (SHLL2 special case as above).
(define_insn "aarch64_<sur>shll2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
			 (match_operand:SI 3
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
6236
;; Intrinsic-facing vshll_n on 64-bit inputs.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
6269
;; vrshr_n

;; Immediate (rounding) shift right.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
6282
;; v(r)sra_n

;; Shift-right-and-accumulate; operand 1 is the accumulator (tied to
;; the output), operand 2 the value shifted by immediate operand 3.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
			   (match_operand:VSDQ_I_DI 2 "register_operand" "w")
			   (match_operand:SI 3
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
6296
;; vs<lr>i_n

;; Shift-left/right and insert; operand 1 provides the bits preserved
;; in the destination (tied to the output).
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
			   (match_operand:VSDQ_I_DI 2 "register_operand" "w")
			   (match_operand:SI 3
			     "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
			  VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
6310
;; vqshl(u)

;; Saturating shift left by immediate (signed, unsigned, or
;; signed-to-unsigned per the VQSHL_N iterator).
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:SI 2
			  "aarch64_simd_shift_imm_<ve_mode>" "i")]
		       VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
6323
6324
;; vq(r)shr(u)n_n

;; Scalar saturating (rounding) shift-right narrow.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; Vector narrow writing the low half of a wider zero-extended result;
;; little-endian lane order.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (unspec:<VNARROWQ>
	    [(match_operand:VQN 1 "register_operand" "w")
	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
	    VQSHRN_N)
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Big-endian counterpart: the zero half comes first in the vec_concat.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
	  (unspec:<VNARROWQ>
	    [(match_operand:VQN 1 "register_operand" "w")
	     (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
	    VQSHRN_N)))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						     INTVAL (operands[2]));
    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
		 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
    else
      emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
		 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));

    /* The intrinsic expects a narrow result, so emit a subreg that will get
       optimized away as appropriate.  */
    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
						 <VNARROWQ2>mode));
    DONE;
  }
)
6389
;; Narrow-into-high-half variants: operand 1 supplies the existing low
;; half (tied to the output register).
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
			      (match_operand:VQN 3
				"aarch64_simd_shift_imm_vec_<vn_mode>")]
			     VQSHRN_N)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
			      (match_operand:VQN 3
				"aarch64_simd_shift_imm_vec_<vn_mode>")]
			     VQSHRN_N)
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

(define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (unspec:<VNARROWQ>
     [(match_operand:VQN 2 "register_operand")
      (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
     VQSHRN_N)]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						     INTVAL (operands[3]));

    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
		 operands[1], operands[2], operands[3]));
    else
      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
		 operands[1], operands[2], operands[3]));
    DONE;
  }
)
6437
43e9d192 6438
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

;; Signed vector compare producing an all-ones/all-zeros mask
;; (the negation of the 0/1 comparison result).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz"))))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
6456
;; DImode signed compare.  Before reload the result may end up in either
;; register file; after reload we split to a flag-setting compare plus
;; csetm for GP registers, or to the clobber-free SIMD pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

;; Post-reload SIMD-register form without the CC clobber.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz"))))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
6505
;; cm(hs|hi)

;; Unsigned vector compare producing an all-ones/all-zeros mask.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
6519
;; DImode unsigned compare; same two-way split strategy as the signed
;; DImode compare above (GP: compare + csetm, SIMD: clobber-free insn).
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w"))))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
385eb93d 6566
;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

;; A cmtst can also arise when combine produces not (neg (eq x 0)),
;; in which case we rewrite it as a test of the operand against itself.

(define_insn "*aarch64_cmtst_same_<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
6606
;; DImode cmtst; splits like the DImode compares: a tst/csetm sequence
;; when allocated to GP registers, the clobber-free insn otherwise.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
6657
;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare producing an integer mask; the second
;; alternative matches comparison against zero.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz"))))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
6673
75dd5ace
JG
6674;; fac(ge|gt)
6675;; Note we can also handle what would be fac(le|lt) by
6676;; generating fac(ge|gt).
6677
33d72b63 6678(define_insn "aarch64_fac<optab><mode>"
5f565314
RS
6679 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6680 (neg:<V_INT_EQUIV>
6681 (FAC_COMPARISONS:<V_INT_EQUIV>
68ad28c3
JW
6682 (abs:VHSDF_HSDF
6683 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
6684 (abs:VHSDF_HSDF
6685 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
75dd5ace
JG
6686 )))]
6687 "TARGET_SIMD"
6688 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
33d72b63 6689 [(set_attr "type" "neon_fp_compare_<stype><q>")]
75dd5ace
JG
6690)
6691
43e9d192
IB
6692;; addp
6693
6694(define_insn "aarch64_addp<mode>"
eb2b3602
JW
6695 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
6696 (unspec:VDQ_I
6697 [(match_operand:VDQ_I 1 "register_operand" "w")
6698 (match_operand:VDQ_I 2 "register_operand" "w")]
43e9d192
IB
6699 UNSPEC_ADDP))]
6700 "TARGET_SIMD"
6701 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
78ec3036 6702 [(set_attr "type" "neon_reduc_add<q>")]
43e9d192
IB
6703)
6704
6705(define_insn "aarch64_addpdi"
6706 [(set (match_operand:DI 0 "register_operand" "=w")
6707 (unspec:DI
6708 [(match_operand:V2DI 1 "register_operand" "w")]
6709 UNSPEC_ADDP))]
6710 "TARGET_SIMD"
6711 "addp\t%d0, %1.2d"
78ec3036 6712 [(set_attr "type" "neon_reduc_add")]
43e9d192
IB
6713)
6714
43e9d192
IB
6715;; sqrt
6716
98daafa0 6717(define_expand "sqrt<mode>2"
1bbffb87
DZ
6718 [(set (match_operand:VHSDF 0 "register_operand")
6719 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
98daafa0
EM
6720 "TARGET_SIMD"
6721{
6722 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
6723 DONE;
6724})
6725
6726(define_insn "*sqrt<mode>2"
daef0a8c
JW
6727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
6728 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
43e9d192
IB
6729 "TARGET_SIMD"
6730 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
daef0a8c 6731 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
43e9d192
IB
6732)
6733
43e9d192
IB
6734;; Patterns for vector struct loads and stores.
6735
66f206b8
JW
6736(define_insn "aarch64_simd_ld2<vstruct_elt>"
6737 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
6738 (unspec:VSTRUCT_2Q [
6739 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
6740 UNSPEC_LD2))]
43e9d192
IB
6741 "TARGET_SIMD"
6742 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
78ec3036
JG
6743 [(set_attr "type" "neon_load2_2reg<q>")]
6744)
43e9d192 6745
66f206b8
JW
6746(define_insn "aarch64_simd_ld2r<vstruct_elt>"
6747 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6748 (unspec:VSTRUCT_2QD [
6749 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6750 UNSPEC_LD2_DUP))]
77efea31
FY
6751 "TARGET_SIMD"
6752 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6753 [(set_attr "type" "neon_load2_all_lanes<q>")]
6754)
6755
66f206b8
JW
6756(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6757 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6758 (unspec:VSTRUCT_2QD [
6759 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6760 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
6761 (match_operand:SI 3 "immediate_operand" "i")]
6762 UNSPEC_LD2_LANE))]
3ec1be97 6763 "TARGET_SIMD"
4d0a0237 6764 {
66f206b8
JW
6765 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6766 INTVAL (operands[3]));
4d0a0237
CB
6767 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
6768 }
3ec1be97
CB
6769 [(set_attr "type" "neon_load2_one_lane")]
6770)
6771
66f206b8
JW
6772(define_expand "vec_load_lanes<mode><vstruct_elt>"
6773 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
6774 (unspec:VSTRUCT_2Q [
6775 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
6776 UNSPEC_LD2))]
668046d1
DS
6777 "TARGET_SIMD"
6778{
6779 if (BYTES_BIG_ENDIAN)
6780 {
66f206b8
JW
6781 rtx tmp = gen_reg_rtx (<MODE>mode);
6782 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6783 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6784 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
6785 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
668046d1
DS
6786 }
6787 else
66f206b8 6788 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
6789 DONE;
6790})
6791
66f206b8
JW
6792(define_insn "aarch64_simd_st2<vstruct_elt>"
6793 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
6794 (unspec:VSTRUCT_2Q [
6795 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
6796 UNSPEC_ST2))]
43e9d192
IB
6797 "TARGET_SIMD"
6798 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
78ec3036
JG
6799 [(set_attr "type" "neon_store2_2reg<q>")]
6800)
43e9d192 6801
aaf3de7a 6802;; RTL uses GCC vector extension indices, so flip only for assembly.
66f206b8 6803(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
abf47511 6804 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
66f206b8
JW
6805 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
6806 (match_operand:SI 2 "immediate_operand" "i")]
6807 UNSPEC_ST2_LANE))]
ba081b77 6808 "TARGET_SIMD"
aaf3de7a 6809 {
66f206b8
JW
6810 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6811 INTVAL (operands[2]));
aaf3de7a
AL
6812 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
6813 }
b1db706a 6814 [(set_attr "type" "neon_store2_one_lane<q>")]
ba081b77
JG
6815)
6816
66f206b8
JW
6817(define_expand "vec_store_lanes<mode><vstruct_elt>"
6818 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
6819 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
668046d1
DS
6820 UNSPEC_ST2))]
6821 "TARGET_SIMD"
6822{
6823 if (BYTES_BIG_ENDIAN)
6824 {
66f206b8
JW
6825 rtx tmp = gen_reg_rtx (<MODE>mode);
6826 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6827 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6828 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6829 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
668046d1
DS
6830 }
6831 else
66f206b8 6832 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
6833 DONE;
6834})
6835
66f206b8
JW
6836(define_insn "aarch64_simd_ld3<vstruct_elt>"
6837 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
6838 (unspec:VSTRUCT_3Q [
6839 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
6840 UNSPEC_LD3))]
43e9d192
IB
6841 "TARGET_SIMD"
6842 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
78ec3036
JG
6843 [(set_attr "type" "neon_load3_3reg<q>")]
6844)
43e9d192 6845
66f206b8
JW
6846(define_insn "aarch64_simd_ld3r<vstruct_elt>"
6847 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6848 (unspec:VSTRUCT_3QD [
6849 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6850 UNSPEC_LD3_DUP))]
77efea31
FY
6851 "TARGET_SIMD"
6852 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6853 [(set_attr "type" "neon_load3_all_lanes<q>")]
6854)
6855
66f206b8
JW
6856(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6857 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6858 (unspec:VSTRUCT_3QD [
6859 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6860 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
6861 (match_operand:SI 3 "immediate_operand" "i")]
6862 UNSPEC_LD3_LANE))]
3ec1be97 6863 "TARGET_SIMD"
4d0a0237 6864{
66f206b8
JW
6865 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6866 INTVAL (operands[3]));
4d0a0237
CB
6867 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
6868}
3ec1be97
CB
6869 [(set_attr "type" "neon_load3_one_lane")]
6870)
6871
66f206b8
JW
6872(define_expand "vec_load_lanes<mode><vstruct_elt>"
6873 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
6874 (unspec:VSTRUCT_3Q [
6875 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
6876 UNSPEC_LD3))]
668046d1
DS
6877 "TARGET_SIMD"
6878{
6879 if (BYTES_BIG_ENDIAN)
6880 {
66f206b8
JW
6881 rtx tmp = gen_reg_rtx (<MODE>mode);
6882 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6883 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6884 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
6885 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
668046d1
DS
6886 }
6887 else
66f206b8 6888 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
6889 DONE;
6890})
6891
66f206b8
JW
6892(define_insn "aarch64_simd_st3<vstruct_elt>"
6893 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
6894 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
43e9d192
IB
6895 UNSPEC_ST3))]
6896 "TARGET_SIMD"
6897 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
78ec3036
JG
6898 [(set_attr "type" "neon_store3_3reg<q>")]
6899)
43e9d192 6900
aaf3de7a 6901;; RTL uses GCC vector extension indices, so flip only for assembly.
66f206b8 6902(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
f4720b94 6903 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
66f206b8 6904 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
f4720b94 6905 (match_operand:SI 2 "immediate_operand" "i")]
66f206b8 6906 UNSPEC_ST3_LANE))]
ba081b77 6907 "TARGET_SIMD"
aaf3de7a 6908 {
66f206b8
JW
6909 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6910 INTVAL (operands[2]));
aaf3de7a
AL
6911 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
6912 }
ba081b77
JG
6913 [(set_attr "type" "neon_store3_one_lane<q>")]
6914)
6915
66f206b8
JW
6916(define_expand "vec_store_lanes<mode><vstruct_elt>"
6917 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
6918 (unspec:VSTRUCT_3Q [
6919 (match_operand:VSTRUCT_3Q 1 "register_operand")]
6920 UNSPEC_ST3))]
668046d1
DS
6921 "TARGET_SIMD"
6922{
6923 if (BYTES_BIG_ENDIAN)
6924 {
66f206b8
JW
6925 rtx tmp = gen_reg_rtx (<MODE>mode);
6926 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6927 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6928 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6929 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
668046d1
DS
6930 }
6931 else
66f206b8 6932 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
6933 DONE;
6934})
6935
66f206b8
JW
6936(define_insn "aarch64_simd_ld4<vstruct_elt>"
6937 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
6938 (unspec:VSTRUCT_4Q [
6939 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
6940 UNSPEC_LD4))]
43e9d192
IB
6941 "TARGET_SIMD"
6942 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
78ec3036
JG
6943 [(set_attr "type" "neon_load4_4reg<q>")]
6944)
43e9d192 6945
66f206b8
JW
6946(define_insn "aarch64_simd_ld4r<vstruct_elt>"
6947 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6948 (unspec:VSTRUCT_4QD [
6949 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6950 UNSPEC_LD4_DUP))]
77efea31
FY
6951 "TARGET_SIMD"
6952 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6953 [(set_attr "type" "neon_load4_all_lanes<q>")]
6954)
6955
66f206b8
JW
6956(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6957 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6958 (unspec:VSTRUCT_4QD [
6959 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6960 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
6961 (match_operand:SI 3 "immediate_operand" "i")]
6962 UNSPEC_LD4_LANE))]
3ec1be97 6963 "TARGET_SIMD"
4d0a0237 6964{
66f206b8
JW
6965 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6966 INTVAL (operands[3]));
4d0a0237
CB
6967 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
6968}
3ec1be97
CB
6969 [(set_attr "type" "neon_load4_one_lane")]
6970)
6971
66f206b8
JW
6972(define_expand "vec_load_lanes<mode><vstruct_elt>"
6973 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
6974 (unspec:VSTRUCT_4Q [
6975 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
6976 UNSPEC_LD4))]
668046d1
DS
6977 "TARGET_SIMD"
6978{
6979 if (BYTES_BIG_ENDIAN)
6980 {
66f206b8
JW
6981 rtx tmp = gen_reg_rtx (<MODE>mode);
6982 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6983 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6984 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
6985 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
668046d1
DS
6986 }
6987 else
66f206b8 6988 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
6989 DONE;
6990})
6991
66f206b8
JW
6992(define_insn "aarch64_simd_st4<vstruct_elt>"
6993 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
6994 (unspec:VSTRUCT_4Q [
6995 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
6996 UNSPEC_ST4))]
43e9d192
IB
6997 "TARGET_SIMD"
6998 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
78ec3036
JG
6999 [(set_attr "type" "neon_store4_4reg<q>")]
7000)
43e9d192 7001
aaf3de7a 7002;; RTL uses GCC vector extension indices, so flip only for assembly.
66f206b8 7003(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
d866f024 7004 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
66f206b8 7005 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
d866f024 7006 (match_operand:SI 2 "immediate_operand" "i")]
66f206b8 7007 UNSPEC_ST4_LANE))]
ba081b77 7008 "TARGET_SIMD"
aaf3de7a 7009 {
66f206b8
JW
7010 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7011 INTVAL (operands[2]));
aaf3de7a
AL
7012 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7013 }
ba081b77
JG
7014 [(set_attr "type" "neon_store4_one_lane<q>")]
7015)
7016
66f206b8
JW
7017(define_expand "vec_store_lanes<mode><vstruct_elt>"
7018 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7019 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
668046d1
DS
7020 UNSPEC_ST4))]
7021 "TARGET_SIMD"
7022{
7023 if (BYTES_BIG_ENDIAN)
7024 {
66f206b8
JW
7025 rtx tmp = gen_reg_rtx (<MODE>mode);
7026 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7027 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7028 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7029 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
668046d1
DS
7030 }
7031 else
66f206b8 7032 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
668046d1
DS
7033 DONE;
7034})
7035
7036(define_insn_and_split "aarch64_rev_reglist<mode>"
66f206b8
JW
7037[(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7038 (unspec:VSTRUCT_QD
7039 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
668046d1
DS
7040 (match_operand:V16QI 2 "register_operand" "w")]
7041 UNSPEC_REV_REGLIST))]
7042 "TARGET_SIMD"
7043 "#"
7044 "&& reload_completed"
7045 [(const_int 0)]
7046{
7047 int i;
66f206b8 7048 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
668046d1
DS
7049 for (i = 0; i < nregs; i++)
7050 {
7051 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7052 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
b7e450c9 7053 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
668046d1
DS
7054 }
7055 DONE;
7056}
7057 [(set_attr "type" "neon_tbl1_q")
7058 (set_attr "length" "<insn_count>")]
7059)
7060
43e9d192
IB
7061;; Reload patterns for AdvSIMD register list operands.
7062
66f206b8
JW
7063(define_expand "mov<mode>"
7064 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7065 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7066 "TARGET_SIMD"
7067{
7068 if (can_create_pseudo_p ())
7069 {
7070 if (GET_CODE (operands[0]) != REG)
7071 operands[1] = force_reg (<MODE>mode, operands[1]);
7072 }
7073})
7074
43e9d192 7075(define_expand "mov<mode>"
1bbffb87
DZ
7076 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7077 (match_operand:VSTRUCT 1 "general_operand"))]
43e9d192
IB
7078 "TARGET_SIMD"
7079{
7080 if (can_create_pseudo_p ())
7081 {
7082 if (GET_CODE (operands[0]) != REG)
7083 operands[1] = force_reg (<MODE>mode, operands[1]);
7084 }
7085})
7086
fdcddba8
PW
7087(define_expand "movv8di"
7088 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7089 (match_operand:V8DI 1 "general_operand"))]
7090 "TARGET_SIMD"
7091{
7092 if (can_create_pseudo_p () && MEM_P (operands[0]))
7093 operands[1] = force_reg (V8DImode, operands[1]);
7094})
7095
66f206b8
JW
7096(define_expand "aarch64_ld1x3<vstruct_elt>"
7097 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7098 (match_operand:DI 1 "register_operand")]
568421ba
SD
7099 "TARGET_SIMD"
7100{
66f206b8
JW
7101 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7102 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
568421ba
SD
7103 DONE;
7104})
7105
66f206b8
JW
7106(define_insn "aarch64_ld1_x3_<vstruct_elt>"
7107 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7108 (unspec:VSTRUCT_3QD
7109 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7110 UNSPEC_LD1))]
568421ba
SD
7111 "TARGET_SIMD"
7112 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7113 [(set_attr "type" "neon_load1_3reg<q>")]
7114)
7115
66f206b8
JW
7116(define_expand "aarch64_ld1x4<vstruct_elt>"
7117 [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7118 (match_operand:DI 1 "register_operand" "r")]
39162588
ST
7119 "TARGET_SIMD"
7120{
66f206b8
JW
7121 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7122 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
39162588
ST
7123 DONE;
7124})
7125
66f206b8
JW
7126(define_insn "aarch64_ld1_x4_<vstruct_elt>"
7127 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7128 (unspec:VSTRUCT_4QD
7129 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
39162588
ST
7130 UNSPEC_LD1))]
7131 "TARGET_SIMD"
7132 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7133 [(set_attr "type" "neon_load1_4reg<q>")]
7134)
7135
66f206b8 7136(define_expand "aarch64_st1x2<vstruct_elt>"
1bbffb87 7137 [(match_operand:DI 0 "register_operand")
66f206b8 7138 (match_operand:VSTRUCT_2QD 1 "register_operand")]
568421ba
SD
7139 "TARGET_SIMD"
7140{
66f206b8
JW
7141 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7142 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
568421ba
SD
7143 DONE;
7144})
7145
66f206b8
JW
7146(define_insn "aarch64_st1_x2_<vstruct_elt>"
7147 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7148 (unspec:VSTRUCT_2QD
7149 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7150 UNSPEC_ST1))]
568421ba
SD
7151 "TARGET_SIMD"
7152 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7153 [(set_attr "type" "neon_store1_2reg<q>")]
7154)
7155
66f206b8 7156(define_expand "aarch64_st1x3<vstruct_elt>"
1bbffb87 7157 [(match_operand:DI 0 "register_operand")
66f206b8 7158 (match_operand:VSTRUCT_3QD 1 "register_operand")]
568421ba
SD
7159 "TARGET_SIMD"
7160{
66f206b8
JW
7161 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7162 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
568421ba
SD
7163 DONE;
7164})
7165
66f206b8
JW
7166(define_insn "aarch64_st1_x3_<vstruct_elt>"
7167 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7168 (unspec:VSTRUCT_3QD
7169 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7170 UNSPEC_ST1))]
568421ba
SD
7171 "TARGET_SIMD"
7172 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7173 [(set_attr "type" "neon_store1_3reg<q>")]
7174)
7175
66f206b8 7176(define_expand "aarch64_st1x4<vstruct_elt>"
39162588 7177 [(match_operand:DI 0 "register_operand" "")
66f206b8 7178 (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
39162588
ST
7179 "TARGET_SIMD"
7180{
66f206b8
JW
7181 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7182 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
39162588
ST
7183 DONE;
7184})
7185
66f206b8
JW
7186(define_insn "aarch64_st1_x4_<vstruct_elt>"
7187 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7188 (unspec:VSTRUCT_4QD
7189 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7190 UNSPEC_ST1))]
39162588
ST
7191 "TARGET_SIMD"
7192 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7193 [(set_attr "type" "neon_store1_4reg<q>")]
7194)
7195
66f206b8
JW
7196(define_insn "*aarch64_mov<mode>"
7197 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7198 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7199 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7200 && (register_operand (operands[0], <MODE>mode)
7201 || register_operand (operands[1], <MODE>mode))"
7202 "@
7203 #
7204 st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7205 ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7206 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7207 neon_load<nregs>_<nregs>reg_q")
7208 (set_attr "length" "<insn_count>,4,4")]
7209)
7210
43e9d192
IB
7211(define_insn "*aarch64_mov<mode>"
7212 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
2d8c6dc1
AH
7213 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7214 "TARGET_SIMD && !BYTES_BIG_ENDIAN
43e9d192
IB
7215 && (register_operand (operands[0], <MODE>mode)
7216 || register_operand (operands[1], <MODE>mode))"
2d8c6dc1
AH
7217 "@
7218 #
7219 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7220 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7221 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7222 neon_load<nregs>_<nregs>reg_q")
cd78b3dd 7223 (set_attr "length" "<insn_count>,4,4")]
78ec3036 7224)
43e9d192 7225
fdcddba8
PW
7226(define_insn "*aarch64_movv8di"
7227 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7228 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
0a68862e
PW
7229 "(register_operand (operands[0], V8DImode)
7230 || register_operand (operands[1], V8DImode))"
fdcddba8
PW
7231 "#"
7232 [(set_attr "type" "multiple,multiple,multiple")
7233 (set_attr "length" "32,16,16")]
7234)
7235
89b4515c 7236(define_insn "aarch64_be_ld1<mode>"
71a11456
AL
7237 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7238 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7239 "aarch64_simd_struct_operand" "Utv")]
89b4515c
AV
7240 UNSPEC_LD1))]
7241 "TARGET_SIMD"
7242 "ld1\\t{%0<Vmtype>}, %1"
7243 [(set_attr "type" "neon_load1_1reg<q>")]
7244)
7245
7246(define_insn "aarch64_be_st1<mode>"
71a11456
AL
7247 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7248 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
89b4515c
AV
7249 UNSPEC_ST1))]
7250 "TARGET_SIMD"
7251 "st1\\t{%1<Vmtype>}, %0"
7252 [(set_attr "type" "neon_store1_1reg<q>")]
7253)
7254
66f206b8
JW
7255(define_insn "*aarch64_be_mov<mode>"
7256 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7257 (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
7258 "TARGET_SIMD && BYTES_BIG_ENDIAN
7259 && (register_operand (operands[0], <MODE>mode)
7260 || register_operand (operands[1], <MODE>mode))"
7261 "@
7262 #
7263 stp\\t%d1, %R1, %0
7264 ldp\\t%d0, %R0, %1"
7265 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7266 (set_attr "length" "8,4,4")]
7267)
7268
7269(define_insn "*aarch64_be_mov<mode>"
7270 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7271 (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
7272 "TARGET_SIMD && BYTES_BIG_ENDIAN
7273 && (register_operand (operands[0], <MODE>mode)
7274 || register_operand (operands[1], <MODE>mode))"
7275 "@
7276 #
7277 stp\\t%q1, %R1, %0
7278 ldp\\t%q0, %R0, %1"
7279 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7280 (set_attr "length" "8,4,4")]
7281)
7282
2d8c6dc1
AH
7283(define_insn "*aarch64_be_movoi"
7284 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7285 (match_operand:OI 1 "general_operand" " w,w,m"))]
7286 "TARGET_SIMD && BYTES_BIG_ENDIAN
7287 && (register_operand (operands[0], OImode)
7288 || register_operand (operands[1], OImode))"
7289 "@
7290 #
7291 stp\\t%q1, %R1, %0
7292 ldp\\t%q0, %R0, %1"
5c4b7f1c 7293 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
cd78b3dd 7294 (set_attr "length" "8,4,4")]
2d8c6dc1
AH
7295)
7296
66f206b8
JW
7297(define_insn "*aarch64_be_mov<mode>"
7298 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7299 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
7300 "TARGET_SIMD && BYTES_BIG_ENDIAN
7301 && (register_operand (operands[0], <MODE>mode)
7302 || register_operand (operands[1], <MODE>mode))"
7303 "#"
7304 [(set_attr "type" "multiple")
7305 (set_attr "length" "12,8,8")]
7306)
7307
2d8c6dc1
AH
7308(define_insn "*aarch64_be_movci"
7309 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
7310 (match_operand:CI 1 "general_operand" " w,w,o"))]
7311 "TARGET_SIMD && BYTES_BIG_ENDIAN
7312 && (register_operand (operands[0], CImode)
7313 || register_operand (operands[1], CImode))"
7314 "#"
7315 [(set_attr "type" "multiple")
cd78b3dd 7316 (set_attr "length" "12,4,4")]
2d8c6dc1
AH
7317)
7318
66f206b8
JW
7319(define_insn "*aarch64_be_mov<mode>"
7320 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
7321 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
7322 "TARGET_SIMD && BYTES_BIG_ENDIAN
7323 && (register_operand (operands[0], <MODE>mode)
7324 || register_operand (operands[1], <MODE>mode))"
7325 "#"
7326 [(set_attr "type" "multiple")
7327 (set_attr "length" "16,8,8")]
7328)
7329
2d8c6dc1
AH
7330(define_insn "*aarch64_be_movxi"
7331 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
7332 (match_operand:XI 1 "general_operand" " w,w,o"))]
7333 "TARGET_SIMD && BYTES_BIG_ENDIAN
7334 && (register_operand (operands[0], XImode)
7335 || register_operand (operands[1], XImode))"
7336 "#"
7337 [(set_attr "type" "multiple")
cd78b3dd 7338 (set_attr "length" "16,4,4")]
2d8c6dc1
AH
7339)
7340
66f206b8
JW
7341(define_split
7342 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
7343 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
7344 "TARGET_SIMD && reload_completed"
7345 [(const_int 0)]
7346{
7347 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
7348 DONE;
7349})
7350
43e9d192 7351(define_split
2d8c6dc1
AH
7352 [(set (match_operand:OI 0 "register_operand")
7353 (match_operand:OI 1 "register_operand"))]
43e9d192 7354 "TARGET_SIMD && reload_completed"
2d8c6dc1 7355 [(const_int 0)]
43e9d192 7356{
2d8c6dc1
AH
7357 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
7358 DONE;
43e9d192
IB
7359})
7360
66f206b8
JW
7361(define_split
7362 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
7363 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
7364 "TARGET_SIMD && reload_completed"
7365 [(const_int 0)]
7366{
7367 if (register_operand (operands[0], <MODE>mode)
7368 && register_operand (operands[1], <MODE>mode))
7369 {
7370 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
7371 DONE;
7372 }
7373 else if (BYTES_BIG_ENDIAN)
7374 {
7375 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7376 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7377 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7378 <MODE>mode, 0),
7379 simplify_gen_subreg (pair_mode, operands[1],
7380 <MODE>mode, 0));
7381 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
7382 simplify_gen_subreg (<VSTRUCT_ELT>mode,
7383 operands[0],
7384 <MODE>mode,
7385 2 * elt_size)),
7386 gen_lowpart (<VSTRUCT_ELT>mode,
7387 simplify_gen_subreg (<VSTRUCT_ELT>mode,
7388 operands[1],
7389 <MODE>mode,
7390 2 * elt_size)));
7391 DONE;
7392 }
7393 else
7394 FAIL;
7395})
7396
43e9d192 7397(define_split
2d8c6dc1
AH
7398 [(set (match_operand:CI 0 "nonimmediate_operand")
7399 (match_operand:CI 1 "general_operand"))]
43e9d192 7400 "TARGET_SIMD && reload_completed"
2d8c6dc1 7401 [(const_int 0)]
43e9d192 7402{
2d8c6dc1
AH
7403 if (register_operand (operands[0], CImode)
7404 && register_operand (operands[1], CImode))
7405 {
7406 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
7407 DONE;
7408 }
7409 else if (BYTES_BIG_ENDIAN)
7410 {
7411 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
7412 simplify_gen_subreg (OImode, operands[1], CImode, 0));
7413 emit_move_insn (gen_lowpart (V16QImode,
7414 simplify_gen_subreg (TImode, operands[0],
7415 CImode, 32)),
7416 gen_lowpart (V16QImode,
7417 simplify_gen_subreg (TImode, operands[1],
7418 CImode, 32)));
7419 DONE;
7420 }
7421 else
7422 FAIL;
43e9d192
IB
7423})
7424
66f206b8
JW
7425(define_split
7426 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
7427 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
7428 "TARGET_SIMD && reload_completed"
7429 [(const_int 0)]
7430{
7431 if (register_operand (operands[0], <MODE>mode)
7432 && register_operand (operands[1], <MODE>mode))
7433 {
7434 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
7435 DONE;
7436 }
7437 else if (BYTES_BIG_ENDIAN)
7438 {
7439 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7440 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7441 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7442 <MODE>mode, 0),
7443 simplify_gen_subreg (pair_mode, operands[1],
7444 <MODE>mode, 0));
7445 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7446 <MODE>mode, 2 * elt_size),
7447 simplify_gen_subreg (pair_mode, operands[1],
7448 <MODE>mode, 2 * elt_size));
7449 DONE;
7450 }
7451 else
7452 FAIL;
7453})
7454
43e9d192 7455(define_split
2d8c6dc1
AH
7456 [(set (match_operand:XI 0 "nonimmediate_operand")
7457 (match_operand:XI 1 "general_operand"))]
43e9d192 7458 "TARGET_SIMD && reload_completed"
2d8c6dc1 7459 [(const_int 0)]
43e9d192 7460{
2d8c6dc1
AH
7461 if (register_operand (operands[0], XImode)
7462 && register_operand (operands[1], XImode))
7463 {
7464 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
7465 DONE;
7466 }
7467 else if (BYTES_BIG_ENDIAN)
7468 {
7469 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
7470 simplify_gen_subreg (OImode, operands[1], XImode, 0));
7471 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
7472 simplify_gen_subreg (OImode, operands[1], XImode, 32));
7473 DONE;
7474 }
7475 else
7476 FAIL;
43e9d192
IB
7477})
7478
fdcddba8
PW
7479(define_split
7480 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7481 (match_operand:V8DI 1 "general_operand"))]
7482 "TARGET_SIMD && reload_completed"
7483 [(const_int 0)]
7484{
7485 if (register_operand (operands[0], V8DImode)
7486 && register_operand (operands[1], V8DImode))
7487 {
7488 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
7489 DONE;
7490 }
7491 else if ((register_operand (operands[0], V8DImode)
7492 && memory_operand (operands[1], V8DImode))
7493 || (memory_operand (operands[0], V8DImode)
7494 && register_operand (operands[1], V8DImode)))
7495 {
7496 for (int offset = 0; offset < 64; offset += 16)
7497 emit_move_insn (simplify_gen_subreg (TImode, operands[0],
7498 V8DImode, offset),
7499 simplify_gen_subreg (TImode, operands[1],
7500 V8DImode, offset));
7501 DONE;
7502 }
7503 else
7504 FAIL;
7505})
7506
66f206b8
JW
7507(define_expand "aarch64_ld<nregs>r<vstruct_elt>"
7508 [(match_operand:VSTRUCT_QD 0 "register_operand")
7509 (match_operand:DI 1 "register_operand")]
77efea31
FY
7510 "TARGET_SIMD"
7511{
abf47511 7512 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
66f206b8 7513 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
77efea31 7514
66f206b8 7515 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
77efea31
FY
7516 DONE;
7517})
7518
66f206b8
JW
7519(define_insn "aarch64_ld2<vstruct_elt>_dreg"
7520 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
7521 (unspec:VSTRUCT_2DNX [
7522 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
7523 UNSPEC_LD2_DREG))]
fca7d0a4 7524 "TARGET_SIMD"
43e9d192 7525 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
78ec3036
JG
7526 [(set_attr "type" "neon_load2_2reg<q>")]
7527)
43e9d192 7528
66f206b8
JW
7529(define_insn "aarch64_ld2<vstruct_elt>_dreg"
7530 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
7531 (unspec:VSTRUCT_2DX [
7532 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
7533 UNSPEC_LD2_DREG))]
fca7d0a4 7534 "TARGET_SIMD"
ac45b2ba
TC
7535 "ld1\\t{%S0.1d - %T0.1d}, %1"
7536 [(set_attr "type" "neon_load1_2reg<q>")]
7537)
7538
66f206b8
JW
7539(define_insn "aarch64_ld3<vstruct_elt>_dreg"
7540 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
7541 (unspec:VSTRUCT_3DNX [
7542 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
7543 UNSPEC_LD3_DREG))]
fca7d0a4 7544 "TARGET_SIMD"
ac45b2ba
TC
7545 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7546 [(set_attr "type" "neon_load3_3reg<q>")]
7547)
7548
66f206b8
JW
7549(define_insn "aarch64_ld3<vstruct_elt>_dreg"
7550 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
7551 (unspec:VSTRUCT_3DX [
7552 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
7553 UNSPEC_LD3_DREG))]
fca7d0a4 7554 "TARGET_SIMD"
43e9d192 7555 "ld1\\t{%S0.1d - %U0.1d}, %1"
78ec3036
JG
7556 [(set_attr "type" "neon_load1_3reg<q>")]
7557)
43e9d192 7558
66f206b8
JW
7559(define_insn "aarch64_ld4<vstruct_elt>_dreg"
7560 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
7561 (unspec:VSTRUCT_4DNX [
7562 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
7563 UNSPEC_LD4_DREG))]
fca7d0a4 7564 "TARGET_SIMD"
ac45b2ba
TC
7565 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7566 [(set_attr "type" "neon_load4_4reg<q>")]
7567)
7568
66f206b8
JW
7569(define_insn "aarch64_ld4<vstruct_elt>_dreg"
7570 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
7571 (unspec:VSTRUCT_4DX [
7572 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
7573 UNSPEC_LD4_DREG))]
fca7d0a4 7574 "TARGET_SIMD"
43e9d192 7575 "ld1\\t{%S0.1d - %V0.1d}, %1"
78ec3036
JG
7576 [(set_attr "type" "neon_load1_4reg<q>")]
7577)
43e9d192 7578
66f206b8
JW
7579(define_expand "aarch64_ld<nregs><vstruct_elt>"
7580 [(match_operand:VSTRUCT_D 0 "register_operand")
7581 (match_operand:DI 1 "register_operand")]
43e9d192
IB
7582 "TARGET_SIMD"
7583{
66f206b8
JW
7584 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7585 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
43e9d192
IB
7586 DONE;
7587})
7588
71a11456
AL
7589(define_expand "aarch64_ld1<VALL_F16:mode>"
7590 [(match_operand:VALL_F16 0 "register_operand")
dec11868
JG
7591 (match_operand:DI 1 "register_operand")]
7592 "TARGET_SIMD"
7593{
71a11456 7594 machine_mode mode = <VALL_F16:MODE>mode;
dec11868 7595 rtx mem = gen_rtx_MEM (mode, operands[1]);
89b4515c
AV
7596
7597 if (BYTES_BIG_ENDIAN)
71a11456 7598 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
89b4515c
AV
7599 else
7600 emit_move_insn (operands[0], mem);
dec11868
JG
7601 DONE;
7602})
7603
66f206b8
JW
7604(define_expand "aarch64_ld<nregs><vstruct_elt>"
7605 [(match_operand:VSTRUCT_Q 0 "register_operand")
7606 (match_operand:DI 1 "register_operand")]
467e6f1b
KV
7607 "TARGET_SIMD"
7608{
66f206b8
JW
7609 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7610 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
467e6f1b
KV
7611 DONE;
7612})
7613
66f206b8
JW
7614(define_expand "aarch64_ld1x2<vstruct_elt>"
7615 [(match_operand:VSTRUCT_2QD 0 "register_operand")
7616 (match_operand:DI 1 "register_operand")]
467e6f1b
KV
7617 "TARGET_SIMD"
7618{
66f206b8 7619 machine_mode mode = <MODE>mode;
467e6f1b
KV
7620 rtx mem = gen_rtx_MEM (mode, operands[1]);
7621
66f206b8 7622 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
467e6f1b
KV
7623 DONE;
7624})
7625
66f206b8
JW
7626(define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
7627 [(match_operand:VSTRUCT_QD 0 "register_operand")
1bbffb87 7628 (match_operand:DI 1 "register_operand")
66f206b8
JW
7629 (match_operand:VSTRUCT_QD 2 "register_operand")
7630 (match_operand:SI 3 "immediate_operand")]
3ec1be97
CB
7631 "TARGET_SIMD"
7632{
f4720b94 7633 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
66f206b8 7634 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
43e9d192 7635
66f206b8
JW
7636 aarch64_simd_lane_bounds (operands[3], 0,
7637 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7638 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
7639 mem, operands[2], operands[3]));
43e9d192
IB
7640 DONE;
7641})
7642
7643;; Permuted-store expanders for neon intrinsics.
7644
88b08073
JG
7645;; Permute instructions
7646
7647;; vec_perm support
7648
88b08073
JG
7649(define_expand "vec_perm<mode>"
7650 [(match_operand:VB 0 "register_operand")
7651 (match_operand:VB 1 "register_operand")
7652 (match_operand:VB 2 "register_operand")
7653 (match_operand:VB 3 "register_operand")]
f7c4e5b8 7654 "TARGET_SIMD"
88b08073
JG
7655{
7656 aarch64_expand_vec_perm (operands[0], operands[1],
80940017 7657 operands[2], operands[3], <nunits>);
88b08073
JG
7658 DONE;
7659})
7660
b7e450c9 7661(define_insn "aarch64_qtbl1<mode>"
88b08073
JG
7662 [(set (match_operand:VB 0 "register_operand" "=w")
7663 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
7664 (match_operand:VB 2 "register_operand" "w")]
7665 UNSPEC_TBL))]
7666 "TARGET_SIMD"
7667 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
78ec3036 7668 [(set_attr "type" "neon_tbl1<q>")]
88b08073
JG
7669)
7670
b7e450c9 7671(define_insn "aarch64_qtbx1<mode>"
4362c9c8
JW
7672 [(set (match_operand:VB 0 "register_operand" "=w")
7673 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7674 (match_operand:V16QI 2 "register_operand" "w")
7675 (match_operand:VB 3 "register_operand" "w")]
7676 UNSPEC_TBX))]
7677 "TARGET_SIMD"
7678 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
7679 [(set_attr "type" "neon_tbl1<q>")]
7680)
7681
88b08073
JG
7682;; Two source registers.
7683
b7e450c9 7684(define_insn "aarch64_qtbl2<mode>"
246cc060 7685 [(set (match_operand:VB 0 "register_operand" "=w")
66f206b8 7686 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
246cc060 7687 (match_operand:VB 2 "register_operand" "w")]
9371aecc
CL
7688 UNSPEC_TBL))]
7689 "TARGET_SIMD"
246cc060 7690 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
b7e450c9 7691 [(set_attr "type" "neon_tbl2")]
9371aecc
CL
7692)
7693
b7e450c9 7694(define_insn "aarch64_qtbx2<mode>"
246cc060
CL
7695 [(set (match_operand:VB 0 "register_operand" "=w")
7696 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
66f206b8 7697 (match_operand:V2x16QI 2 "register_operand" "w")
246cc060
CL
7698 (match_operand:VB 3 "register_operand" "w")]
7699 UNSPEC_TBX))]
7700 "TARGET_SIMD"
7701 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
b7e450c9 7702 [(set_attr "type" "neon_tbl2")]
246cc060
CL
7703)
7704
7705;; Three source registers.
7706
7707(define_insn "aarch64_qtbl3<mode>"
7708 [(set (match_operand:VB 0 "register_operand" "=w")
66f206b8 7709 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
246cc060
CL
7710 (match_operand:VB 2 "register_operand" "w")]
7711 UNSPEC_TBL))]
7712 "TARGET_SIMD"
7713 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
7714 [(set_attr "type" "neon_tbl3")]
7715)
7716
7717(define_insn "aarch64_qtbx3<mode>"
7718 [(set (match_operand:VB 0 "register_operand" "=w")
7719 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
66f206b8 7720 (match_operand:V3x16QI 2 "register_operand" "w")
246cc060
CL
7721 (match_operand:VB 3 "register_operand" "w")]
7722 UNSPEC_TBX))]
7723 "TARGET_SIMD"
7724 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
7725 [(set_attr "type" "neon_tbl3")]
7726)
7727
7728;; Four source registers.
7729
7730(define_insn "aarch64_qtbl4<mode>"
7731 [(set (match_operand:VB 0 "register_operand" "=w")
66f206b8 7732 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
246cc060
CL
7733 (match_operand:VB 2 "register_operand" "w")]
7734 UNSPEC_TBL))]
7735 "TARGET_SIMD"
7736 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
7737 [(set_attr "type" "neon_tbl4")]
7738)
7739
7740(define_insn "aarch64_qtbx4<mode>"
7741 [(set (match_operand:VB 0 "register_operand" "=w")
7742 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
66f206b8 7743 (match_operand:V4x16QI 2 "register_operand" "w")
246cc060 7744 (match_operand:VB 3 "register_operand" "w")]
9371aecc
CL
7745 UNSPEC_TBX))]
7746 "TARGET_SIMD"
246cc060 7747 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
9371aecc
CL
7748 [(set_attr "type" "neon_tbl4")]
7749)
7750
88b08073 7751(define_insn_and_split "aarch64_combinev16qi"
66f206b8
JW
7752 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
7753 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
7754 (match_operand:V16QI 2 "register_operand" "w")]
7755 UNSPEC_CONCAT))]
88b08073
JG
7756 "TARGET_SIMD"
7757 "#"
7758 "&& reload_completed"
7759 [(const_int 0)]
7760{
7761 aarch64_split_combinev16qi (operands);
7762 DONE;
0f686aa9
JG
7763}
7764[(set_attr "type" "multiple")]
7765)
88b08073 7766
3f8334a5
RS
7767;; This instruction's pattern is generated directly by
7768;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7769;; need corresponding changes there.
3e2751ce 7770(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
358decd5
JW
7771 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7772 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7773 (match_operand:VALL_F16 2 "register_operand" "w")]
7774 PERMUTE))]
cc4d934f 7775 "TARGET_SIMD"
3e2751ce 7776 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
78ec3036 7777 [(set_attr "type" "neon_permute<q>")]
cc4d934f
JG
7778)
7779
3f8334a5
RS
7780;; This instruction's pattern is generated directly by
7781;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7782;; need corresponding changes there. Note that the immediate (third)
7783;; operand is a lane index not a byte index.
ae0533da 7784(define_insn "aarch64_ext<mode>"
358decd5
JW
7785 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7786 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7787 (match_operand:VALL_F16 2 "register_operand" "w")
7788 (match_operand:SI 3 "immediate_operand" "i")]
7789 UNSPEC_EXT))]
ae0533da
AL
7790 "TARGET_SIMD"
7791{
7792 operands[3] = GEN_INT (INTVAL (operands[3])
cb5ca315 7793 * GET_MODE_UNIT_SIZE (<MODE>mode));
ae0533da
AL
7794 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
7795}
7796 [(set_attr "type" "neon_ext<q>")]
7797)
7798
3f8334a5
RS
7799;; This instruction's pattern is generated directly by
7800;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7801;; need corresponding changes there.
923fcec3 7802(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
358decd5
JW
7803 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7804 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
923fcec3
AL
7805 REVERSE))]
7806 "TARGET_SIMD"
7807 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
7808 [(set_attr "type" "neon_rev<q>")]
7809)
7810
66f206b8
JW
7811(define_insn "aarch64_st2<vstruct_elt>_dreg"
7812 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
7813 (unspec:VSTRUCT_2DNX [
7814 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
7815 UNSPEC_ST2))]
43e9d192
IB
7816 "TARGET_SIMD"
7817 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
78ec3036
JG
7818 [(set_attr "type" "neon_store2_2reg")]
7819)
43e9d192 7820
66f206b8
JW
7821(define_insn "aarch64_st2<vstruct_elt>_dreg"
7822 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
7823 (unspec:VSTRUCT_2DX [
7824 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
7825 UNSPEC_ST2))]
43e9d192
IB
7826 "TARGET_SIMD"
7827 "st1\\t{%S1.1d - %T1.1d}, %0"
78ec3036
JG
7828 [(set_attr "type" "neon_store1_2reg")]
7829)
43e9d192 7830
66f206b8
JW
7831(define_insn "aarch64_st3<vstruct_elt>_dreg"
7832 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
7833 (unspec:VSTRUCT_3DNX [
7834 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
7835 UNSPEC_ST3))]
43e9d192
IB
7836 "TARGET_SIMD"
7837 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
78ec3036
JG
7838 [(set_attr "type" "neon_store3_3reg")]
7839)
43e9d192 7840
66f206b8
JW
7841(define_insn "aarch64_st3<vstruct_elt>_dreg"
7842 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
7843 (unspec:VSTRUCT_3DX [
7844 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
7845 UNSPEC_ST3))]
43e9d192
IB
7846 "TARGET_SIMD"
7847 "st1\\t{%S1.1d - %U1.1d}, %0"
78ec3036
JG
7848 [(set_attr "type" "neon_store1_3reg")]
7849)
43e9d192 7850
66f206b8
JW
7851(define_insn "aarch64_st4<vstruct_elt>_dreg"
7852 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
7853 (unspec:VSTRUCT_4DNX [
7854 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
7855 UNSPEC_ST4))]
43e9d192
IB
7856 "TARGET_SIMD"
7857 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
78ec3036
JG
7858 [(set_attr "type" "neon_store4_4reg")]
7859)
43e9d192 7860
66f206b8
JW
7861(define_insn "aarch64_st4<vstruct_elt>_dreg"
7862 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
7863 (unspec:VSTRUCT_4DX [
7864 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
7865 UNSPEC_ST4))]
43e9d192
IB
7866 "TARGET_SIMD"
7867 "st1\\t{%S1.1d - %V1.1d}, %0"
78ec3036
JG
7868 [(set_attr "type" "neon_store1_4reg")]
7869)
43e9d192 7870
66f206b8 7871(define_expand "aarch64_st<nregs><vstruct_elt>"
1bbffb87 7872 [(match_operand:DI 0 "register_operand")
66f206b8 7873 (match_operand:VSTRUCT_D 1 "register_operand")]
43e9d192
IB
7874 "TARGET_SIMD"
7875{
66f206b8
JW
7876 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7877 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
43e9d192
IB
7878 DONE;
7879})
7880
66f206b8 7881(define_expand "aarch64_st<nregs><vstruct_elt>"
1bbffb87 7882 [(match_operand:DI 0 "register_operand")
66f206b8 7883 (match_operand:VSTRUCT_Q 1 "register_operand")]
43e9d192
IB
7884 "TARGET_SIMD"
7885{
66f206b8
JW
7886 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7887 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
43e9d192
IB
7888 DONE;
7889})
7890
66f206b8 7891(define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
1bbffb87 7892 [(match_operand:DI 0 "register_operand")
66f206b8 7893 (match_operand:VSTRUCT_QD 1 "register_operand")
ba081b77
JG
7894 (match_operand:SI 2 "immediate_operand")]
7895 "TARGET_SIMD"
7896{
d866f024 7897 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
66f206b8 7898 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
ba081b77 7899
66f206b8
JW
7900 aarch64_simd_lane_bounds (operands[2], 0,
7901 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7902 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
7903 operands[1], operands[2]));
ba081b77
JG
7904 DONE;
7905})
7906
71a11456 7907(define_expand "aarch64_st1<VALL_F16:mode>"
dec11868 7908 [(match_operand:DI 0 "register_operand")
71a11456 7909 (match_operand:VALL_F16 1 "register_operand")]
dec11868
JG
7910 "TARGET_SIMD"
7911{
71a11456 7912 machine_mode mode = <VALL_F16:MODE>mode;
dec11868 7913 rtx mem = gen_rtx_MEM (mode, operands[0]);
89b4515c
AV
7914
7915 if (BYTES_BIG_ENDIAN)
71a11456 7916 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
89b4515c
AV
7917 else
7918 emit_move_insn (mem, operands[1]);
dec11868
JG
7919 DONE;
7920})
7921
ff03930a 7922;; Standard pattern name vec_init<mode><Vel>.
4369c11e 7923
ff03930a 7924(define_expand "vec_init<mode><Vel>"
1bbffb87 7925 [(match_operand:VALL_F16 0 "register_operand")
4369c11e
TB
7926 (match_operand 1 "" "")]
7927 "TARGET_SIMD"
41dab855
KT
7928{
7929 aarch64_expand_vector_init (operands[0], operands[1]);
7930 DONE;
7931})
7932
7933(define_expand "vec_init<mode><Vhalf>"
1bbffb87 7934 [(match_operand:VQ_NO2E 0 "register_operand")
41dab855
KT
7935 (match_operand 1 "" "")]
7936 "TARGET_SIMD"
4369c11e
TB
7937{
7938 aarch64_expand_vector_init (operands[0], operands[1]);
7939 DONE;
7940})
7941
a50344cb 7942(define_insn "*aarch64_simd_ld1r<mode>"
862abc04
AL
7943 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7944 (vec_duplicate:VALL_F16
a50344cb
TB
7945 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
7946 "TARGET_SIMD"
7947 "ld1r\\t{%0.<Vtype>}, %1"
78ec3036
JG
7948 [(set_attr "type" "neon_load1_all_lanes")]
7949)
0050faf8 7950
66f206b8
JW
7951(define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
7952 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7953 (unspec:VSTRUCT_2QD [
7954 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
7955 UNSPEC_LD1))]
467e6f1b
KV
7956 "TARGET_SIMD"
7957 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7958 [(set_attr "type" "neon_load1_2reg<q>")]
7959)
7960
7961
0016d8d9 7962(define_insn "@aarch64_frecpe<mode>"
4663b943
RS
7963 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
7964 (unspec:VHSDF_HSDF
7965 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
daef0a8c 7966 UNSPEC_FRECPE))]
0050faf8 7967 "TARGET_SIMD"
4663b943 7968 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
daef0a8c 7969 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
0050faf8
JG
7970)
7971
4663b943 7972(define_insn "aarch64_frecpx<mode>"
d7f33f07
JW
7973 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
7974 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
4663b943 7975 UNSPEC_FRECPX))]
fe6f68e2 7976 "TARGET_SIMD"
4663b943
RS
7977 "frecpx\t%<s>0, %<s>1"
7978 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
fe6f68e2
JG
7979)
7980
0016d8d9 7981(define_insn "@aarch64_frecps<mode>"
68ad28c3
JW
7982 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
7983 (unspec:VHSDF_HSDF
7984 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
7985 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
33d72b63 7986 UNSPEC_FRECPS))]
0050faf8 7987 "TARGET_SIMD"
fe6f68e2 7988 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
33d72b63 7989 [(set_attr "type" "neon_fp_recps_<stype><q>")]
0050faf8
JG
7990)
7991
58a3bd25
FY
7992(define_insn "aarch64_urecpe<mode>"
7993 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
7994 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
7995 UNSPEC_URECPE))]
7996 "TARGET_SIMD"
7997 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
7998 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
7999
ff03930a 8000;; Standard pattern name vec_extract<mode><Vel>.
0f365c10 8001
ff03930a 8002(define_expand "vec_extract<mode><Vel>"
1bbffb87
DZ
8003 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8004 (match_operand:VALL_F16 1 "register_operand")
8005 (match_operand:SI 2 "immediate_operand")]
0f365c10 8006 "TARGET_SIMD"
e58bf20a
TB
8007{
8008 emit_insn
8009 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8010 DONE;
8011})
5a7a4e80 8012
c15893df
RS
8013;; Extract a 64-bit vector from one half of a 128-bit vector.
8014(define_expand "vec_extract<mode><Vhalf>"
8015 [(match_operand:<VHALF> 0 "register_operand")
8016 (match_operand:VQMOV_NO2E 1 "register_operand")
8017 (match_operand 2 "immediate_operand")]
8018 "TARGET_SIMD"
8019{
8020 int start = INTVAL (operands[2]);
8021 if (start != 0 && start != <nunits> / 2)
8022 FAIL;
8023 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8024 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8025 DONE;
8026})
8027
8028;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8029(define_expand "vec_extractv2dfv1df"
8030 [(match_operand:V1DF 0 "register_operand")
8031 (match_operand:V2DF 1 "register_operand")
8032 (match_operand 2 "immediate_operand")]
8033 "TARGET_SIMD"
8034{
8035 /* V1DF is rarely used by other patterns, so it should be better to hide
8036 it in a subreg destination of a normal DF op. */
8037 rtx scalar0 = gen_lowpart (DFmode, operands[0]);
8038 emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
8039 DONE;
8040})
8041
5a7a4e80
TB
8042;; aes
8043
8044(define_insn "aarch64_crypto_aes<aes_op>v16qi"
8045 [(set (match_operand:V16QI 0 "register_operand" "=w")
5169fa77
ST
8046 (unspec:V16QI
8047 [(xor:V16QI
8048 (match_operand:V16QI 1 "register_operand" "%0")
8049 (match_operand:V16QI 2 "register_operand" "w"))]
5a7a4e80 8050 CRYPTO_AES))]
27086ea3 8051 "TARGET_SIMD && TARGET_AES"
5a7a4e80 8052 "aes<aes_op>\\t%0.16b, %2.16b"
b10baa95 8053 [(set_attr "type" "crypto_aese")]
5a7a4e80
TB
8054)
8055
8056(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5169fa77
ST
8057 [(set (match_operand:V16QI 0 "register_operand" "=w")
8058 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
5a7a4e80 8059 CRYPTO_AESMC))]
27086ea3 8060 "TARGET_SIMD && TARGET_AES"
5a7a4e80 8061 "aes<aesmc_op>\\t%0.16b, %1.16b"
5169fa77 8062 [(set_attr "type" "crypto_aesmc")]
5a7a4e80
TB
8063)
8064
10c54e5b
KT
8065;; When AESE/AESMC fusion is enabled we really want to keep the two together
8066;; and enforce the register dependency without scheduling or register
8067;; allocation messing up the order or introducing moves inbetween.
8068;; Mash the two together during combine.
8069
8070(define_insn "*aarch64_crypto_aese_fused"
5169fa77 8071 [(set (match_operand:V16QI 0 "register_operand" "=w")
10c54e5b
KT
8072 (unspec:V16QI
8073 [(unspec:V16QI
5169fa77
ST
8074 [(xor:V16QI
8075 (match_operand:V16QI 1 "register_operand" "%0")
8076 (match_operand:V16QI 2 "register_operand" "w"))]
8077 UNSPEC_AESE)]
8078 UNSPEC_AESMC))]
10c54e5b
KT
8079 "TARGET_SIMD && TARGET_AES
8080 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8081 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8082 [(set_attr "type" "crypto_aese")
8083 (set_attr "length" "8")]
8084)
8085
8086;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8087;; and enforce the register dependency without scheduling or register
8088;; allocation messing up the order or introducing moves inbetween.
8089;; Mash the two together during combine.
8090
8091(define_insn "*aarch64_crypto_aesd_fused"
5169fa77 8092 [(set (match_operand:V16QI 0 "register_operand" "=w")
10c54e5b
KT
8093 (unspec:V16QI
8094 [(unspec:V16QI
5169fa77
ST
8095 [(xor:V16QI
8096 (match_operand:V16QI 1 "register_operand" "%0")
8097 (match_operand:V16QI 2 "register_operand" "w"))]
8098 UNSPEC_AESD)]
8099 UNSPEC_AESIMC))]
10c54e5b
KT
8100 "TARGET_SIMD && TARGET_AES
8101 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8102 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8103 [(set_attr "type" "crypto_aese")
8104 (set_attr "length" "8")]
8105)
8106
30442682
TB
8107;; sha1
8108
8109(define_insn "aarch64_crypto_sha1hsi"
8110 [(set (match_operand:SI 0 "register_operand" "=w")
8111 (unspec:SI [(match_operand:SI 1
8112 "register_operand" "w")]
8113 UNSPEC_SHA1H))]
27086ea3 8114 "TARGET_SIMD && TARGET_SHA2"
30442682
TB
8115 "sha1h\\t%s0, %s1"
8116 [(set_attr "type" "crypto_sha1_fast")]
8117)
8118
5304d044
WD
8119(define_insn "aarch64_crypto_sha1hv4si"
8120 [(set (match_operand:SI 0 "register_operand" "=w")
8121 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8122 (parallel [(const_int 0)]))]
8123 UNSPEC_SHA1H))]
27086ea3 8124 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5304d044
WD
8125 "sha1h\\t%s0, %s1"
8126 [(set_attr "type" "crypto_sha1_fast")]
8127)
8128
8129(define_insn "aarch64_be_crypto_sha1hv4si"
8130 [(set (match_operand:SI 0 "register_operand" "=w")
8131 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8132 (parallel [(const_int 3)]))]
8133 UNSPEC_SHA1H))]
27086ea3 8134 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5304d044
WD
8135 "sha1h\\t%s0, %s1"
8136 [(set_attr "type" "crypto_sha1_fast")]
8137)
8138
30442682
TB
8139(define_insn "aarch64_crypto_sha1su1v4si"
8140 [(set (match_operand:V4SI 0 "register_operand" "=w")
8141 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8142 (match_operand:V4SI 2 "register_operand" "w")]
8143 UNSPEC_SHA1SU1))]
27086ea3 8144 "TARGET_SIMD && TARGET_SHA2"
30442682
TB
8145 "sha1su1\\t%0.4s, %2.4s"
8146 [(set_attr "type" "crypto_sha1_fast")]
8147)
8148
8149(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8150 [(set (match_operand:V4SI 0 "register_operand" "=w")
8151 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8152 (match_operand:SI 2 "register_operand" "w")
8153 (match_operand:V4SI 3 "register_operand" "w")]
8154 CRYPTO_SHA1))]
27086ea3 8155 "TARGET_SIMD && TARGET_SHA2"
30442682
TB
8156 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8157 [(set_attr "type" "crypto_sha1_slow")]
8158)
8159
8160(define_insn "aarch64_crypto_sha1su0v4si"
8161 [(set (match_operand:V4SI 0 "register_operand" "=w")
8162 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8163 (match_operand:V4SI 2 "register_operand" "w")
8164 (match_operand:V4SI 3 "register_operand" "w")]
8165 UNSPEC_SHA1SU0))]
27086ea3 8166 "TARGET_SIMD && TARGET_SHA2"
30442682
TB
8167 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8168 [(set_attr "type" "crypto_sha1_xor")]
8169)
b9cb0a44
TB
8170
8171;; sha256
8172
8173(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8174 [(set (match_operand:V4SI 0 "register_operand" "=w")
8175 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8176 (match_operand:V4SI 2 "register_operand" "w")
8177 (match_operand:V4SI 3 "register_operand" "w")]
8178 CRYPTO_SHA256))]
27086ea3 8179 "TARGET_SIMD && TARGET_SHA2"
b9cb0a44
TB
8180 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8181 [(set_attr "type" "crypto_sha256_slow")]
8182)
8183
8184(define_insn "aarch64_crypto_sha256su0v4si"
8185 [(set (match_operand:V4SI 0 "register_operand" "=w")
8186 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8187 (match_operand:V4SI 2 "register_operand" "w")]
8188 UNSPEC_SHA256SU0))]
27086ea3 8189 "TARGET_SIMD && TARGET_SHA2"
b9cb0a44
TB
8190 "sha256su0\\t%0.4s, %2.4s"
8191 [(set_attr "type" "crypto_sha256_fast")]
8192)
8193
8194(define_insn "aarch64_crypto_sha256su1v4si"
8195 [(set (match_operand:V4SI 0 "register_operand" "=w")
8196 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8197 (match_operand:V4SI 2 "register_operand" "w")
8198 (match_operand:V4SI 3 "register_operand" "w")]
8199 UNSPEC_SHA256SU1))]
27086ea3 8200 "TARGET_SIMD && TARGET_SHA2"
b9cb0a44
TB
8201 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8202 [(set_attr "type" "crypto_sha256_slow")]
8203)
7baa225d 8204
27086ea3
MC
8205;; sha512
8206
8207(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8208 [(set (match_operand:V2DI 0 "register_operand" "=w")
8209 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8210 (match_operand:V2DI 2 "register_operand" "w")
8211 (match_operand:V2DI 3 "register_operand" "w")]
8212 CRYPTO_SHA512))]
8213 "TARGET_SIMD && TARGET_SHA3"
8214 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8215 [(set_attr "type" "crypto_sha512")]
8216)
8217
8218(define_insn "aarch64_crypto_sha512su0qv2di"
8219 [(set (match_operand:V2DI 0 "register_operand" "=w")
8220 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8221 (match_operand:V2DI 2 "register_operand" "w")]
8222 UNSPEC_SHA512SU0))]
8223 "TARGET_SIMD && TARGET_SHA3"
8224 "sha512su0\\t%0.2d, %2.2d"
8225 [(set_attr "type" "crypto_sha512")]
8226)
8227
8228(define_insn "aarch64_crypto_sha512su1qv2di"
8229 [(set (match_operand:V2DI 0 "register_operand" "=w")
8230 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8231 (match_operand:V2DI 2 "register_operand" "w")
8232 (match_operand:V2DI 3 "register_operand" "w")]
8233 UNSPEC_SHA512SU1))]
8234 "TARGET_SIMD && TARGET_SHA3"
8235 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8236 [(set_attr "type" "crypto_sha512")]
8237)
8238
8239;; sha3
8240
d21052eb
TC
8241(define_insn "eor3q<mode>4"
8242 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8243 (xor:VQ_I
8244 (xor:VQ_I
8245 (match_operand:VQ_I 2 "register_operand" "w")
8246 (match_operand:VQ_I 3 "register_operand" "w"))
8247 (match_operand:VQ_I 1 "register_operand" "w")))]
27086ea3
MC
8248 "TARGET_SIMD && TARGET_SHA3"
8249 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8250 [(set_attr "type" "crypto_sha3")]
8251)
8252
8253(define_insn "aarch64_rax1qv2di"
8254 [(set (match_operand:V2DI 0 "register_operand" "=w")
8255 (xor:V2DI
8256 (rotate:V2DI
8257 (match_operand:V2DI 2 "register_operand" "w")
8258 (const_int 1))
8259 (match_operand:V2DI 1 "register_operand" "w")))]
8260 "TARGET_SIMD && TARGET_SHA3"
8261 "rax1\\t%0.2d, %1.2d, %2.2d"
8262 [(set_attr "type" "crypto_sha3")]
8263)
8264
8265(define_insn "aarch64_xarqv2di"
8266 [(set (match_operand:V2DI 0 "register_operand" "=w")
8267 (rotatert:V2DI
8268 (xor:V2DI
8269 (match_operand:V2DI 1 "register_operand" "%w")
8270 (match_operand:V2DI 2 "register_operand" "w"))
8271 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8272 "TARGET_SIMD && TARGET_SHA3"
8273 "xar\\t%0.2d, %1.2d, %2.2d, %3"
8274 [(set_attr "type" "crypto_sha3")]
8275)
8276
d21052eb
TC
8277(define_insn "bcaxq<mode>4"
8278 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8279 (xor:VQ_I
8280 (and:VQ_I
8281 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8282 (match_operand:VQ_I 2 "register_operand" "w"))
8283 (match_operand:VQ_I 1 "register_operand" "w")))]
27086ea3
MC
8284 "TARGET_SIMD && TARGET_SHA3"
8285 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8286 [(set_attr "type" "crypto_sha3")]
8287)
8288
8289;; SM3
8290
8291(define_insn "aarch64_sm3ss1qv4si"
8292 [(set (match_operand:V4SI 0 "register_operand" "=w")
8293 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8294 (match_operand:V4SI 2 "register_operand" "w")
8295 (match_operand:V4SI 3 "register_operand" "w")]
8296 UNSPEC_SM3SS1))]
8297 "TARGET_SIMD && TARGET_SM4"
8298 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8299 [(set_attr "type" "crypto_sm3")]
8300)
8301
8302
8303(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
8304 [(set (match_operand:V4SI 0 "register_operand" "=w")
8305 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8306 (match_operand:V4SI 2 "register_operand" "w")
8307 (match_operand:V4SI 3 "register_operand" "w")
8308 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
8309 CRYPTO_SM3TT))]
8310 "TARGET_SIMD && TARGET_SM4"
8311 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
8312 [(set_attr "type" "crypto_sm3")]
8313)
8314
8315(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
8316 [(set (match_operand:V4SI 0 "register_operand" "=w")
8317 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8318 (match_operand:V4SI 2 "register_operand" "w")
8319 (match_operand:V4SI 3 "register_operand" "w")]
8320 CRYPTO_SM3PART))]
8321 "TARGET_SIMD && TARGET_SM4"
8322 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
8323 [(set_attr "type" "crypto_sm3")]
8324)
8325
8326;; SM4
8327
8328(define_insn "aarch64_sm4eqv4si"
8329 [(set (match_operand:V4SI 0 "register_operand" "=w")
8330 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8331 (match_operand:V4SI 2 "register_operand" "w")]
8332 UNSPEC_SM4E))]
8333 "TARGET_SIMD && TARGET_SM4"
8334 "sm4e\\t%0.4s, %2.4s"
8335 [(set_attr "type" "crypto_sm4")]
8336)
8337
8338(define_insn "aarch64_sm4ekeyqv4si"
8339 [(set (match_operand:V4SI 0 "register_operand" "=w")
8340 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8341 (match_operand:V4SI 2 "register_operand" "w")]
8342 UNSPEC_SM4EKEY))]
8343 "TARGET_SIMD && TARGET_SM4"
8344 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
8345 [(set_attr "type" "crypto_sm4")]
8346)
8347
;; fp16fml

;; Widening FP16 multiply-accumulate (FMLAL/FMLSL).  These expanders
;; build the PARALLELs that select the low (respectively high) half of
;; each half-precision source vector and then emit the matching
;; aarch64_simd_fml* instruction pattern below.

(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})
8392
;; The instruction patterns behind the expanders above.  fmlal/fmlsl
;; operate on the low halves of the FP16 sources, fmlal2/fmlsl2 on the
;; high halves; fmlsl variants negate the first multiplicand.

(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
8462
;; By-lane forms for the 64-bit destination: the second multiplicand is
;; a single FP16 element (index 0..3, hence aarch64_imm2) broadcast to
;; both lanes.  The expanders convert the lane number to the endian-
;; adjusted RTL lane before emitting the instruction pattern.

(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							  operands[1],
							  operands[2],
							  operands[3],
							  p1, lane));
  DONE;
}
)

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})
8503
;; By-lane instruction patterns (V2SF destination).  The indexed FP16
;; multiplicand must live in V0-V15, hence the "x" constraint and the
;; Ui2 constraint matching the 0..3 lane range.

(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8577
;; Quad-destination, quad-index ("laneq") forms: both sources are V8HF
;; and the lane index covers 0..7 (aarch64_lane_imm3).

(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})
8617
;; "laneq" instruction patterns (V4SF destination, V8HF indexed source).
;; Indexed operand restricted to V0-V15 ("x"); lane constraint Ui7 (0..7).

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8691
;; Mixed forms: 64-bit destination with a quad-register (V8HF) indexed
;; source, so the data half comes from V4HF but the lane is 0..7.

(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
8733
;; "laneq" instruction patterns with a V2SF destination: data operand is
;; V4HF, indexed operand V8HF in V0-V15 ("x"), lane constraint Ui7.

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8807
;; Mixed forms: quad destination with a 64-bit (V4HF) indexed source,
;; so the data half comes from V8HF but the lane is only 0..3.

(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
8847
;; "lane" instruction patterns with a V4SF destination: data operand is
;; V8HF, indexed operand V4HF in V0-V15 ("x"), lane constraint Ui2.

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
8921
;; pmull

;; Polynomial (carry-less) 64x64 -> 128-bit multiplies; the "2" variant
;; takes the high D-register halves of its V2DI sources.

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
	 UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
	 UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Named expander so the intrinsics code can generate the extension
;; above directly; no preparation code is needed.
(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; BFloat16 dot product, accumulating into single-precision lanes.
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; By-lane bfdot: operand 4 indexes a 2h (BF pair) element, so the
;; user-visible lane is halved and endian-adjusted before printing.
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)

;; vget_low/high_bf16
;; Extract the low or high V4BF half of a V8BF vector by delegating to
;; the generic aarch64_get_half pattern with the appropriate PARALLEL.
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:V8BF 3 "register_operand" "w")]
		     BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; By-lane bfmlal.  The indexed BF16 operand can only address V0-V15,
;; hence the "x" constraint (PR104921); the lane number is converted to
;; the endian-adjusted value before printing.
(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:VBF 3 "register_operand" "x")
				  (match_operand:SI 4 "const_int_operand" "n")]
		     BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

;; bfcvtn
;; Narrowing conversions from single precision to BFloat16.
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Writes the converted values into the high half of the destination,
;; preserving the low half (operand 1 is tied to the output).
(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
	 UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Scalar SF -> BF conversion.
(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
	 UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Scalar BF -> SF widening: a 16-bit left shift of the D register
;; places the BF16 payload in the top of an SF value.
(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
	 UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)