1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23 (match_operand:VALL_F16MOV 1 "general_operand"))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38
39 /* If a constant is too complex to force to memory (e.g. because it
40 contains CONST_POLY_INTs), build it up from individual elements instead.
41 We should only need to do this before RA; aarch64_legitimate_constant_p
42 should ensure that we don't try to rematerialize the constant later. */
43 if (GET_CODE (operands[1]) == CONST_VECTOR
44 && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
45 {
46 aarch64_expand_vector_init (operands[0], operands[1]);
47 DONE;
48 }
49 "
50 )
51
52 (define_expand "movmisalign<mode>"
53 [(set (match_operand:VALL 0 "nonimmediate_operand")
54 (match_operand:VALL 1 "general_operand"))]
55 "TARGET_SIMD && !STRICT_ALIGNMENT"
56 {
57 /* This pattern is not permitted to fail during expansion: if both arguments
58 are non-registers (e.g. memory := constant, which can be created by the
59 auto-vectorizer), force operand 1 into a register. */
60 if (!register_operand (operands[0], <MODE>mode)
61 && !register_operand (operands[1], <MODE>mode))
62 operands[1] = force_reg (<MODE>mode, operands[1]);
63 })
64
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
67 (vec_duplicate:VDQ_I
68 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
69 "TARGET_SIMD"
70 "@
71 dup\\t%0.<Vtype>, %1.<Vetype>[0]
72 dup\\t%0.<Vtype>, %<vwcore>1"
73 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
74 )
75
76 (define_insn "aarch64_simd_dup<mode>"
77 [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
78 (vec_duplicate:VDQF_F16
79 (match_operand:<VEL> 1 "register_operand" "w,r")))]
80 "TARGET_SIMD"
81 "@
82 dup\\t%0.<Vtype>, %1.<Vetype>[0]
83 dup\\t%0.<Vtype>, %<vwcore>1"
84 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
85 )
86
87 (define_insn "aarch64_dup_lane<mode>"
88 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
89 (vec_duplicate:VALL_F16
90 (vec_select:<VEL>
91 (match_operand:VALL_F16 1 "register_operand" "w")
92 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
93 )))]
94 "TARGET_SIMD"
95 {
96 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
97 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
98 }
99 [(set_attr "type" "neon_dup<q>")]
100 )
101
102 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
103 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
104 (vec_duplicate:VALL_F16_NO_V2Q
105 (vec_select:<VEL>
106 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
107 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
108 )))]
109 "TARGET_SIMD"
110 {
111 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
112 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
113 }
114 [(set_attr "type" "neon_dup<q>")]
115 )
116
117 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
118 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
119 "=w, m, m, w, ?r, ?w, ?r, w")
120 (match_operand:VDMOV 1 "general_operand"
121 "m, Dz, w, w, w, r, r, Dn"))]
122 "TARGET_SIMD
123 && (register_operand (operands[0], <MODE>mode)
124 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
125 {
126 switch (which_alternative)
127 {
128 case 0: return "ldr\t%d0, %1";
129 case 1: return "str\txzr, %0";
130 case 2: return "str\t%d1, %0";
131 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
132 case 4: return "umov\t%0, %1.d[0]";
133 case 5: return "fmov\t%d0, %1";
134 case 6: return "mov\t%0, %1";
135 case 7:
136 return aarch64_output_simd_mov_immediate (operands[1], 64);
137 default: gcc_unreachable ();
138 }
139 }
140 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
141 neon_logic<q>, neon_to_gp<q>, f_mcr,\
142 mov_reg, neon_move<q>")]
143 )
144
145 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
146 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
147 "=w, Umn, m, w, ?r, ?w, ?r, w")
148 (match_operand:VQMOV 1 "general_operand"
149 "m, Dz, w, w, w, r, r, Dn"))]
150 "TARGET_SIMD
151 && (register_operand (operands[0], <MODE>mode)
152 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
153 {
154 switch (which_alternative)
155 {
156 case 0:
157 return "ldr\t%q0, %1";
158 case 1:
159 return "stp\txzr, xzr, %0";
160 case 2:
161 return "str\t%q1, %0";
162 case 3:
163 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
164 case 4:
165 case 5:
166 case 6:
167 return "#";
168 case 7:
169 return aarch64_output_simd_mov_immediate (operands[1], 128);
170 default:
171 gcc_unreachable ();
172 }
173 }
174 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
175 neon_logic<q>, multiple, multiple,\
176 multiple, neon_move<q>")
177 (set_attr "length" "4,4,4,4,8,8,8,4")]
178 )
179
180 ;; When storing lane zero we can use the normal STR and its more permissive
181 ;; addressing modes.
182
183 (define_insn "aarch64_store_lane0<mode>"
184 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
185 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
186 (parallel [(match_operand 2 "const_int_operand" "n")])))]
187 "TARGET_SIMD
188 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
189 "str\\t%<Vetype>1, %0"
190 [(set_attr "type" "neon_store1_1reg<q>")]
191 )
192
193 (define_insn "load_pair<DREG:mode><DREG2:mode>"
194 [(set (match_operand:DREG 0 "register_operand" "=w")
195 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
196 (set (match_operand:DREG2 2 "register_operand" "=w")
197 (match_operand:DREG2 3 "memory_operand" "m"))]
198 "TARGET_SIMD
199 && rtx_equal_p (XEXP (operands[3], 0),
200 plus_constant (Pmode,
201 XEXP (operands[1], 0),
202 GET_MODE_SIZE (<DREG:MODE>mode)))"
203 "ldp\\t%d0, %d2, %z1"
204 [(set_attr "type" "neon_ldp")]
205 )
206
207 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
208 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
209 (match_operand:DREG 1 "register_operand" "w"))
210 (set (match_operand:DREG2 2 "memory_operand" "=m")
211 (match_operand:DREG2 3 "register_operand" "w"))]
212 "TARGET_SIMD
213 && rtx_equal_p (XEXP (operands[2], 0),
214 plus_constant (Pmode,
215 XEXP (operands[0], 0),
216 GET_MODE_SIZE (<DREG:MODE>mode)))"
217 "stp\\t%d1, %d3, %z0"
218 [(set_attr "type" "neon_stp")]
219 )
220
221 (define_insn "load_pair<VQ:mode><VQ2:mode>"
222 [(set (match_operand:VQ 0 "register_operand" "=w")
223 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
224 (set (match_operand:VQ2 2 "register_operand" "=w")
225 (match_operand:VQ2 3 "memory_operand" "m"))]
226 "TARGET_SIMD
227 && rtx_equal_p (XEXP (operands[3], 0),
228 plus_constant (Pmode,
229 XEXP (operands[1], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "ldp\\t%q0, %q2, %z1"
232 [(set_attr "type" "neon_ldp_q")]
233 )
234
235 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
236 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
237 (match_operand:VQ 1 "register_operand" "w"))
238 (set (match_operand:VQ2 2 "memory_operand" "=m")
239 (match_operand:VQ2 3 "register_operand" "w"))]
240 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
241 plus_constant (Pmode,
242 XEXP (operands[0], 0),
243 GET_MODE_SIZE (<VQ:MODE>mode)))"
244 "stp\\t%q1, %q3, %z0"
245 [(set_attr "type" "neon_stp_q")]
246 )
247
248
249 (define_split
250 [(set (match_operand:VQMOV 0 "register_operand" "")
251 (match_operand:VQMOV 1 "register_operand" ""))]
252 "TARGET_SIMD && reload_completed
253 && GP_REGNUM_P (REGNO (operands[0]))
254 && GP_REGNUM_P (REGNO (operands[1]))"
255 [(const_int 0)]
256 {
257 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
258 DONE;
259 })
260
261 (define_split
262 [(set (match_operand:VQMOV 0 "register_operand" "")
263 (match_operand:VQMOV 1 "register_operand" ""))]
264 "TARGET_SIMD && reload_completed
265 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
266 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
267 [(const_int 0)]
268 {
269 aarch64_split_simd_move (operands[0], operands[1]);
270 DONE;
271 })
272
273 (define_expand "@aarch64_split_simd_mov<mode>"
274 [(set (match_operand:VQMOV 0)
275 (match_operand:VQMOV 1))]
276 "TARGET_SIMD"
277 {
278 rtx dst = operands[0];
279 rtx src = operands[1];
280
281 if (GP_REGNUM_P (REGNO (src)))
282 {
283 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
284 rtx src_high_part = gen_highpart (<VHALF>mode, src);
285
286 emit_insn
287 (gen_move_lo_quad_<mode> (dst, src_low_part));
288 emit_insn
289 (gen_move_hi_quad_<mode> (dst, src_high_part));
290 }
291
292 else
293 {
294 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
295 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
296 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
297 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
298 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
299 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
300 }
301 DONE;
302 }
303 )
304
305 (define_expand "aarch64_get_half<mode>"
306 [(set (match_operand:<VHALF> 0 "register_operand")
307 (vec_select:<VHALF>
308 (match_operand:VQMOV 1 "register_operand")
309 (match_operand 2 "ascending_int_parallel")))]
310 "TARGET_SIMD"
311 )
312
313 (define_expand "aarch64_get_low<mode>"
314 [(match_operand:<VHALF> 0 "register_operand")
315 (match_operand:VQMOV 1 "register_operand")]
316 "TARGET_SIMD"
317 {
318 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
319 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
320 DONE;
321 }
322 )
323
324 (define_expand "aarch64_get_high<mode>"
325 [(match_operand:<VHALF> 0 "register_operand")
326 (match_operand:VQMOV 1 "register_operand")]
327 "TARGET_SIMD"
328 {
329 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
330 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
331 DONE;
332 }
333 )
334
335 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
336 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
337 (vec_select:<VHALF>
338 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
339 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
340 "TARGET_SIMD"
341 "@
342 #
343 umov\t%0, %1.d[0]"
344 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
345 [(set (match_dup 0) (match_dup 1))]
346 {
347 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
348 }
349 [(set_attr "type" "mov_reg,neon_to_gp<q>")
350 (set_attr "length" "4")]
351 )
352
353 (define_insn "aarch64_simd_mov_from_<mode>high"
354 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
355 (vec_select:<VHALF>
356 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
357 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
358 "TARGET_SIMD"
359 "@
360 dup\\t%d0, %1.d[1]
361 umov\t%0, %1.d[1]"
362 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
363 (set_attr "length" "4")]
364 )
365
366 (define_insn "orn<mode>3"
367 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
368 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
369 (match_operand:VDQ_I 2 "register_operand" "w")))]
370 "TARGET_SIMD"
371 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
372 [(set_attr "type" "neon_logic<q>")]
373 )
374
375 (define_insn "bic<mode>3"
376 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
377 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
378 (match_operand:VDQ_I 2 "register_operand" "w")))]
379 "TARGET_SIMD"
380 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
381 [(set_attr "type" "neon_logic<q>")]
382 )
383
384 (define_insn "add<mode>3"
385 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
386 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
387 (match_operand:VDQ_I 2 "register_operand" "w")))]
388 "TARGET_SIMD"
389 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
390 [(set_attr "type" "neon_add<q>")]
391 )
392
393 (define_insn "sub<mode>3"
394 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
395 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
396 (match_operand:VDQ_I 2 "register_operand" "w")))]
397 "TARGET_SIMD"
398 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
399 [(set_attr "type" "neon_sub<q>")]
400 )
401
402 (define_insn "mul<mode>3"
403 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
404 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
405 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
406 "TARGET_SIMD"
407 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
408 [(set_attr "type" "neon_mul_<Vetype><q>")]
409 )
410
411 (define_insn "bswap<mode>2"
412 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
413 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
414 "TARGET_SIMD"
415 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
416 [(set_attr "type" "neon_rev<q>")]
417 )
418
419 (define_insn "aarch64_rbit<mode>"
420 [(set (match_operand:VB 0 "register_operand" "=w")
421 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
422 UNSPEC_RBIT))]
423 "TARGET_SIMD"
424 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
425 [(set_attr "type" "neon_rbit")]
426 )
427
428 (define_expand "ctz<mode>2"
429 [(set (match_operand:VS 0 "register_operand")
430 (ctz:VS (match_operand:VS 1 "register_operand")))]
431 "TARGET_SIMD"
432 {
433 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
434 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
435 <MODE>mode, 0);
436 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
437 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
438 DONE;
439 }
440 )
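
;; As a rough scalar sketch of the trick above (illustration only, for
;; 32-bit elements, assuming <stdint.h>): the byte swap plus a per-byte
;; RBIT give a full bit reversal, after which count-leading-zeros yields
;; count-trailing-zeros.
;;
;; int ctz32_sketch (uint32_t x)
;; {
;;   uint32_t rev = 0;
;;   for (int i = 0; i < 32; i++)   /* bswap + per-byte rbit == reversal */
;;     rev |= ((x >> i) & 1u) << (31 - i);
;;   return rev ? __builtin_clz (rev) : 32;  /* clz (0) is undefined in C */
;; }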
441
442 (define_expand "xorsign<mode>3"
443 [(match_operand:VHSDF 0 "register_operand")
444 (match_operand:VHSDF 1 "register_operand")
445 (match_operand:VHSDF 2 "register_operand")]
446 "TARGET_SIMD"
447 {
448
449 machine_mode imode = <V_INT_EQUIV>mode;
450 rtx v_bitmask = gen_reg_rtx (imode);
451 rtx op1x = gen_reg_rtx (imode);
452 rtx op2x = gen_reg_rtx (imode);
453
454 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
455 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
456
457 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
458
459 emit_move_insn (v_bitmask,
460 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
461 HOST_WIDE_INT_M1U << bits));
462
463 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
464 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
465 emit_move_insn (operands[0],
466 lowpart_subreg (<MODE>mode, op1x, imode));
467 DONE;
468 }
469 )
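
;; A minimal scalar sketch of the expansion above (illustration only,
;; assuming <stdint.h> and <string.h>): xorsign (x, y) flips the sign of x
;; wherever y is negative, by XORing in y's sign bit.
;;
;; float xorsignf_sketch (float x, float y)
;; {
;;   uint32_t ux, uy;
;;   memcpy (&ux, &x, sizeof ux);
;;   memcpy (&uy, &y, sizeof uy);
;;   ux ^= uy & 0x80000000u;   /* the AND/XOR pair the expander emits */
;;   memcpy (&x, &ux, sizeof ux);
;;   return x;
;; }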
470
471 ;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
472 ;; use needs to guarantee that the source vectors are
473 ;; contiguous. It would be wrong to describe the operation without being able
474 ;; to describe the permute that is also required, but even if that is done
475 ;; the permute would have been created as a LOAD_LANES which means the values
476 ;; in the registers are in the wrong order.
477 (define_insn "aarch64_fcadd<rot><mode>"
478 [(set (match_operand:VHSDF 0 "register_operand" "=w")
479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
480 (match_operand:VHSDF 2 "register_operand" "w")]
481 FCADD))]
482 "TARGET_COMPLEX"
483 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
484 [(set_attr "type" "neon_fcadd")]
485 )
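
;; Per-element sketch of what FCADD computes on (real, imag) pairs for the
;; two supported rotations (an illustration of the architected behaviour,
;; not part of the pattern):
;;
;;   #90 :  d.re = a.re - b.im;  d.im = a.im + b.re;   /* d = a + i*b */
;;   #270:  d.re = a.re + b.im;  d.im = a.im - b.re;   /* d = a - i*b */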
486
487 (define_expand "cadd<rot><mode>3"
488 [(set (match_operand:VHSDF 0 "register_operand")
489 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
490 (match_operand:VHSDF 2 "register_operand")]
491 FCADD))]
492 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
493 )
494
495 (define_insn "aarch64_fcmla<rot><mode>"
496 [(set (match_operand:VHSDF 0 "register_operand" "=w")
497 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
498 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
499 (match_operand:VHSDF 3 "register_operand" "w")]
500 FCMLA)))]
501 "TARGET_COMPLEX"
502 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
503 [(set_attr "type" "neon_fcmla")]
504 )
505
506
507 (define_insn "aarch64_fcmla_lane<rot><mode>"
508 [(set (match_operand:VHSDF 0 "register_operand" "=w")
509 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
510 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
511 (match_operand:VHSDF 3 "register_operand" "w")
512 (match_operand:SI 4 "const_int_operand" "n")]
513 FCMLA)))]
514 "TARGET_COMPLEX"
515 {
516 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
517 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
518 }
519 [(set_attr "type" "neon_fcmla")]
520 )
521
522 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
523 [(set (match_operand:V4HF 0 "register_operand" "=w")
524 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
525 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
526 (match_operand:V8HF 3 "register_operand" "w")
527 (match_operand:SI 4 "const_int_operand" "n")]
528 FCMLA)))]
529 "TARGET_COMPLEX"
530 {
531 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
532 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
533 }
534 [(set_attr "type" "neon_fcmla")]
535 )
536
537 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
538 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
539 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
540 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
541 (match_operand:<VHALF> 3 "register_operand" "w")
542 (match_operand:SI 4 "const_int_operand" "n")]
543 FCMLA)))]
544 "TARGET_COMPLEX"
545 {
546 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
547 operands[4]
548 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
549 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
550 }
551 [(set_attr "type" "neon_fcmla")]
552 )
553
554 ;; The complex mla/mls operations always need to expand to two instructions.
555 ;; The first operation does half the computation and the second does the
556 ;; remainder. Because of this, expand early.
557 (define_expand "cml<fcmac1><conj_op><mode>4"
558 [(set (match_operand:VHSDF 0 "register_operand")
559 (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
560 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
561 (match_operand:VHSDF 3 "register_operand")]
562 FCMLA_OP)))]
563 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
564 {
565 rtx tmp = gen_reg_rtx (<MODE>mode);
566 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
567 operands[3], operands[2]));
568 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
569 operands[3], operands[2]));
570 DONE;
571 })
572
573 ;; The complex mul operations always need to expand to two instructions.
574 ;; The first operation does half the computation and the second does the
575 ;; remainder. Because of this, expand early.
576 (define_expand "cmul<conj_op><mode>3"
577 [(set (match_operand:VHSDF 0 "register_operand")
578 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 FCMUL_OP))]
581 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
582 {
583 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
584 rtx res1 = gen_reg_rtx (<MODE>mode);
585 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
586 operands[2], operands[1]));
587 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
588 operands[2], operands[1]));
589 DONE;
590 })
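
;; Sketch of the two-instruction decomposition used by both expanders above
;; for the base (non-conjugate) case, ignoring the operand-order details the
;; iterators handle (illustration only).  For acc += a * b on (real, imag)
;; pairs:
;;
;;   FCMLA #0 :  acc.re += a.re * b.re;  acc.im += a.re * b.im;
;;   FCMLA #90:  acc.re -= a.im * b.im;  acc.im += a.im * b.re;
;;
;; which sums to the full complex multiply-accumulate; cmul simply starts
;; from a zeroed accumulator.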
591
592 ;; These expands map to the Dot Product optab that the vectorizer checks for
593 ;; and to the intrinsics pattern.
594 ;; The auto-vectorizer expects a dot product builtin that also does an
595 ;; accumulation into the provided register.
596 ;; Given the following pattern
597 ;;
598 ;; for (i=0; i<len; i++) {
599 ;; c = a[i] * b[i];
600 ;; r += c;
601 ;; }
602 ;; return r;
603 ;;
604 ;; This can be auto-vectorized to
605 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
606 ;;
607 ;; given enough iterations. However, the vectorizer can keep unrolling the loop
608 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
609 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
610 ;; ...
611 ;;
612 ;; and so the vectorizer provides r, in which the result has to be accumulated.
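;;
;; Per 32-bit lane, the instruction behaves like this scalar sketch
;; (illustration only), with four byte products feeding each lane:
;;
;; for (i = 0; i < num_lanes; i++)
;;   r[i] += a[4*i + 0] * b[4*i + 0] + a[4*i + 1] * b[4*i + 1]
;;         + a[4*i + 2] * b[4*i + 2] + a[4*i + 3] * b[4*i + 3];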
613 (define_insn "<sur>dot_prod<vsi2qi>"
614 [(set (match_operand:VS 0 "register_operand" "=w")
615 (plus:VS
616 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
617 (match_operand:<VSI2QI> 2 "register_operand" "w")]
618 DOTPROD)
619 (match_operand:VS 3 "register_operand" "0")))]
620 "TARGET_DOTPROD"
621 "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
622 [(set_attr "type" "neon_dot<q>")]
623 )
624
625 ;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot
626 ;; (vector) Dot Product operation and the vectorized optab.
627 (define_insn "usdot_prod<vsi2qi>"
628 [(set (match_operand:VS 0 "register_operand" "=w")
629 (plus:VS
630 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
631 (match_operand:<VSI2QI> 2 "register_operand" "w")]
632 UNSPEC_USDOT)
633 (match_operand:VS 3 "register_operand" "0")))]
634 "TARGET_I8MM"
635 "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
636 [(set_attr "type" "neon_dot<q>")]
637 )
638
639 ;; These instructions map to the __builtins for the Dot Product
640 ;; indexed operations.
641 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
642 [(set (match_operand:VS 0 "register_operand" "=w")
643 (plus:VS
644 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
645 (match_operand:V8QI 3 "register_operand" "<h_con>")
646 (match_operand:SI 4 "immediate_operand" "i")]
647 DOTPROD)
648 (match_operand:VS 1 "register_operand" "0")))]
649 "TARGET_DOTPROD"
650 {
651 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
652 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
653 }
654 [(set_attr "type" "neon_dot<q>")]
655 )
656
657 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
658 [(set (match_operand:VS 0 "register_operand" "=w")
659 (plus:VS
660 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
661 (match_operand:V16QI 3 "register_operand" "<h_con>")
662 (match_operand:SI 4 "immediate_operand" "i")]
663 DOTPROD)
664 (match_operand:VS 1 "register_operand" "0")))]
665 "TARGET_DOTPROD"
666 {
667 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
668 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
669 }
670 [(set_attr "type" "neon_dot<q>")]
671 )
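
;; In the lane forms above, every 32-bit lane of the result reuses the same
;; selected group of four bytes from operand 3.  Roughly (illustration
;; only):
;;
;; for (i = 0; i < num_lanes; i++)
;;   for (j = 0; j < 4; j++)
;;     r[i] += a[4*i + j] * b[4*lane + j];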
672
673 ;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot and sudot
674 ;; (by element) Dot Product operations.
675 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
676 [(set (match_operand:VS 0 "register_operand" "=w")
677 (plus:VS
678 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
679 (match_operand:VB 3 "register_operand" "w")
680 (match_operand:SI 4 "immediate_operand" "i")]
681 DOTPROD_I8MM)
682 (match_operand:VS 1 "register_operand" "0")))]
683 "TARGET_I8MM"
684 {
685 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
686 int lane = INTVAL (operands[4]);
687 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
688 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
689 }
690 [(set_attr "type" "neon_dot<VS:q>")]
691 )
692
693 (define_expand "copysign<mode>3"
694 [(match_operand:VHSDF 0 "register_operand")
695 (match_operand:VHSDF 1 "register_operand")
696 (match_operand:VHSDF 2 "register_operand")]
697 "TARGET_FLOAT && TARGET_SIMD"
698 {
699 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
700 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
701
702 emit_move_insn (v_bitmask,
703 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
704 HOST_WIDE_INT_M1U << bits));
705 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
706 operands[2], operands[1]));
707 DONE;
708 }
709 )
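
;; The BSL above is the usual bit-select form of copysign; a scalar sketch
;; (illustration only, assuming IEEE single plus <stdint.h>/<string.h>):
;;
;; float copysignf_sketch (float x, float y)
;; {
;;   uint32_t ux, uy, m = 0x80000000u;
;;   memcpy (&ux, &x, sizeof ux);
;;   memcpy (&uy, &y, sizeof uy);
;;   ux = (uy & m) | (ux & ~m);   /* sign from y, magnitude from x */
;;   memcpy (&x, &ux, sizeof ux);
;;   return x;
;; }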
710
711 (define_insn "mul_lane<mode>3"
712 [(set (match_operand:VMULD 0 "register_operand" "=w")
713 (mult:VMULD
714 (vec_duplicate:VMULD
715 (vec_select:<VEL>
716 (match_operand:<VCOND> 2 "register_operand" "<h_con>")
717 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
718 (match_operand:VMULD 1 "register_operand" "w")))]
719 "TARGET_SIMD"
720 {
721 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
722 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
723 }
724 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
725 )
726
727 (define_insn "mul_laneq<mode>3"
728 [(set (match_operand:VMUL 0 "register_operand" "=w")
729 (mult:VMUL
730 (vec_duplicate:VMUL
731 (vec_select:<VEL>
732 (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
733 (parallel [(match_operand:SI 3 "immediate_operand")])))
734 (match_operand:VMUL 1 "register_operand" "w")))]
735 "TARGET_SIMD"
736 {
737 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
738 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
739 }
740 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
741 )
742
743 (define_insn "mul_n<mode>3"
744 [(set (match_operand:VMUL 0 "register_operand" "=w")
745 (mult:VMUL
746 (vec_duplicate:VMUL
747 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
748 (match_operand:VMUL 1 "register_operand" "w")))]
749 "TARGET_SIMD"
750 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
751 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
752 )
753
754 (define_insn "@aarch64_rsqrte<mode>"
755 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
756 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
757 UNSPEC_RSQRTE))]
758 "TARGET_SIMD"
759 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
760 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
761
762 (define_insn "@aarch64_rsqrts<mode>"
763 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
764 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
765 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
766 UNSPEC_RSQRTS))]
767 "TARGET_SIMD"
768 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
769 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
770
771 (define_expand "rsqrt<mode>2"
772 [(set (match_operand:VALLF 0 "register_operand")
773 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
774 UNSPEC_RSQRT))]
775 "TARGET_SIMD"
776 {
777 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
778 DONE;
779 })
780
781 (define_insn "aarch64_ursqrte<mode>"
782 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
783 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
784 UNSPEC_RSQRTE))]
785 "TARGET_SIMD"
786 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
787 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
788
789 (define_insn "*aarch64_mul3_elt_to_64v2df"
790 [(set (match_operand:DF 0 "register_operand" "=w")
791 (mult:DF
792 (vec_select:DF
793 (match_operand:V2DF 1 "register_operand" "w")
794 (parallel [(match_operand:SI 2 "immediate_operand")]))
795 (match_operand:DF 3 "register_operand" "w")))]
796 "TARGET_SIMD"
797 {
798 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
799 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
800 }
801 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
802 )
803
804 (define_insn "neg<mode>2"
805 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
806 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
807 "TARGET_SIMD"
808 "neg\t%0.<Vtype>, %1.<Vtype>"
809 [(set_attr "type" "neon_neg<q>")]
810 )
811
812 (define_insn "abs<mode>2"
813 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
814 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
815 "TARGET_SIMD"
816 "abs\t%0.<Vtype>, %1.<Vtype>"
817 [(set_attr "type" "neon_abs<q>")]
818 )
819
820 ;; The intrinsic version of integer ABS must not be allowed to
821 ;; combine with any operation with an integrated ABS step, such
822 ;; as SABD.
823 (define_insn "aarch64_abs<mode>"
824 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
825 (unspec:VSDQ_I_DI
826 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
827 UNSPEC_ABS))]
828 "TARGET_SIMD"
829 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
830 [(set_attr "type" "neon_abs<q>")]
831 )
832
833 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
834 ;; This isn't accurate as ABS always treats its input as a signed value.
835 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
836 ;; Whereas SABD would return 192 (-64 signed) on the above example.
837 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
838 (define_insn "aarch64_<su>abd<mode>"
839 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
840 (minus:VDQ_BHSI
841 (USMAX:VDQ_BHSI
842 (match_operand:VDQ_BHSI 1 "register_operand" "w")
843 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
844 (<max_opp>:VDQ_BHSI
845 (match_dup 1)
846 (match_dup 2))))]
847 "TARGET_SIMD"
848 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
849 [(set_attr "type" "neon_abd<q>")]
850 )
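
;; Scalar sketch of the MAX/MIN form above (illustration only): the modular
;; subtraction reproduces SABD's bit pattern even when the true difference
;; does not fit the signed element type.
;;
;; uint8_t sabd_s8_sketch (int8_t a, int8_t b)
;; {
;;   int8_t mx = a > b ? a : b;
;;   int8_t mn = a < b ? a : b;
;;   return (uint8_t) mx - (uint8_t) mn;   /* 64, -128 -> 192 as required */
;; }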
851
852
853 (define_insn "aarch64_<sur>abdl<mode>"
854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
855 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
856 (match_operand:VD_BHSI 2 "register_operand" "w")]
857 ABDL))]
858 "TARGET_SIMD"
859 "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
860 [(set_attr "type" "neon_abd<q>")]
861 )
862
863 (define_insn "aarch64_<sur>abdl2<mode>"
864 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
865 (unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
866 (match_operand:VQW 2 "register_operand" "w")]
867 ABDL2))]
868 "TARGET_SIMD"
869 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
870 [(set_attr "type" "neon_abd<q>")]
871 )
872
873 (define_insn "aarch64_<sur>abal<mode>"
874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
875 (unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
876 (match_operand:VD_BHSI 3 "register_operand" "w")
877 (match_operand:<VWIDE> 1 "register_operand" "0")]
878 ABAL))]
879 "TARGET_SIMD"
880 "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
881 [(set_attr "type" "neon_arith_acc<q>")]
882 )
883
884 (define_insn "aarch64_<sur>abal2<mode>"
885 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
886 (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
887 (match_operand:VQW 3 "register_operand" "w")
888 (match_operand:<VWIDE> 1 "register_operand" "0")]
889 ABAL2))]
890 "TARGET_SIMD"
891 "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
892 [(set_attr "type" "neon_arith_acc<q>")]
893 )
894
895 (define_insn "aarch64_<sur>adalp<mode>"
896 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
897 (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
898 (match_operand:<VDBLW> 1 "register_operand" "0")]
899 ADALP))]
900 "TARGET_SIMD"
901 "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
902 [(set_attr "type" "neon_reduc_add<q>")]
903 )
904
905 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
906 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
907 ;; reduction of the difference into a V4SI vector and accumulate that into
908 ;; operand 3 before copying that into the result operand 0.
909 ;; Perform that with a sequence of:
910 ;; UABDL2 tmp.8h, op1.16b, op2.16b
911 ;; UABAL tmp.8h, op1.8b, op2.8b
912 ;; UADALP op3.4s, tmp.8h
913 ;; MOV op0, op3 // should be eliminated in later passes.
914 ;;
915 ;; For TARGET_DOTPROD we do:
916 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
917 ;; UABD tmp2.16b, op1.16b, op2.16b
918 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
919 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
920 ;;
921 ;; The signed version just uses the signed variants of the above instructions
922 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
923 ;; unsigned.
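;;
;; Folding away the lane layout, the whole sequence accumulates the plain
;; sum of absolute differences (scalar illustration only):
;;
;; uint32_t sad16_sketch (const uint8_t *a, const uint8_t *b)
;; {
;;   uint32_t sum = 0;
;;   for (int i = 0; i < 16; i++)
;;     sum += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;   return sum;   /* spread across the four lanes of operand 3 */
;; }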
924
925 (define_expand "<sur>sadv16qi"
926 [(use (match_operand:V4SI 0 "register_operand"))
927 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
928 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
929 (use (match_operand:V4SI 3 "register_operand"))]
930 "TARGET_SIMD"
931 {
932 if (TARGET_DOTPROD)
933 {
934 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
935 rtx abd = gen_reg_rtx (V16QImode);
936 emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
937 emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
938 DONE;
939 }
940 rtx reduc = gen_reg_rtx (V8HImode);
941 emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
942 operands[2]));
943 emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
944 gen_lowpart (V8QImode, operands[1]),
945 gen_lowpart (V8QImode,
946 operands[2])));
947 emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
948 emit_move_insn (operands[0], operands[3]);
949 DONE;
950 }
951 )
952
953 (define_insn "aarch64_<su>aba<mode>"
954 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
955 (plus:VDQ_BHSI (minus:VDQ_BHSI
956 (USMAX:VDQ_BHSI
957 (match_operand:VDQ_BHSI 2 "register_operand" "w")
958 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
959 (<max_opp>:VDQ_BHSI
960 (match_dup 2)
961 (match_dup 3)))
962 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
963 "TARGET_SIMD"
964 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965 [(set_attr "type" "neon_arith_acc<q>")]
966 )
967
968 (define_insn "fabd<mode>3"
969 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
970 (abs:VHSDF_HSDF
971 (minus:VHSDF_HSDF
972 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
973 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
974 "TARGET_SIMD"
975 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
976 [(set_attr "type" "neon_fp_abd_<stype><q>")]
977 )
978
979 ;; For AND (vector, register) and BIC (vector, immediate)
980 (define_insn "and<mode>3"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
982 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
983 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
984 "TARGET_SIMD"
985 {
986 switch (which_alternative)
987 {
988 case 0:
989 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
990 case 1:
991 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
992 AARCH64_CHECK_BIC);
993 default:
994 gcc_unreachable ();
995 }
996 }
997 [(set_attr "type" "neon_logic<q>")]
998 )
999
1000 ;; For ORR (vector, register) and ORR (vector, immediate)
1001 (define_insn "ior<mode>3"
1002 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1003 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
1004 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
1005 "TARGET_SIMD"
1006 {
1007 switch (which_alternative)
1008 {
1009 case 0:
1010 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
1011 case 1:
1012 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
1013 AARCH64_CHECK_ORR);
1014 default:
1015 gcc_unreachable ();
1016 }
1017 }
1018 [(set_attr "type" "neon_logic<q>")]
1019 )
1020
1021 (define_insn "xor<mode>3"
1022 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1023 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1024 (match_operand:VDQ_I 2 "register_operand" "w")))]
1025 "TARGET_SIMD"
1026 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1027 [(set_attr "type" "neon_logic<q>")]
1028 )
1029
1030 (define_insn "one_cmpl<mode>2"
1031 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1032 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1033 "TARGET_SIMD"
1034 "not\t%0.<Vbtype>, %1.<Vbtype>"
1035 [(set_attr "type" "neon_logic<q>")]
1036 )
1037
1038 (define_insn "aarch64_simd_vec_set<mode>"
1039 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1040 (vec_merge:VALL_F16
1041 (vec_duplicate:VALL_F16
1042 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
1043 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1044 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1045 "TARGET_SIMD"
1046 {
1047 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1048 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1049 switch (which_alternative)
1050 {
1051 case 0:
1052 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1053 case 1:
1054 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1055 case 2:
1056 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1057 default:
1058 gcc_unreachable ();
1059 }
1060 }
1061 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1062 )
1063
1064 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1065 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1066 (vec_merge:VALL_F16
1067 (vec_duplicate:VALL_F16
1068 (vec_select:<VEL>
1069 (match_operand:VALL_F16 3 "register_operand" "w")
1070 (parallel
1071 [(match_operand:SI 4 "immediate_operand" "i")])))
1072 (match_operand:VALL_F16 1 "register_operand" "0")
1073 (match_operand:SI 2 "immediate_operand" "i")))]
1074 "TARGET_SIMD"
1075 {
1076 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1077 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1078 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1079
1080 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1081 }
1082 [(set_attr "type" "neon_ins<q>")]
1083 )
1084
1085 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1086 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1087 (vec_merge:VALL_F16_NO_V2Q
1088 (vec_duplicate:VALL_F16_NO_V2Q
1089 (vec_select:<VEL>
1090 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1091 (parallel
1092 [(match_operand:SI 4 "immediate_operand" "i")])))
1093 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1094 (match_operand:SI 2 "immediate_operand" "i")))]
1095 "TARGET_SIMD"
1096 {
1097 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1098 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1099 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1100 INTVAL (operands[4]));
1101
1102 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1103 }
1104 [(set_attr "type" "neon_ins<q>")]
1105 )
1106
1107 (define_expand "signbit<mode>2"
1108 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1109 (use (match_operand:VDQSF 1 "register_operand"))]
1110 "TARGET_SIMD"
1111 {
1112 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1113 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1114 shift_amount);
1115 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1116
1117 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1118 shift_vector));
1119 DONE;
1120 })
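
;; The expander reduces signbit to a logical shift right by the sign-bit
;; position; scalar sketch (illustration only, assuming
;; <stdint.h>/<string.h>):
;;
;; uint32_t signbit_f32_sketch (float x)
;; {
;;   uint32_t u;
;;   memcpy (&u, &x, sizeof u);
;;   return u >> 31;   /* 1 for negative values, including -0.0 */
;; }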
1121
1122 (define_insn "aarch64_simd_lshr<mode>"
1123 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1124 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1125 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1126 "TARGET_SIMD"
1127 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1128 [(set_attr "type" "neon_shift_imm<q>")]
1129 )
1130
1131 (define_insn "aarch64_simd_ashr<mode>"
1132 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1133 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
1134 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
1135 "TARGET_SIMD"
1136 "@
1137 cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1138 sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1139 [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
1140 )
1141
1142 (define_insn "*aarch64_simd_sra<mode>"
1143 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1144 (plus:VDQ_I
1145 (SHIFTRT:VDQ_I
1146 (match_operand:VDQ_I 1 "register_operand" "w")
1147 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1148 (match_operand:VDQ_I 3 "register_operand" "0")))]
1149 "TARGET_SIMD"
1150 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1151 [(set_attr "type" "neon_shift_acc<q>")]
1152 )
1153
1154 (define_insn "aarch64_simd_imm_shl<mode>"
1155 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1156 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1157 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1158 "TARGET_SIMD"
1159 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1160 [(set_attr "type" "neon_shift_imm<q>")]
1161 )
1162
1163 (define_insn "aarch64_simd_reg_sshl<mode>"
1164 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1165 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1166 (match_operand:VDQ_I 2 "register_operand" "w")))]
1167 "TARGET_SIMD"
1168 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1169 [(set_attr "type" "neon_shift_reg<q>")]
1170 )
1171
1172 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1173 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1175 (match_operand:VDQ_I 2 "register_operand" "w")]
1176 UNSPEC_ASHIFT_UNSIGNED))]
1177 "TARGET_SIMD"
1178 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1179 [(set_attr "type" "neon_shift_reg<q>")]
1180 )
1181
1182 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1183 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1184 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1185 (match_operand:VDQ_I 2 "register_operand" "w")]
1186 UNSPEC_ASHIFT_SIGNED))]
1187 "TARGET_SIMD"
1188 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1189 [(set_attr "type" "neon_shift_reg<q>")]
1190 )
1191
1192 (define_expand "ashl<mode>3"
1193 [(match_operand:VDQ_I 0 "register_operand")
1194 (match_operand:VDQ_I 1 "register_operand")
1195 (match_operand:SI 2 "general_operand")]
1196 "TARGET_SIMD"
1197 {
1198 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1199 int shift_amount;
1200
1201 if (CONST_INT_P (operands[2]))
1202 {
1203 shift_amount = INTVAL (operands[2]);
1204 if (shift_amount >= 0 && shift_amount < bit_width)
1205 {
1206 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1207 shift_amount);
1208 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1209 operands[1],
1210 tmp));
1211 DONE;
1212 }
1213 }
1214
1215 operands[2] = force_reg (SImode, operands[2]);
1216
1217 rtx tmp = gen_reg_rtx (<MODE>mode);
1218 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1219 operands[2],
1220 0)));
1221 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1222 DONE;
1223 })
1224
1225 (define_expand "lshr<mode>3"
1226 [(match_operand:VDQ_I 0 "register_operand")
1227 (match_operand:VDQ_I 1 "register_operand")
1228 (match_operand:SI 2 "general_operand")]
1229 "TARGET_SIMD"
1230 {
1231 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1232 int shift_amount;
1233
1234 if (CONST_INT_P (operands[2]))
1235 {
1236 shift_amount = INTVAL (operands[2]);
1237 if (shift_amount > 0 && shift_amount <= bit_width)
1238 {
1239 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1240 shift_amount);
1241 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1242 operands[1],
1243 tmp));
1244 DONE;
1245 }
1246 }
1247
1248 operands[2] = force_reg (SImode, operands[2]);
1249
1250 rtx tmp = gen_reg_rtx (SImode);
1251 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1252 emit_insn (gen_negsi2 (tmp, operands[2]));
1253 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1254 convert_to_mode (<VEL>mode, tmp, 0)));
1255 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1256 tmp1));
1257 DONE;
1258 })
1259
1260 (define_expand "ashr<mode>3"
1261 [(match_operand:VDQ_I 0 "register_operand")
1262 (match_operand:VDQ_I 1 "register_operand")
1263 (match_operand:SI 2 "general_operand")]
1264 "TARGET_SIMD"
1265 {
1266 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1267 int shift_amount;
1268
1269 if (CONST_INT_P (operands[2]))
1270 {
1271 shift_amount = INTVAL (operands[2]);
1272 if (shift_amount > 0 && shift_amount <= bit_width)
1273 {
1274 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1275 shift_amount);
1276 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1277 operands[1],
1278 tmp));
1279 DONE;
1280 }
1281 }
1282
1283 operands[2] = force_reg (SImode, operands[2]);
1284
1285 rtx tmp = gen_reg_rtx (SImode);
1286 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1287 emit_insn (gen_negsi2 (tmp, operands[2]));
1288 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1289 tmp, 0)));
1290 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1291 tmp1));
1292 DONE;
1293 })
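
;; The variable-shift expanders above rely on USHL/SSHL taking a signed
;; per-element shift count: negative counts shift right, which is why the
;; count is negated.  Scalar sketch (illustration only):
;;
;; int32_t sshl_sketch (int32_t x, int8_t s)
;; {
;;   return s >= 0 ? x << s : x >> -s;   /* arithmetic shift for SSHL */
;; }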
1294
1295 (define_expand "vashl<mode>3"
1296 [(match_operand:VDQ_I 0 "register_operand")
1297 (match_operand:VDQ_I 1 "register_operand")
1298 (match_operand:VDQ_I 2 "register_operand")]
1299 "TARGET_SIMD"
1300 {
1301 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1302 operands[2]));
1303 DONE;
1304 })
1305
1306 (define_expand "vashr<mode>3"
1307 [(match_operand:VDQ_I 0 "register_operand")
1308 (match_operand:VDQ_I 1 "register_operand")
1309 (match_operand:VDQ_I 2 "register_operand")]
1310 "TARGET_SIMD"
1311 {
1312 rtx neg = gen_reg_rtx (<MODE>mode);
1313 emit (gen_neg<mode>2 (neg, operands[2]));
1314 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1315 neg));
1316 DONE;
1317 })
1318
1319 ;; DI vector shift
1320 (define_expand "aarch64_ashr_simddi"
1321 [(match_operand:DI 0 "register_operand")
1322 (match_operand:DI 1 "register_operand")
1323 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1324 "TARGET_SIMD"
1325 {
1326 /* An arithmetic shift right by 64 fills the result with copies of the sign
1327 bit, just like an ASR by 63; however, the standard pattern does not handle
1328 a shift by 64. */
1329 if (INTVAL (operands[2]) == 64)
1330 operands[2] = GEN_INT (63);
1331 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1332 DONE;
1333 }
1334 )
1335
1336 (define_expand "vlshr<mode>3"
1337 [(match_operand:VDQ_I 0 "register_operand")
1338 (match_operand:VDQ_I 1 "register_operand")
1339 (match_operand:VDQ_I 2 "register_operand")]
1340 "TARGET_SIMD"
1341 {
1342 rtx neg = gen_reg_rtx (<MODE>mode);
1343 emit (gen_neg<mode>2 (neg, operands[2]));
1344 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1345 neg));
1346 DONE;
1347 })
1348
1349 (define_expand "aarch64_lshr_simddi"
1350 [(match_operand:DI 0 "register_operand")
1351 (match_operand:DI 1 "register_operand")
1352 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1353 "TARGET_SIMD"
1354 {
1355 if (INTVAL (operands[2]) == 64)
1356 emit_move_insn (operands[0], const0_rtx);
1357 else
1358 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1359 DONE;
1360 }
1361 )
1362
1363 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1364 (define_insn "vec_shr_<mode>"
1365 [(set (match_operand:VD 0 "register_operand" "=w")
1366 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1367 (match_operand:SI 2 "immediate_operand" "i")]
1368 UNSPEC_VEC_SHR))]
1369 "TARGET_SIMD"
1370 {
1371 if (BYTES_BIG_ENDIAN)
1372 return "shl %d0, %d1, %2";
1373 else
1374 return "ushr %d0, %d1, %2";
1375 }
1376 [(set_attr "type" "neon_shift_imm")]
1377 )
1378
1379 (define_expand "vec_set<mode>"
1380 [(match_operand:VALL_F16 0 "register_operand")
1381 (match_operand:<VEL> 1 "register_operand")
1382 (match_operand:SI 2 "immediate_operand")]
1383 "TARGET_SIMD"
1384 {
1385 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1386 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1387 GEN_INT (elem), operands[0]));
1388 DONE;
1389 }
1390 )
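
;; The vec_merge mask (1 << idx) in aarch64_simd_vec_set selects lane idx
;; from the duplicated scalar and every other lane from the original
;; vector; as a sketch (illustration only):
;;
;; void vec_set_sketch (int32_t v[4], int32_t x, int idx) { v[idx] = x; }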
1391
1392
1393 (define_insn "aarch64_mla<mode>"
1394 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1395 (plus:VDQ_BHSI (mult:VDQ_BHSI
1396 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1397 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1398 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1399 "TARGET_SIMD"
1400 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1401 [(set_attr "type" "neon_mla_<Vetype><q>")]
1402 )
1403
1404 (define_insn "*aarch64_mla_elt<mode>"
1405 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1406 (plus:VDQHS
1407 (mult:VDQHS
1408 (vec_duplicate:VDQHS
1409 (vec_select:<VEL>
1410 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1411 (parallel [(match_operand:SI 2 "immediate_operand")])))
1412 (match_operand:VDQHS 3 "register_operand" "w"))
1413 (match_operand:VDQHS 4 "register_operand" "0")))]
1414 "TARGET_SIMD"
1415 {
1416 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1417 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1418 }
1419 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1420 )
1421
1422 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1423 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1424 (plus:VDQHS
1425 (mult:VDQHS
1426 (vec_duplicate:VDQHS
1427 (vec_select:<VEL>
1428 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1429 (parallel [(match_operand:SI 2 "immediate_operand")])))
1430 (match_operand:VDQHS 3 "register_operand" "w"))
1431 (match_operand:VDQHS 4 "register_operand" "0")))]
1432 "TARGET_SIMD"
1433 {
1434 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1435 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1436 }
1437 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1438 )
1439
1440 (define_insn "aarch64_mla_n<mode>"
1441 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1442 (plus:VDQHS
1443 (mult:VDQHS
1444 (vec_duplicate:VDQHS
1445 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1446 (match_operand:VDQHS 2 "register_operand" "w"))
1447 (match_operand:VDQHS 1 "register_operand" "0")))]
1448 "TARGET_SIMD"
1449 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1450 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1451 )
1452
1453 (define_insn "aarch64_mls<mode>"
1454 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1455 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1456 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1457 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1458 "TARGET_SIMD"
1459 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1460 [(set_attr "type" "neon_mla_<Vetype><q>")]
1461 )
1462
1463 (define_insn "*aarch64_mls_elt<mode>"
1464 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1465 (minus:VDQHS
1466 (match_operand:VDQHS 4 "register_operand" "0")
1467 (mult:VDQHS
1468 (vec_duplicate:VDQHS
1469 (vec_select:<VEL>
1470 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1471 (parallel [(match_operand:SI 2 "immediate_operand")])))
1472 (match_operand:VDQHS 3 "register_operand" "w"))))]
1473 "TARGET_SIMD"
1474 {
1475 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1476 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1477 }
1478 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1479 )
1480
1481 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1482 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1483 (minus:VDQHS
1484 (match_operand:VDQHS 4 "register_operand" "0")
1485 (mult:VDQHS
1486 (vec_duplicate:VDQHS
1487 (vec_select:<VEL>
1488 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1489 (parallel [(match_operand:SI 2 "immediate_operand")])))
1490 (match_operand:VDQHS 3 "register_operand" "w"))))]
1491 "TARGET_SIMD"
1492 {
1493 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1494 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1495 }
1496 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1497 )
1498
1499 (define_insn "aarch64_mls_n<mode>"
1500 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1501 (minus:VDQHS
1502 (match_operand:VDQHS 1 "register_operand" "0")
1503 (mult:VDQHS
1504 (vec_duplicate:VDQHS
1505 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1506 (match_operand:VDQHS 2 "register_operand" "w"))))]
1507 "TARGET_SIMD"
1508 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1509 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1510 )
1511
1512 ;; Max/Min operations.
1513 (define_insn "<su><maxmin><mode>3"
1514 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1515 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1516 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1517 "TARGET_SIMD"
1518 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519 [(set_attr "type" "neon_minmax<q>")]
1520 )
1521
1522 (define_expand "<su><maxmin>v2di3"
1523 [(set (match_operand:V2DI 0 "register_operand")
1524 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1525 (match_operand:V2DI 2 "register_operand")))]
1526 "TARGET_SIMD"
1527 {
1528 enum rtx_code cmp_operator;
1529 rtx cmp_fmt;
1530
1531 switch (<CODE>)
1532 {
1533 case UMIN:
1534 cmp_operator = LTU;
1535 break;
1536 case SMIN:
1537 cmp_operator = LT;
1538 break;
1539 case UMAX:
1540 cmp_operator = GTU;
1541 break;
1542 case SMAX:
1543 cmp_operator = GT;
1544 break;
1545 default:
1546 gcc_unreachable ();
1547 }
1548
1549 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1550 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1551 operands[2], cmp_fmt, operands[1], operands[2]));
1552 DONE;
1553 })
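
;; The V2DI expansion goes through vcond because base AdvSIMD has no 64-bit
;; integer max/min instruction: the compare produces an all-ones/all-zeros
;; mask that then selects between the operands.  Scalar sketch (illustration
;; only):
;;
;; int64_t smax_sketch (int64_t a, int64_t b)
;; {
;;   int64_t mask = -(int64_t) (a > b);   /* CMGT: all ones or all zeros */
;;   return (a & mask) | (b & ~mask);     /* BSL-style select */
;; }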
1554
1555 ;; Pairwise Integer Max/Min operations.
1556 (define_insn "aarch64_<optab>p<mode>"
1557 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1558 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1559 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1560 MAXMINV))]
1561 "TARGET_SIMD"
1562 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1563 [(set_attr "type" "neon_minmax<q>")]
1564 )
1565
1566 ;; Pairwise FP Max/Min operations.
1567 (define_insn "aarch64_<optab>p<mode>"
1568 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1570 (match_operand:VHSDF 2 "register_operand" "w")]
1571 FMAXMINV))]
1572 "TARGET_SIMD"
1573 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1574 [(set_attr "type" "neon_minmax<q>")]
1575 )
1576
1577 ;; vec_concat gives a new vector with the low elements from operand 1, and
1578 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1579 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1580 ;; What that means is that the RTL descriptions of the patterns below
1581 ;; need to change depending on endianness.
1582
1583 ;; Move to the low architectural bits of the register.
1584 ;; On little-endian this is { operand, zeroes }
1585 ;; On big-endian this is { zeroes, operand }
1586
1587 (define_insn "move_lo_quad_internal_<mode>"
1588 [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1589 (vec_concat:VQMOV
1590 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1591 (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
1592 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1593 "@
1594 dup\\t%d0, %1.d[0]
1595 fmov\\t%d0, %1
1596 dup\\t%d0, %1"
1597 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1598 (set_attr "length" "4")
1599 (set_attr "arch" "simd,fp,simd")]
1600 )
1601
1602 (define_insn "move_lo_quad_internal_be_<mode>"
1603 [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1604 (vec_concat:VQMOV
1605 (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
1606 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1607 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1608 "@
1609 dup\\t%d0, %1.d[0]
1610 fmov\\t%d0, %1
1611 dup\\t%d0, %1"
1612 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1613 (set_attr "length" "4")
1614 (set_attr "arch" "simd,fp,simd")]
1615 )
1616
1617 (define_expand "move_lo_quad_<mode>"
1618 [(match_operand:VQMOV 0 "register_operand")
1619 (match_operand:<VHALF> 1 "register_operand")]
1620 "TARGET_SIMD"
1621 {
1622 rtx zs = CONST0_RTX (<VHALF>mode);
1623 if (BYTES_BIG_ENDIAN)
1624 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
1625 else
1626 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
1627 DONE;
1628 }
1629 )
1630
1631 ;; Move operand1 to the high architectural bits of the register, keeping
1632 ;; the low architectural bits of operand2.
1633 ;; For little-endian this is { operand2, operand1 }
1634 ;; For big-endian this is { operand1, operand2 }
1635
1636 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1637 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1638 (vec_concat:VQMOV
1639 (vec_select:<VHALF>
1640 (match_dup 0)
1641 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1642 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1643 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1644 "@
1645 ins\\t%0.d[1], %1.d[0]
1646 ins\\t%0.d[1], %1"
1647 [(set_attr "type" "neon_ins")]
1648 )
1649
1650 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1651 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1652 (vec_concat:VQMOV
1653 (match_operand:<VHALF> 1 "register_operand" "w,r")
1654 (vec_select:<VHALF>
1655 (match_dup 0)
1656 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1657 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1658 "@
1659 ins\\t%0.d[1], %1.d[0]
1660 ins\\t%0.d[1], %1"
1661 [(set_attr "type" "neon_ins")]
1662 )
1663
1664 (define_expand "move_hi_quad_<mode>"
1665 [(match_operand:VQMOV 0 "register_operand")
1666 (match_operand:<VHALF> 1 "register_operand")]
1667 "TARGET_SIMD"
1668 {
1669 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1670 if (BYTES_BIG_ENDIAN)
1671 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1672 operands[1], p));
1673 else
1674 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1675 operands[1], p));
1676 DONE;
1677 })
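;; Illustrative only (a hypothetical example, not a mapping this file
;; guarantees): together, the move_lo_quad and move_hi_quad expanders can
;; build a 128-bit vector from two 64-bit halves, which is the shape of a
;; vcombine-style operation: write the low half, then INS the high half.
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   combine (int32x2_t lo, int32x2_t hi)
;;   {
;;     return vcombine_s32 (lo, hi);  /* low-half move, then ins v0.d[1] */
;;   }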
1678
1679 ;; Narrowing operations.
1680
1681 (define_insn "aarch64_xtn<mode>_insn_le"
1682 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1683 (vec_concat:<VNARROWQ2>
1684 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1685 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
1686 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1687 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1688 [(set_attr "type" "neon_move_narrow_q")]
1689 )
1690
1691 (define_insn "aarch64_xtn<mode>_insn_be"
1692 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1693 (vec_concat:<VNARROWQ2>
1694 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
1695 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
1696 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1697 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1698 [(set_attr "type" "neon_move_narrow_q")]
1699 )
1700
1701 (define_expand "aarch64_xtn<mode>"
1702 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1703 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
1704 "TARGET_SIMD"
1705 {
1706 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1707 if (BYTES_BIG_ENDIAN)
1708 emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
1709 CONST0_RTX (<VNARROWQ>mode)));
1710 else
1711 emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
1712 CONST0_RTX (<VNARROWQ>mode)));
1713
1714 /* The intrinsic expects a narrow result, so emit a subreg that will get
1715 optimized away as appropriate. */
1716 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1717 <VNARROWQ2>mode));
1718 DONE;
1719 }
1720 )
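;; For reference, an intrinsic that is expected to reach this expander
;; (assuming the usual arm_neon.h lowering) is vmovn:
;;
;;   #include <arm_neon.h>
;;   int8x8_t
;;   narrow (int16x8_t x)
;;   {
;;     return vmovn_s16 (x);  /* xtn v0.8b, v0.8h */
;;   }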
1721
1722 (define_insn "aarch64_xtn2<mode>_insn_le"
1723 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1724 (vec_concat:<VNARROWQ2>
1725 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1726 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1727 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1728 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1729 [(set_attr "type" "neon_move_narrow_q")]
1730 )
1731
1732 (define_insn "aarch64_xtn2<mode>_insn_be"
1733 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1734 (vec_concat:<VNARROWQ2>
1735 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1736 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1737 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1738 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_move_narrow_q")]
1740 )
1741
1742 (define_expand "aarch64_xtn2<mode>"
1743 [(match_operand:<VNARROWQ2> 0 "register_operand")
1744 (match_operand:<VNARROWQ> 1 "register_operand")
1745 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1746 "TARGET_SIMD"
1747 {
1748 if (BYTES_BIG_ENDIAN)
1749 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1750 operands[2]));
1751 else
1752 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1753 operands[2]));
1754 DONE;
1755 }
1756 )
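;; Likewise, the high-half variant corresponds to the vmovn_high
;; intrinsics (again assuming the usual arm_neon.h lowering):
;;
;;   int8x16_t
;;   narrow_high (int8x8_t lo, int16x8_t x)
;;   {
;;     return vmovn_high_s16 (lo, x);  /* xtn2 v0.16b, v1.8h */
;;   }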
1757
1758 (define_insn "*aarch64_narrow_trunc<mode>"
1759 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1760 (vec_concat:<VNARROWQ2>
1761 (truncate:<VNARROWQ>
1762 (match_operand:VQN 1 "register_operand" "w"))
1763 (truncate:<VNARROWQ>
1764 (match_operand:VQN 2 "register_operand" "w"))))]
1765 "TARGET_SIMD"
1766 {
1767 if (!BYTES_BIG_ENDIAN)
1768 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1769 else
1770 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1771 }
1772 [(set_attr "type" "neon_permute<q>")]
1773 )
1774
1775 ;; Packing doubles.
1776
1777 (define_expand "vec_pack_trunc_<mode>"
1778 [(match_operand:<VNARROWD> 0 "register_operand")
1779 (match_operand:VDN 1 "register_operand")
1780 (match_operand:VDN 2 "register_operand")]
1781 "TARGET_SIMD"
1782 {
1783 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1784 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1785 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1786
1787 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1788 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1789 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1790 DONE;
1791 })
1792
1793 ;; Packing quads.
1794
1795 (define_expand "vec_pack_trunc_<mode>"
1796 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1797 (vec_concat:<VNARROWQ2>
1798 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1799 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1800 "TARGET_SIMD"
1801 {
1802 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1803 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1804 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1805
1806 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1807
1808 if (BYTES_BIG_ENDIAN)
1809 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1810 operands[hi]));
1811 else
1812 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1813 operands[hi]));
1814 DONE;
1815 }
1816 )
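;; A sketch of the kind of source the vectorizer lowers through
;; vec_pack_trunc (hedged: the exact sequence depends on the
;; vectorization factor and target tuning):
;;
;;   #include <stdint.h>
;;   void
;;   pack (int16_t *restrict d, const int32_t *s)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       d[i] = (int16_t) s[i];  /* xtn + xtn2 per pair of V4SI inputs */
;;   }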
1817
1818 (define_insn "aarch64_shrn<mode>_insn_le"
1819 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1820 (vec_concat:<VNARROWQ2>
1821 (truncate:<VNARROWQ>
1822 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1823 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1824 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1825 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1826 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1827 [(set_attr "type" "neon_shift_imm_narrow_q")]
1828 )
1829
1830 (define_insn "aarch64_shrn<mode>_insn_be"
1831 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1832 (vec_concat:<VNARROWQ2>
1833 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1834 (truncate:<VNARROWQ>
1835 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1836 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1837 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1838 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1839 [(set_attr "type" "neon_shift_imm_narrow_q")]
1840 )
1841
1842 (define_insn "*aarch64_<srn_op>shrn<mode>_vect"
1843 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1844 (truncate:<VNARROWQ>
1845 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1846 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
1847 "TARGET_SIMD"
1848 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1849 [(set_attr "type" "neon_shift_imm_narrow_q")]
1850 )
1851
1852 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
1853 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1854 (vec_concat:<VNARROWQ2>
1855 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1856 (truncate:<VNARROWQ>
1857 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1858 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1859 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1860 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1861 [(set_attr "type" "neon_shift_imm_narrow_q")]
1862 )
1863
1864 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
1865 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1866 (vec_concat:<VNARROWQ2>
1867 (truncate:<VNARROWQ>
1868 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1869 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1870 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1871 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1872 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1873 [(set_attr "type" "neon_shift_imm_narrow_q")]
1874 )
1875
1876 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1877 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1878 (vec_concat:<VNARROWQ2>
1879 (truncate:<VNARROWQ>
1880 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1881 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1882 (truncate:<VNARROWQ>
1883 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1884 (match_dup 2)))))]
1885 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1886 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1887 [(set_attr "type" "neon_permute<q>")]
1888 )
1889
1890 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1891 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1892 (vec_concat:<VNARROWQ2>
1893 (truncate:<VNARROWQ>
1894 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1895 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1896 (truncate:<VNARROWQ>
1897 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1898 (match_dup 2)))))]
1899 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1900 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1901 [(set_attr "type" "neon_permute<q>")]
1902 )
1903
1904 (define_expand "aarch64_shrn<mode>"
1905 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1906 (truncate:<VNARROWQ>
1907 (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
1908 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
1909 "TARGET_SIMD"
1910 {
1911 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1912 INTVAL (operands[2]));
1913 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1914 if (BYTES_BIG_ENDIAN)
1915 emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
1916 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1917 else
1918 emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
1919 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1920
1921 /* The intrinsic expects a narrow result, so emit a subreg that will get
1922 optimized away as appropriate. */
1923 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1924 <VNARROWQ2>mode));
1925 DONE;
1926 }
1927 )
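;; Expected intrinsic mapping (assuming the usual arm_neon.h lowering):
;;
;;   int8x8_t
;;   shift_narrow (int16x8_t x)
;;   {
;;     return vshrn_n_s16 (x, 5);  /* shrn v0.8b, v0.8h, #5 */
;;   }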
1928
1929 (define_insn "aarch64_rshrn<mode>_insn_le"
1930 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1931 (vec_concat:<VNARROWQ2>
1932 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1933 (match_operand:VQN 2
1934 "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
1935 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1936 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1937 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1938 [(set_attr "type" "neon_shift_imm_narrow_q")]
1939 )
1940
1941 (define_insn "aarch64_rshrn<mode>_insn_be"
1942 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1943 (vec_concat:<VNARROWQ2>
1944 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1945 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1946 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1947 UNSPEC_RSHRN)))]
1948 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1949 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1950 [(set_attr "type" "neon_shift_imm_narrow_q")]
1951 )
1952
1953 (define_expand "aarch64_rshrn<mode>"
1954 [(match_operand:<VNARROWQ> 0 "register_operand")
1955 (match_operand:VQN 1 "register_operand")
1956 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1957 "TARGET_SIMD"
1958 {
1959 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
1960 {
1961 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
1962 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
1963 }
1964 else
1965 {
1966 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1967 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1968 INTVAL (operands[2]));
1969 if (BYTES_BIG_ENDIAN)
1970 emit_insn (
1971 gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
1972 operands[2],
1973 CONST0_RTX (<VNARROWQ>mode)));
1974 else
1975 emit_insn (
1976 gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
1977 operands[2],
1978 CONST0_RTX (<VNARROWQ>mode)));
1979
1980 /* The intrinsic expects a narrow result, so emit a subreg that will
1981 get optimized away as appropriate. */
1982 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1983 <VNARROWQ2>mode));
1984 }
1985 DONE;
1986 }
1987 )
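;; Note the special case above: a rounding shift by the full narrow
;; element width is equivalent to RADDHN against a zero vector, which is
;; what the expander emits. Sketch of the expected output (assuming the
;; usual arm_neon.h lowering):
;;
;;   int8x8_t f (int16x8_t x) { return vrshrn_n_s16 (x, 8); }
;;   /* raddhn v0.8b, v0.8h, <zero>.8h */
;;   int8x8_t g (int16x8_t x) { return vrshrn_n_s16 (x, 3); }
;;   /* rshrn v0.8b, v0.8h, #3 */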
1988
1989 (define_insn "aarch64_shrn2<mode>_insn_le"
1990 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1991 (vec_concat:<VNARROWQ2>
1992 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1993 (truncate:<VNARROWQ>
1994 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
1995 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1996 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1997 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1998 [(set_attr "type" "neon_shift_imm_narrow_q")]
1999 )
2000
2001 (define_insn "aarch64_shrn2<mode>_insn_be"
2002 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2003 (vec_concat:<VNARROWQ2>
2004 (truncate:<VNARROWQ>
2005 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
2006 (match_operand:VQN 3
2007 "aarch64_simd_shift_imm_vec_<vn_mode>")))
2008 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2009 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2010 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2011 [(set_attr "type" "neon_shift_imm_narrow_q")]
2012 )
2013
2014 (define_expand "aarch64_shrn2<mode>"
2015 [(match_operand:<VNARROWQ2> 0 "register_operand")
2016 (match_operand:<VNARROWQ> 1 "register_operand")
2017 (match_operand:VQN 2 "register_operand")
2018 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2019 "TARGET_SIMD"
2020 {
2021 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2022 INTVAL (operands[3]));
2023 if (BYTES_BIG_ENDIAN)
2024 emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
2025 operands[2], operands[3]));
2026 else
2027 emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
2028 operands[2], operands[3]));
2029 DONE;
2030 }
2031 )
2032
2033 (define_insn "aarch64_rshrn2<mode>_insn_le"
2034 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2035 (vec_concat:<VNARROWQ2>
2036 (match_operand:<VNARROWQ> 1 "register_operand" "0")
2037 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2038 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2039 UNSPEC_RSHRN)))]
2040 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2041 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2042 [(set_attr "type" "neon_shift_imm_narrow_q")]
2043 )
2044
2045 (define_insn "aarch64_rshrn2<mode>_insn_be"
2046 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2047 (vec_concat:<VNARROWQ2>
2048 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2049 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2050 UNSPEC_RSHRN)
2051 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2052 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2053 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2054 [(set_attr "type" "neon_shift_imm_narrow_q")]
2055 )
2056
2057 (define_expand "aarch64_rshrn2<mode>"
2058 [(match_operand:<VNARROWQ2> 0 "register_operand")
2059 (match_operand:<VNARROWQ> 1 "register_operand")
2060 (match_operand:VQN 2 "register_operand")
2061 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2062 "TARGET_SIMD"
2063 {
2064 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
2065 {
2066 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
2067 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
2068 operands[2], tmp));
2069 }
2070 else
2071 {
2072 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2073 INTVAL (operands[3]));
2074 if (BYTES_BIG_ENDIAN)
2075 emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
2076 operands[1],
2077 operands[2],
2078 operands[3]));
2079 else
2080 emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
2081 operands[1],
2082 operands[2],
2083 operands[3]));
2084 }
2085 DONE;
2086 }
2087 )
2088
2089 ;; Widening operations.
2090
2091 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQW 1 "register_operand" "w")
2095 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
2096 )))]
2097 "TARGET_SIMD"
2098 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
2099 [(set_attr "type" "neon_shift_imm_long")]
2100 )
2101
2102 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2103 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2104 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2105 (match_operand:VQW 1 "register_operand" "w")
2106 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2107 )))]
2108 "TARGET_SIMD"
2109 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
2110 [(set_attr "type" "neon_shift_imm_long")]
2111 )
2112
2113 (define_expand "vec_unpack<su>_hi_<mode>"
2114 [(match_operand:<VWIDE> 0 "register_operand")
2115 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2116 "TARGET_SIMD"
2117 {
2118 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2119 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2120 operands[1], p));
2121 DONE;
2122 }
2123 )
2124
2125 (define_expand "vec_unpack<su>_lo_<mode>"
2126 [(match_operand:<VWIDE> 0 "register_operand")
2127 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2128 "TARGET_SIMD"
2129 {
2130 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2131 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2132 operands[1], p));
2133 DONE;
2134 }
2135 )
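;; A widening loop the vectorizer can lower through the unpack expanders
;; (a sketch; the exact sequence depends on the vectorization factor):
;;
;;   #include <stdint.h>
;;   void
;;   widen (int32_t *restrict d, const int16_t *s)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       d[i] = s[i];  /* sxtl + sxtl2 per V8HI input */
;;   }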
2136
2137 ;; Widening arithmetic.
2138
2139 (define_insn "*aarch64_<su>mlal_lo<mode>"
2140 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2141 (plus:<VWIDE>
2142 (mult:<VWIDE>
2143 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2144 (match_operand:VQW 2 "register_operand" "w")
2145 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2146 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2147 (match_operand:VQW 4 "register_operand" "w")
2148 (match_dup 3))))
2149 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2150 "TARGET_SIMD"
2151 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2152 [(set_attr "type" "neon_mla_<Vetype>_long")]
2153 )
2154
2155 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2156 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2157 (plus:<VWIDE>
2158 (mult:<VWIDE>
2159 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2160 (match_operand:VQW 2 "register_operand" "w")
2161 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2162 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2163 (match_operand:VQW 4 "register_operand" "w")
2164 (match_dup 3))))
2165 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2166 "TARGET_SIMD"
2167 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2168 [(set_attr "type" "neon_mla_<Vetype>_long")]
2169 )
2170
2171 (define_expand "aarch64_<su>mlal_hi<mode>"
2172 [(match_operand:<VWIDE> 0 "register_operand")
2173 (match_operand:<VWIDE> 1 "register_operand")
2174 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2175 (match_operand:VQW 3 "register_operand")]
2176 "TARGET_SIMD"
2177 {
2178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2179 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2180 operands[2], p, operands[3]));
2181 DONE;
2182 }
2183 )
2184
2185 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2186 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2187 (plus:<VWIDE>
2188 (mult:<VWIDE>
2189 (ANY_EXTEND:<VWIDE>
2190 (vec_select:<VHALF>
2191 (match_operand:VQ_HSI 2 "register_operand" "w")
2192 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2193 (vec_duplicate:<VWIDE>
2194 (ANY_EXTEND:<VWIDE_S>
2195 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2196 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2197 "TARGET_SIMD"
2198 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2199 [(set_attr "type" "neon_mla_<Vetype>_long")]
2200 )
2201
2202 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2203 [(match_operand:<VWIDE> 0 "register_operand")
2204 (match_operand:<VWIDE> 1 "register_operand")
2205 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2206 (match_operand:<VEL> 3 "register_operand")]
2207 "TARGET_SIMD"
2208 {
2209 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2210 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2211 operands[1], operands[2], p, operands[3]));
2212 DONE;
2213 }
2214 )
2215
2216 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2218 (minus:<VWIDE>
2219 (match_operand:<VWIDE> 1 "register_operand" "0")
2220 (mult:<VWIDE>
2221 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2222 (match_operand:VQW 2 "register_operand" "w")
2223 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2224 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2225 (match_operand:VQW 4 "register_operand" "w")
2226 (match_dup 3))))))]
2227 "TARGET_SIMD"
2228 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2229 [(set_attr "type" "neon_mla_<Vetype>_long")]
2230 )
2231
2232 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2234 (minus:<VWIDE>
2235 (match_operand:<VWIDE> 1 "register_operand" "0")
2236 (mult:<VWIDE>
2237 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2238 (match_operand:VQW 2 "register_operand" "w")
2239 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2240 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2241 (match_operand:VQW 4 "register_operand" "w")
2242 (match_dup 3))))))]
2243 "TARGET_SIMD"
2244 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2245 [(set_attr "type" "neon_mla_<Vetype>_long")]
2246 )
2247
2248 (define_expand "aarch64_<su>mlsl_hi<mode>"
2249 [(match_operand:<VWIDE> 0 "register_operand")
2250 (match_operand:<VWIDE> 1 "register_operand")
2251 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))
2252 (match_operand:VQW 3 "register_operand")]
2253 "TARGET_SIMD"
2254 {
2255 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2256 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2257 operands[2], p, operands[3]));
2258 DONE;
2259 }
2260 )
2261
2262 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2264 (minus:<VWIDE>
2265 (match_operand:<VWIDE> 1 "register_operand" "0")
2266 (mult:<VWIDE>
2267 (ANY_EXTEND:<VWIDE>
2268 (vec_select:<VHALF>
2269 (match_operand:VQ_HSI 2 "register_operand" "w")
2270 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2271 (vec_duplicate:<VWIDE>
2272 (ANY_EXTEND:<VWIDE_S>
2273 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2274 "TARGET_SIMD"
2275 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2276 [(set_attr "type" "neon_mla_<Vetype>_long")]
2277 )
2278
2279 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2280 [(match_operand:<VWIDE> 0 "register_operand")
2281 (match_operand:<VWIDE> 1 "register_operand")
2282 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2283 (match_operand:<VEL> 3 "register_operand")]
2284 "TARGET_SIMD"
2285 {
2286 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2287 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2288 operands[1], operands[2], p, operands[3]));
2289 DONE;
2290 }
2291 )
2292
2293 (define_insn "aarch64_<su>mlal<mode>"
2294 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2295 (plus:<VWIDE>
2296 (mult:<VWIDE>
2297 (ANY_EXTEND:<VWIDE>
2298 (match_operand:VD_BHSI 2 "register_operand" "w"))
2299 (ANY_EXTEND:<VWIDE>
2300 (match_operand:VD_BHSI 3 "register_operand" "w")))
2301 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2302 "TARGET_SIMD"
2303 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2304 [(set_attr "type" "neon_mla_<Vetype>_long")]
2305 )
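;; Expected intrinsic mapping (assuming the usual arm_neon.h lowering):
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   mlal (int32x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vmlal_s16 (acc, a, b);  /* smlal v0.4s, v1.4h, v2.4h */
;;   }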
2306
2307 (define_insn "aarch64_<su>mlal_n<mode>"
2308 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2309 (plus:<VWIDE>
2310 (mult:<VWIDE>
2311 (ANY_EXTEND:<VWIDE>
2312 (match_operand:VD_HSI 2 "register_operand" "w"))
2313 (vec_duplicate:<VWIDE>
2314 (ANY_EXTEND:<VWIDE_S>
2315 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2316 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2317 "TARGET_SIMD"
2318 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2319 [(set_attr "type" "neon_mla_<Vetype>_long")]
2320 )
2321
2322 (define_insn "aarch64_<su>mlsl<mode>"
2323 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2324 (minus:<VWIDE>
2325 (match_operand:<VWIDE> 1 "register_operand" "0")
2326 (mult:<VWIDE>
2327 (ANY_EXTEND:<VWIDE>
2328 (match_operand:VD_BHSI 2 "register_operand" "w"))
2329 (ANY_EXTEND:<VWIDE>
2330 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2331 "TARGET_SIMD"
2332 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2333 [(set_attr "type" "neon_mla_<Vetype>_long")]
2334 )
2335
2336 (define_insn "aarch64_<su>mlsl_n<mode>"
2337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2338 (minus:<VWIDE>
2339 (match_operand:<VWIDE> 1 "register_operand" "0")
2340 (mult:<VWIDE>
2341 (ANY_EXTEND:<VWIDE>
2342 (match_operand:VD_HSI 2 "register_operand" "w"))
2343 (vec_duplicate:<VWIDE>
2344 (ANY_EXTEND:<VWIDE_S>
2345 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2346 "TARGET_SIMD"
2347 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2348 [(set_attr "type" "neon_mla_<Vetype>_long")]
2349 )
2350
2351 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2352 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2353 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2354 (match_operand:VQW 1 "register_operand" "w")
2355 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2356 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2357 (match_operand:VQW 2 "register_operand" "w")
2358 (match_dup 3)))))]
2359 "TARGET_SIMD"
2360 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2361 [(set_attr "type" "neon_mul_<Vetype>_long")]
2362 )
2363
2364 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2365 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2366 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2367 (match_operand:VD_BHSI 1 "register_operand" "w"))
2368 (ANY_EXTEND:<VWIDE>
2369 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2370 "TARGET_SIMD"
2371 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2372 [(set_attr "type" "neon_mul_<Vetype>_long")]
2373 )
2374
2375 (define_expand "vec_widen_<su>mult_lo_<mode>"
2376 [(match_operand:<VWIDE> 0 "register_operand")
2377 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2378 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2379 "TARGET_SIMD"
2380 {
2381 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2382 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2383 operands[1],
2384 operands[2], p));
2385 DONE;
2386 }
2387 )
2388
2389 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2391 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2392 (match_operand:VQW 1 "register_operand" "w")
2393 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2394 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2395 (match_operand:VQW 2 "register_operand" "w")
2396 (match_dup 3)))))]
2397 "TARGET_SIMD"
2398 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2399 [(set_attr "type" "neon_mul_<Vetype>_long")]
2400 )
2401
2402 (define_expand "vec_widen_<su>mult_hi_<mode>"
2403 [(match_operand:<VWIDE> 0 "register_operand")
2404 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2405 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2406 "TARGET_SIMD"
2407 {
2408 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2410 operands[1],
2411 operands[2], p));
2412 DONE;
2413
2414 }
2415 )
2416
2417 ;; vmull_lane_s16 intrinsics
2418 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2419 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2420 (mult:<VWIDE>
2421 (ANY_EXTEND:<VWIDE>
2422 (match_operand:<VCOND> 1 "register_operand" "w"))
2423 (vec_duplicate:<VWIDE>
2424 (ANY_EXTEND:<VWIDE_S>
2425 (vec_select:<VEL>
2426 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2427 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2428 "TARGET_SIMD"
2429 {
2430 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2431 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2432 }
2433 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2434 )
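;; Expected mapping for the intrinsic family named above (assuming the
;; usual arm_neon.h lowering):
;;
;;   int32x4_t
;;   mull_lane (int16x4_t a, int16x4_t v)
;;   {
;;     return vmull_lane_s16 (a, v, 1);  /* smull v0.4s, v1.4h, v2.h[1] */
;;   }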
2435
2436 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2437 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2438 (mult:<VWIDE>
2439 (ANY_EXTEND:<VWIDE>
2440 (vec_select:<VHALF>
2441 (match_operand:VQ_HSI 1 "register_operand" "w")
2442 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2443 (vec_duplicate:<VWIDE>
2444 (ANY_EXTEND:<VWIDE_S>
2445 (vec_select:<VEL>
2446 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2447 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2448 "TARGET_SIMD"
2449 {
2450 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2451 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2452 }
2453 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2454 )
2455
2456 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2457 [(match_operand:<VWIDE> 0 "register_operand")
2458 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2459 (match_operand:<VCOND> 2 "register_operand")
2460 (match_operand:SI 3 "immediate_operand")]
2461 "TARGET_SIMD"
2462 {
2463 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2464 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2465 operands[1], p, operands[2], operands[3]));
2466 DONE;
2467 }
2468 )
2469
2470 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2471 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2472 (mult:<VWIDE>
2473 (ANY_EXTEND:<VWIDE>
2474 (vec_select:<VHALF>
2475 (match_operand:VQ_HSI 1 "register_operand" "w")
2476 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2477 (vec_duplicate:<VWIDE>
2478 (ANY_EXTEND:<VWIDE_S>
2479 (vec_select:<VEL>
2480 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2481 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2482 "TARGET_SIMD"
2483 {
2484 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2485 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2486 }
2487 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2488 )
2489
2490 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2491 [(match_operand:<VWIDE> 0 "register_operand")
2492 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2493 (match_operand:<VCONQ> 2 "register_operand")
2494 (match_operand:SI 3 "immediate_operand")]
2495 "TARGET_SIMD"
2496 {
2497 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2498 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2499 operands[1], p, operands[2], operands[3]));
2500 DONE;
2501 }
2502 )
2503
2504 (define_insn "aarch64_<su>mull_n<mode>"
2505 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2506 (mult:<VWIDE>
2507 (ANY_EXTEND:<VWIDE>
2508 (match_operand:VD_HSI 1 "register_operand" "w"))
2509 (vec_duplicate:<VWIDE>
2510 (ANY_EXTEND:<VWIDE_S>
2511 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2512 "TARGET_SIMD"
2513 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2514 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2515 )
2516
2517 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2518 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2519 (mult:<VWIDE>
2520 (ANY_EXTEND:<VWIDE>
2521 (vec_select:<VHALF>
2522 (match_operand:VQ_HSI 1 "register_operand" "w")
2523 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2524 (vec_duplicate:<VWIDE>
2525 (ANY_EXTEND:<VWIDE_S>
2526 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2527 "TARGET_SIMD"
2528 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2529 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2530 )
2531
2532 (define_expand "aarch64_<su>mull_hi_n<mode>"
2533 [(match_operand:<VWIDE> 0 "register_operand")
2534 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2535 (match_operand:<VEL> 2 "register_operand")]
2536 "TARGET_SIMD"
2537 {
2538 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2539 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2540 operands[2], p));
2541 DONE;
2542 }
2543 )
2544
2545 ;; vmlal_lane_s16 intrinsics
2546 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2547 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2548 (plus:<VWIDE>
2549 (mult:<VWIDE>
2550 (ANY_EXTEND:<VWIDE>
2551 (match_operand:<VCOND> 2 "register_operand" "w"))
2552 (vec_duplicate:<VWIDE>
2553 (ANY_EXTEND:<VWIDE_S>
2554 (vec_select:<VEL>
2555 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2556 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2557 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2558 "TARGET_SIMD"
2559 {
2560 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2561 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2562 }
2563 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2564 )
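;; Expected mapping for the intrinsic family named above (assuming the
;; usual arm_neon.h lowering):
;;
;;   int32x4_t
;;   mlal_lane (int32x4_t acc, int16x4_t a, int16x4_t v)
;;   {
;;     return vmlal_lane_s16 (acc, a, v, 2); /* smlal v0.4s, v1.4h, v2.h[2] */
;;   }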
2565
2566 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2567 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2568 (plus:<VWIDE>
2569 (mult:<VWIDE>
2570 (ANY_EXTEND:<VWIDE>
2571 (vec_select:<VHALF>
2572 (match_operand:VQ_HSI 2 "register_operand" "w")
2573 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2574 (vec_duplicate:<VWIDE>
2575 (ANY_EXTEND:<VWIDE_S>
2576 (vec_select:<VEL>
2577 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2578 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2579 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2580 "TARGET_SIMD"
2581 {
2582 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2583 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2584 }
2585 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2586 )
2587
2588 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2589 [(match_operand:<VWIDE> 0 "register_operand")
2590 (match_operand:<VWIDE> 1 "register_operand")
2591 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2592 (match_operand:<VCOND> 3 "register_operand")
2593 (match_operand:SI 4 "immediate_operand")]
2594 "TARGET_SIMD"
2595 {
2596 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2597 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2598 operands[1], operands[2], p, operands[3], operands[4]));
2599 DONE;
2600 }
2601 )
2602
2603 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2604 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2605 (plus:<VWIDE>
2606 (mult:<VWIDE>
2607 (ANY_EXTEND:<VWIDE>
2608 (vec_select:<VHALF>
2609 (match_operand:VQ_HSI 2 "register_operand" "w")
2610 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2611 (vec_duplicate:<VWIDE>
2612 (ANY_EXTEND:<VWIDE_S>
2613 (vec_select:<VEL>
2614 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2615 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2616 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2617 "TARGET_SIMD"
2618 {
2619 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2620 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2621 }
2622 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2623 )
2624
2625 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2626 [(match_operand:<VWIDE> 0 "register_operand")
2627 (match_operand:<VWIDE> 1 "register_operand")
2628 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2629 (match_operand:<VCONQ> 3 "register_operand")
2630 (match_operand:SI 4 "immediate_operand")]
2631 "TARGET_SIMD"
2632 {
2633 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2634 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2635 operands[1], operands[2], p, operands[3], operands[4]));
2636 DONE;
2637 }
2638 )
2639
2640 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2642 (minus:<VWIDE>
2643 (match_operand:<VWIDE> 1 "register_operand" "0")
2644 (mult:<VWIDE>
2645 (ANY_EXTEND:<VWIDE>
2646 (match_operand:<VCOND> 2 "register_operand" "w"))
2647 (vec_duplicate:<VWIDE>
2648 (ANY_EXTEND:<VWIDE_S>
2649 (vec_select:<VEL>
2650 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2651 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2652 "TARGET_SIMD"
2653 {
2654 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2655 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2656 }
2657 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2658 )
2659
2660 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2661 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2662 (minus:<VWIDE>
2663 (match_operand:<VWIDE> 1 "register_operand" "0")
2664 (mult:<VWIDE>
2665 (ANY_EXTEND:<VWIDE>
2666 (vec_select:<VHALF>
2667 (match_operand:VQ_HSI 2 "register_operand" "w")
2668 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2669 (vec_duplicate:<VWIDE>
2670 (ANY_EXTEND:<VWIDE_S>
2671 (vec_select:<VEL>
2672 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2673 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2674 )))]
2675 "TARGET_SIMD"
2676 {
2677 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2678 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2679 }
2680 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2681 )
2682
2683 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2684 [(match_operand:<VWIDE> 0 "register_operand")
2685 (match_operand:<VWIDE> 1 "register_operand")
2686 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2687 (match_operand:<VCOND> 3 "register_operand")
2688 (match_operand:SI 4 "immediate_operand")]
2689 "TARGET_SIMD"
2690 {
2691 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2692 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2693 operands[1], operands[2], p, operands[3], operands[4]));
2694 DONE;
2695 }
2696 )
2697
2698 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2699 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2700 (minus:<VWIDE>
2701 (match_operand:<VWIDE> 1 "register_operand" "0")
2702 (mult:<VWIDE>
2703 (ANY_EXTEND:<VWIDE>
2704 (vec_select:<VHALF>
2705 (match_operand:VQ_HSI 2 "register_operand" "w")
2706 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2707 (vec_duplicate:<VWIDE>
2708 (ANY_EXTEND:<VWIDE_S>
2709 (vec_select:<VEL>
2710 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2711 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2712 )))]
2713 "TARGET_SIMD"
2714 {
2715 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2716 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2717 }
2718 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2719 )
2720
2721 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2722 [(match_operand:<VWIDE> 0 "register_operand")
2723 (match_operand:<VWIDE> 1 "register_operand")
2724 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 2 "register_operand"))
2725 (match_operand:<VCONQ> 3 "register_operand")
2726 (match_operand:SI 4 "immediate_operand")]
2727 "TARGET_SIMD"
2728 {
2729 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2730 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2731 operands[1], operands[2], p, operands[3], operands[4]));
2732 DONE;
2733 }
2734 )
2735
2736 ;; FP vector operations.
2737 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2738 ;; double-precision (64-bit) floating-point data types and arithmetic as
2739 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2740 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2741 ;;
2742 ;; Floating-point operations can raise an exception. Vectorising such
2743 ;; operations is safe for the reasons explained below.
2744 ;;
2745 ;; ARMv8 permits an extension to enable trapped floating-point
2746 ;; exception handling; however, this is an optional feature. In the
2747 ;; event of a floating-point exception being raised by vectorised
2748 ;; code:
2749 ;; 1. If trapped floating-point exceptions are available, a trap
2750 ;; will be taken when any lane raises an enabled exception. A trap
2751 ;; handler may determine which lane raised the exception.
2752 ;; 2. Otherwise, a sticky exception flag is set in the
2753 ;; floating-point status register (FPSR). Software may explicitly
2754 ;; test the exception flags; such tests either prevent
2755 ;; vectorisation, allowing precise identification of the failing
2756 ;; operation, or occur outside of vectorisable regions, in which
2757 ;; case the specific operation and lane are not of interest.
2758
2759 ;; FP arithmetic operations.
2760
2761 (define_insn "add<mode>3"
2762 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2763 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2764 (match_operand:VHSDF 2 "register_operand" "w")))]
2765 "TARGET_SIMD"
2766 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2767 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2768 )
2769
2770 (define_insn "sub<mode>3"
2771 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2772 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2773 (match_operand:VHSDF 2 "register_operand" "w")))]
2774 "TARGET_SIMD"
2775 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2776 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2777 )
2778
2779 (define_insn "mul<mode>3"
2780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2781 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2782 (match_operand:VHSDF 2 "register_operand" "w")))]
2783 "TARGET_SIMD"
2784 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2785 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2786 )
2787
2788 (define_expand "div<mode>3"
2789 [(set (match_operand:VHSDF 0 "register_operand")
2790 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2791 (match_operand:VHSDF 2 "register_operand")))]
2792 "TARGET_SIMD"
2793 {
2794 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2795 DONE;
2796
2797 operands[1] = force_reg (<MODE>mode, operands[1]);
2798 })
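;; In other words, the expander first offers the division to
;; aarch64_emit_approx_div, which can emit a reciprocal-estimate plus
;; Newton-Raphson refinement sequence (FRECPE/FRECPS) when the relevant
;; unsafe-math and -mlow-precision-div style options allow it; otherwise
;; the plain *div<mode>3 insn below emits FDIV. Hedged sketch:
;;
;;   #include <arm_neon.h>
;;   float32x4_t
;;   div4 (float32x4_t a, float32x4_t b)
;;   {
;;     return vdivq_f32 (a, b);  /* fdiv v0.4s, v1.4s, v2.4s by default */
;;   }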
2799
2800 (define_insn "*div<mode>3"
2801 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2802 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2803 (match_operand:VHSDF 2 "register_operand" "w")))]
2804 "TARGET_SIMD"
2805 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2806 [(set_attr "type" "neon_fp_div_<stype><q>")]
2807 )
2808
2809 (define_insn "neg<mode>2"
2810 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2811 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2812 "TARGET_SIMD"
2813 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2814 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2815 )
2816
2817 (define_insn "abs<mode>2"
2818 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2819 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2820 "TARGET_SIMD"
2821 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2822 [(set_attr "type" "neon_fp_abs_<stype><q>")]
2823 )
2824
2825 (define_expand "aarch64_float_mla<mode>"
2826 [(set (match_operand:VDQF_DF 0 "register_operand")
2827 (plus:VDQF_DF
2828 (mult:VDQF_DF
2829 (match_operand:VDQF_DF 2 "register_operand")
2830 (match_operand:VDQF_DF 3 "register_operand"))
2831 (match_operand:VDQF_DF 1 "register_operand")))]
2832 "TARGET_SIMD"
2833 {
2834 rtx scratch = gen_reg_rtx (<MODE>mode);
2835 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2836 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2837 DONE;
2838 }
2839 )
2840
2841 (define_expand "aarch64_float_mls<mode>"
2842 [(set (match_operand:VDQF_DF 0 "register_operand")
2843 (minus:VDQF_DF
2844 (match_operand:VDQF_DF 1 "register_operand")
2845 (mult:VDQF_DF
2846 (match_operand:VDQF_DF 2 "register_operand")
2847 (match_operand:VDQF_DF 3 "register_operand"))))]
2848 "TARGET_SIMD"
2849 {
2850 rtx scratch = gen_reg_rtx (<MODE>mode);
2851 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2852 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2853 DONE;
2854 }
2855 )
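;; Note that the aarch64_float_mla*/aarch64_float_mls* expanders in this
;; section go through a separate FMUL and FADD/FSUB rather than a fused
;; FMLA/FMLS: AArch64 has no unfused vector floating-point
;; multiply-accumulate instruction, and fusing here would change the
;; rounding behaviour of the corresponding vmla/vmls intrinsics.
;; Expected result (an assumption about the usual arm_neon.h lowering):
;;
;;   float32x4_t
;;   mla (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vmlaq_f32 (acc, a, b);  /* fmul + fadd, not fmla */
;;   }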
2856
2857 (define_expand "aarch64_float_mla_n<mode>"
2858 [(set (match_operand:VDQSF 0 "register_operand")
2859 (plus:VDQSF
2860 (mult:VDQSF
2861 (vec_duplicate:VDQSF
2862 (match_operand:<VEL> 3 "register_operand"))
2863 (match_operand:VDQSF 2 "register_operand"))
2864 (match_operand:VDQSF 1 "register_operand")))]
2865 "TARGET_SIMD"
2866 {
2867 rtx scratch = gen_reg_rtx (<MODE>mode);
2868 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2869 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2870 DONE;
2871 }
2872 )
2873
2874 (define_expand "aarch64_float_mls_n<mode>"
2875 [(set (match_operand:VDQSF 0 "register_operand")
2876 (minus:VDQSF
2877 (match_operand:VDQSF 1 "register_operand")
2878 (mult:VDQSF
2879 (vec_duplicate:VDQSF
2880 (match_operand:<VEL> 3 "register_operand"))
2881 (match_operand:VDQSF 2 "register_operand"))))]
2882 "TARGET_SIMD"
2883 {
2884 rtx scratch = gen_reg_rtx (<MODE>mode);
2885 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2886 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2887 DONE;
2888 }
2889 )
2890
2891 (define_expand "aarch64_float_mla_lane<mode>"
2892 [(set (match_operand:VDQSF 0 "register_operand")
2893 (plus:VDQSF
2894 (mult:VDQSF
2895 (vec_duplicate:VDQSF
2896 (vec_select:<VEL>
2897 (match_operand:V2SF 3 "register_operand")
2898 (parallel [(match_operand:SI 4 "immediate_operand")])))
2899 (match_operand:VDQSF 2 "register_operand"))
2900 (match_operand:VDQSF 1 "register_operand")))]
2901 "TARGET_SIMD"
2902 {
2903 rtx scratch = gen_reg_rtx (<MODE>mode);
2904 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2905 operands[3], operands[4]));
2906 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2907 DONE;
2908 }
2909 )
2910
2911 (define_expand "aarch64_float_mls_lane<mode>"
2912 [(set (match_operand:VDQSF 0 "register_operand")
2913 (minus:VDQSF
2914 (match_operand:VDQSF 1 "register_operand")
2915 (mult:VDQSF
2916 (vec_duplicate:VDQSF
2917 (vec_select:<VEL>
2918 (match_operand:V2SF 3 "register_operand")
2919 (parallel [(match_operand:SI 4 "immediate_operand")])))
2920 (match_operand:VDQSF 2 "register_operand"))))]
2921 "TARGET_SIMD"
2922 {
2923 rtx scratch = gen_reg_rtx (<MODE>mode);
2924 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2925 operands[3], operands[4]));
2926 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2927 DONE;
2928 }
2929 )
2930
2931 (define_expand "aarch64_float_mla_laneq<mode>"
2932 [(set (match_operand:VDQSF 0 "register_operand")
2933 (plus:VDQSF
2934 (mult:VDQSF
2935 (vec_duplicate:VDQSF
2936 (vec_select:<VEL>
2937 (match_operand:V4SF 3 "register_operand")
2938 (parallel [(match_operand:SI 4 "immediate_operand")])))
2939 (match_operand:VDQSF 2 "register_operand"))
2940 (match_operand:VDQSF 1 "register_operand")))]
2941 "TARGET_SIMD"
2942 {
2943 rtx scratch = gen_reg_rtx (<MODE>mode);
2944 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2945 operands[3], operands[4]));
2946 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2947 DONE;
2948 }
2949 )
2950
2951 (define_expand "aarch64_float_mls_laneq<mode>"
2952 [(set (match_operand:VDQSF 0 "register_operand")
2953 (minus:VDQSF
2954 (match_operand:VDQSF 1 "register_operand")
2955 (mult:VDQSF
2956 (vec_duplicate:VDQSF
2957 (vec_select:<VEL>
2958 (match_operand:V4SF 3 "register_operand")
2959 (parallel [(match_operand:SI 4 "immediate_operand")])))
2960 (match_operand:VDQSF 2 "register_operand"))))]
2961 "TARGET_SIMD"
2962 {
2963 rtx scratch = gen_reg_rtx (<MODE>mode);
2964 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2965 operands[3], operands[4]));
2966 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2967 DONE;
2968 }
2969 )
2970
2971 (define_insn "fma<mode>4"
2972 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2973 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2974 (match_operand:VHSDF 2 "register_operand" "w")
2975 (match_operand:VHSDF 3 "register_operand" "0")))]
2976 "TARGET_SIMD"
2977 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2978 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2979 )
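;; fma<mode>4 is the standard fused multiply-add pattern, so it serves
;; both the vfma intrinsics and fma()-style code. Expected mapping
;; (assuming the usual arm_neon.h lowering):
;;
;;   float32x4_t
;;   fma4 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);  /* fmla v0.4s, v1.4s, v2.4s */
;;   }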
2980
2981 (define_insn "*aarch64_fma4_elt<mode>"
2982 [(set (match_operand:VDQF 0 "register_operand" "=w")
2983 (fma:VDQF
2984 (vec_duplicate:VDQF
2985 (vec_select:<VEL>
2986 (match_operand:VDQF 1 "register_operand" "<h_con>")
2987 (parallel [(match_operand:SI 2 "immediate_operand")])))
2988 (match_operand:VDQF 3 "register_operand" "w")
2989 (match_operand:VDQF 4 "register_operand" "0")))]
2990 "TARGET_SIMD"
2991 {
2992 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2993 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2994 }
2995 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2996 )
2997
2998 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2999 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3000 (fma:VDQSF
3001 (vec_duplicate:VDQSF
3002 (vec_select:<VEL>
3003 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3004 (parallel [(match_operand:SI 2 "immediate_operand")])))
3005 (match_operand:VDQSF 3 "register_operand" "w")
3006 (match_operand:VDQSF 4 "register_operand" "0")))]
3007 "TARGET_SIMD"
3008 {
3009 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3010 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3011 }
3012 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3013 )
3014
3015 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
3016 [(set (match_operand:VMUL 0 "register_operand" "=w")
3017 (fma:VMUL
3018 (vec_duplicate:VMUL
3019 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3020 (match_operand:VMUL 2 "register_operand" "w")
3021 (match_operand:VMUL 3 "register_operand" "0")))]
3022 "TARGET_SIMD"
3023 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3024 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3025 )
3026
3027 (define_insn "*aarch64_fma4_elt_to_64v2df"
3028 [(set (match_operand:DF 0 "register_operand" "=w")
3029 (fma:DF
3030 (vec_select:DF
3031 (match_operand:V2DF 1 "register_operand" "w")
3032 (parallel [(match_operand:SI 2 "immediate_operand")]))
3033 (match_operand:DF 3 "register_operand" "w")
3034 (match_operand:DF 4 "register_operand" "0")))]
3035 "TARGET_SIMD"
3036 {
3037 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3038 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
3039 }
3040 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3041 )
3042
3043 (define_insn "fnma<mode>4"
3044 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3045 (fma:VHSDF
3046 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
3047 (match_operand:VHSDF 2 "register_operand" "w")
3048 (match_operand:VHSDF 3 "register_operand" "0")))]
3049 "TARGET_SIMD"
3050 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3051 [(set_attr "type" "neon_fp_mla_<stype><q>")]
3052 )
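;; fnma<mode>4 computes operand3 - operand1 * operand2 with a single
;; rounding, i.e. the vfms intrinsics. Expected mapping (assuming the
;; usual arm_neon.h lowering):
;;
;;   float32x4_t
;;   fms4 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmsq_f32 (acc, a, b);  /* fmls v0.4s, v1.4s, v2.4s */
;;   }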
3053
3054 (define_insn "*aarch64_fnma4_elt<mode>"
3055 [(set (match_operand:VDQF 0 "register_operand" "=w")
3056 (fma:VDQF
3057 (neg:VDQF
3058 (match_operand:VDQF 3 "register_operand" "w"))
3059 (vec_duplicate:VDQF
3060 (vec_select:<VEL>
3061 (match_operand:VDQF 1 "register_operand" "<h_con>")
3062 (parallel [(match_operand:SI 2 "immediate_operand")])))
3063 (match_operand:VDQF 4 "register_operand" "0")))]
3064 "TARGET_SIMD"
3065 {
3066 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3067 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3068 }
3069 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3070 )
3071
3072 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
3073 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3074 (fma:VDQSF
3075 (neg:VDQSF
3076 (match_operand:VDQSF 3 "register_operand" "w"))
3077 (vec_duplicate:VDQSF
3078 (vec_select:<VEL>
3079 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3080 (parallel [(match_operand:SI 2 "immediate_operand")])))
3081 (match_operand:VDQSF 4 "register_operand" "0")))]
3082 "TARGET_SIMD"
3083 {
3084 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3085 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3086 }
3087 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3088 )
3089
3090 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
3091 [(set (match_operand:VMUL 0 "register_operand" "=w")
3092 (fma:VMUL
3093 (neg:VMUL
3094 (match_operand:VMUL 2 "register_operand" "w"))
3095 (vec_duplicate:VMUL
3096 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3097 (match_operand:VMUL 3 "register_operand" "0")))]
3098 "TARGET_SIMD"
3099 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3100 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3101 )
3102
3103 (define_insn "*aarch64_fnma4_elt_to_64v2df"
3104 [(set (match_operand:DF 0 "register_operand" "=w")
3105 (fma:DF
3106 (vec_select:DF
3107 (match_operand:V2DF 1 "register_operand" "w")
3108 (parallel [(match_operand:SI 2 "immediate_operand")]))
3109 (neg:DF
3110 (match_operand:DF 3 "register_operand" "w"))
3111 (match_operand:DF 4 "register_operand" "0")))]
3112 "TARGET_SIMD"
3113 {
3114 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3115 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
3116 }
3117 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3118 )
3119
3120 ;; Vector versions of the floating-point frint patterns.
3121 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3122 (define_insn "<frint_pattern><mode>2"
3123 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3124 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3125 FRINT))]
3126 "TARGET_SIMD"
3127 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3128 [(set_attr "type" "neon_fp_round_<stype><q>")]
3129 )
3130
3131 ;; Vector versions of the fcvt standard patterns.
3132 ;; Expands to lbtrunc, lround, lceil, lfloor.
3133 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3134 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3135 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3136 [(match_operand:VHSDF 1 "register_operand" "w")]
3137 FCVT)))]
3138 "TARGET_SIMD"
3139 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3140 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3141 )
3142
3143 ;; HF scalar variants of related SIMD instructions.
3144 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3145 [(set (match_operand:HI 0 "register_operand" "=w")
3146 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3147 FCVT)))]
3148 "TARGET_SIMD_F16INST"
3149 "fcvt<frint_suffix><su>\t%h0, %h1"
3150 [(set_attr "type" "neon_fp_to_int_s")]
3151 )
3152
3153 (define_insn "<optab>_trunchfhi2"
3154 [(set (match_operand:HI 0 "register_operand" "=w")
3155 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3156 "TARGET_SIMD_F16INST"
3157 "fcvtz<su>\t%h0, %h1"
3158 [(set_attr "type" "neon_fp_to_int_s")]
3159 )
3160
3161 (define_insn "<optab>hihf2"
3162 [(set (match_operand:HF 0 "register_operand" "=w")
3163 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3164 "TARGET_SIMD_F16INST"
3165 "<su_optab>cvtf\t%h0, %h1"
3166 [(set_attr "type" "neon_int_to_fp_s")]
3167 )
3168
3169 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3170 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3171 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3172 [(mult:VDQF
3173 (match_operand:VDQF 1 "register_operand" "w")
3174 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3175 UNSPEC_FRINTZ)))]
3176 "TARGET_SIMD
3177 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3178 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3179 {
3180 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3181 char buf[64];
3182 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3183 output_asm_insn (buf, operands);
3184 return "";
3185 }
3186 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3187 )
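
;; Illustratively, with a multiplier of 16.0f (== 2^4, so fbits == 4),
;; a vectorized conversion such as
;;   y[i] = (int32_t) (x[i] * 16.0f);
;; matches the pattern above and emits the single instruction
;;   fcvtzs  v0.4s, v1.4s, #4
;; in place of a separate fmul and fcvtzs.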
3188
3189 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3190 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3191 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3192 [(match_operand:VHSDF 1 "register_operand")]
3193 UNSPEC_FRINTZ)))]
3194 "TARGET_SIMD"
3195 {})
3196
3197 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3198 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3199 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3200 [(match_operand:VHSDF 1 "register_operand")]
3201 UNSPEC_FRINTZ)))]
3202 "TARGET_SIMD"
3203 {})
3204
3205 (define_expand "ftrunc<VHSDF:mode>2"
3206 [(set (match_operand:VHSDF 0 "register_operand")
3207 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3208 UNSPEC_FRINTZ))]
3209 "TARGET_SIMD"
3210 {})
3211
3212 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3213 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3214 (FLOATUORS:VHSDF
3215 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3216 "TARGET_SIMD"
3217 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3218 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3219 )
3220
3221 ;; Conversions between vectors of floats and doubles.
3222 ;; Contains a mix of patterns to match standard pattern names
3223 ;; and those for intrinsics.
3224
3225 ;; Float widening operations.
3226
3227 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3228 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3229 (float_extend:<VWIDE> (vec_select:<VHALF>
3230 (match_operand:VQ_HSF 1 "register_operand" "w")
3231 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3232 )))]
3233 "TARGET_SIMD"
3234 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3235 [(set_attr "type" "neon_fp_cvt_widen_s")]
3236 )
3237
3238 ;; Convert between fixed-point and floating-point (vector modes)
3239
3240 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3241 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3242 (unspec:<VHSDF:FCVT_TARGET>
3243 [(match_operand:VHSDF 1 "register_operand" "w")
3244 (match_operand:SI 2 "immediate_operand" "i")]
3245 FCVT_F2FIXED))]
3246 "TARGET_SIMD"
3247 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3248 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3249 )
3250
3251 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3252 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3253 (unspec:<VDQ_HSDI:FCVT_TARGET>
3254 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3255 (match_operand:SI 2 "immediate_operand" "i")]
3256 FCVT_FIXED2F))]
3257 "TARGET_SIMD"
3258 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3259 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3260 )
3261
3262 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3263 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3264 ;; the meaning of HI and LO changes depending on the target endianness.
3265 ;; While elsewhere we map the higher numbered elements of a vector to
3266 ;; the lower architectural lanes of the vector, for these patterns we want
3267 ;; to always treat "hi" as referring to the higher architectural lanes.
3268 ;; Consequently, while the patterns below look inconsistent with our
3269 ;; other big-endian patterns, their behavior is as required.
3270
3271 (define_expand "vec_unpacks_lo_<mode>"
3272 [(match_operand:<VWIDE> 0 "register_operand")
3273 (match_operand:VQ_HSF 1 "register_operand")]
3274 "TARGET_SIMD"
3275 {
3276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3277 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3278 operands[1], p));
3279 DONE;
3280 }
3281 )
3282
3283 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3284 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3285 (float_extend:<VWIDE> (vec_select:<VHALF>
3286 (match_operand:VQ_HSF 1 "register_operand" "w")
3287 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3288 )))]
3289 "TARGET_SIMD"
3290 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3291 [(set_attr "type" "neon_fp_cvt_widen_s")]
3292 )
3293
3294 (define_expand "vec_unpacks_hi_<mode>"
3295 [(match_operand:<VWIDE> 0 "register_operand")
3296 (match_operand:VQ_HSF 1 "register_operand")]
3297 "TARGET_SIMD"
3298 {
3299 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
3301 operands[1], p));
3302 DONE;
3303 }
3304 )

3305 (define_insn "aarch64_float_extend_lo_<Vwide>"
3306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (float_extend:<VWIDE>
3308 (match_operand:VDF 1 "register_operand" "w")))]
3309 "TARGET_SIMD"
3310 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3311 [(set_attr "type" "neon_fp_cvt_widen_s")]
3312 )
3313
3314 ;; Float narrowing operations.
3315
3316 (define_insn "aarch64_float_trunc_rodd_df"
3317 [(set (match_operand:SF 0 "register_operand" "=w")
3318 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3319 UNSPEC_FCVTXN))]
3320 "TARGET_SIMD"
3321 "fcvtxn\\t%s0, %d1"
3322 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3323 )
3324
3325 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3326 [(set (match_operand:V2SF 0 "register_operand" "=w")
3327 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3328 UNSPEC_FCVTXN))]
3329 "TARGET_SIMD"
3330 "fcvtxn\\t%0.2s, %1.2d"
3331 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3332 )
3333
3334 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3335 [(set (match_operand:V4SF 0 "register_operand" "=w")
3336 (vec_concat:V4SF
3337 (match_operand:V2SF 1 "register_operand" "0")
3338 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3339 UNSPEC_FCVTXN)))]
3340 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3341 "fcvtxn2\\t%0.4s, %2.2d"
3342 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3343 )
3344
3345 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3346 [(set (match_operand:V4SF 0 "register_operand" "=w")
3347 (vec_concat:V4SF
3348 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3349 UNSPEC_FCVTXN)
3350 (match_operand:V2SF 1 "register_operand" "0")))]
3351 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3352 "fcvtxn2\\t%0.4s, %2.2d"
3353 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3354 )
3355
3356 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3357 [(match_operand:V4SF 0 "register_operand")
3358 (match_operand:V2SF 1 "register_operand")
3359 (match_operand:V2DF 2 "register_operand")]
3360 "TARGET_SIMD"
3361 {
3362 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3363 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3364 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3365 emit_insn (gen (operands[0], operands[1], operands[2]));
3366 DONE;
3367 }
3368 )
3369
3370 (define_insn "aarch64_float_truncate_lo_<mode>"
3371 [(set (match_operand:VDF 0 "register_operand" "=w")
3372 (float_truncate:VDF
3373 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3374 "TARGET_SIMD"
3375 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3376 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3377 )
3378
3379 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3380 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3381 (vec_concat:<VDBL>
3382 (match_operand:VDF 1 "register_operand" "0")
3383 (float_truncate:VDF
3384 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3386 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3387 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3388 )
3389
3390 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3391 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3392 (vec_concat:<VDBL>
3393 (float_truncate:VDF
3394 (match_operand:<VWIDE> 2 "register_operand" "w"))
3395 (match_operand:VDF 1 "register_operand" "0")))]
3396 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3397 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3398 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3399 )
3400
3401 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3402 [(match_operand:<VDBL> 0 "register_operand")
3403 (match_operand:VDF 1 "register_operand")
3404 (match_operand:<VWIDE> 2 "register_operand")]
3405 "TARGET_SIMD"
3406 {
3407 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3408 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3409 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3410 emit_insn (gen (operands[0], operands[1], operands[2]));
3411 DONE;
3412 }
3413 )
3414
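;; vec_pack_trunc_v2df packs two V2DF operands into one V4SF result;
;; illustratively it emits "fcvtn  v0.2s, v1.2d" followed by
;; "fcvtn2  v0.4s, v2.2d", with the lo/hi roles swapped on big-endian.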
3415 (define_expand "vec_pack_trunc_v2df"
3416 [(set (match_operand:V4SF 0 "register_operand")
3417 (vec_concat:V4SF
3418 (float_truncate:V2SF
3419 (match_operand:V2DF 1 "register_operand"))
3420 (float_truncate:V2SF
3421 (match_operand:V2DF 2 "register_operand"))
3422 ))]
3423 "TARGET_SIMD"
3424 {
3425 rtx tmp = gen_reg_rtx (V2SFmode);
3426 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3427 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3428
3429 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3430 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3431 tmp, operands[hi]));
3432 DONE;
3433 }
3434 )
3435
3436 (define_expand "vec_pack_trunc_df"
3437 [(set (match_operand:V2SF 0 "register_operand")
3438 (vec_concat:V2SF
3439 (float_truncate:SF
3440 (match_operand:DF 1 "register_operand"))
3441 (float_truncate:SF
3442 (match_operand:DF 2 "register_operand"))
3443 ))]
3444 "TARGET_SIMD"
3445 {
3446 rtx tmp = gen_reg_rtx (V2SFmode);
3447 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3448 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3449
3450 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
3451 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
3452 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3453 DONE;
3454 }
3455 )
3456
3457 ;; FP Max/Min
3458 ;; Max/Min are introduced by idiom recognition in GCC's mid-end. An
3459 ;; expression like:
3460 ;; a = (b < c) ? b : c;
3461 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3462 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3463 ;; -ffast-math.
3464 ;;
3465 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3466 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3467 ;; operand will be returned when both operands are zero (i.e. they may not
3468 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3469 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3470 ;; NaNs.
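;;
;; As a minimal illustrative example, compiled with -ffast-math:
;;   void f (float *a, float *b, float *c)
;;   {
;;     for (int i = 0; i < 1024; i++)
;;       a[i] = b[i] < c[i] ? b[i] : c[i];
;;   }
;; the loop body vectorizes to "fminnm  v0.4s, v1.4s, v2.4s".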
3471
3472 (define_insn "<su><maxmin><mode>3"
3473 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3474 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3475 (match_operand:VHSDF 2 "register_operand" "w")))]
3476 "TARGET_SIMD"
3477 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3478 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3479 )
3480
3481 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3482 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3483 ;; which implement the IEEE fmax ()/fmin () functions.
3484 (define_insn "<fmaxmin><mode>3"
3485 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3486 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3487 (match_operand:VHSDF 2 "register_operand" "w")]
3488 FMAXMIN_UNS))]
3489 "TARGET_SIMD"
3490 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3491 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3492 )
3493
3494 ;; 'across lanes' add.
3495
3496 (define_expand "reduc_plus_scal_<mode>"
3497 [(match_operand:<VEL> 0 "register_operand")
3498 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
3499 UNSPEC_ADDV)]
3500 "TARGET_SIMD"
3501 {
3502 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3503 rtx scratch = gen_reg_rtx (<MODE>mode);
3504 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
3505 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3506 DONE;
3507 }
3508 )
3509
3510 (define_insn "aarch64_faddp<mode>"
3511 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3512 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3513 (match_operand:VHSDF 2 "register_operand" "w")]
3514 UNSPEC_FADDV))]
3515 "TARGET_SIMD"
3516 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3517 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3518 )
3519
3520 (define_insn "aarch64_reduc_plus_internal<mode>"
3521 [(set (match_operand:VDQV 0 "register_operand" "=w")
3522 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
3523 UNSPEC_ADDV))]
3524 "TARGET_SIMD"
3525 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3526 [(set_attr "type" "neon_reduc_add<q>")]
3527 )
3528
3529 (define_insn "aarch64_<su>addlv<mode>"
3530 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3531 (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
3532 USADDLV))]
3533 "TARGET_SIMD"
3534 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3535 [(set_attr "type" "neon_reduc_add<q>")]
3536 )
3537
3538 (define_insn "aarch64_<su>addlp<mode>"
3539 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3540 (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
3541 USADDLP))]
3542 "TARGET_SIMD"
3543 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3544 [(set_attr "type" "neon_reduc_add<q>")]
3545 )
3546
3547 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3548 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3549 [(set (match_operand:GPI 0 "register_operand" "=w")
3550 (zero_extend:GPI
3551 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3552 UNSPEC_ADDV)))]
3553 "TARGET_SIMD"
3554 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3555 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3556 )
3557
3558 (define_insn "aarch64_reduc_plus_internalv2si"
3559 [(set (match_operand:V2SI 0 "register_operand" "=w")
3560 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3561 UNSPEC_ADDV))]
3562 "TARGET_SIMD"
3563 "addp\\t%0.2s, %1.2s, %1.2s"
3564 [(set_attr "type" "neon_reduc_add")]
3565 )
3566
3567 (define_insn "reduc_plus_scal_<mode>"
3568 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3569 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3570 UNSPEC_FADDV))]
3571 "TARGET_SIMD"
3572 "faddp\\t%<Vetype>0, %1.<Vtype>"
3573 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3574 )
3575
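;; For V4SF the reduction is performed as two pairwise additions;
;; illustratively:
;;   {a, b, c, d} -> faddp -> {a+b, c+d, a+b, c+d} -> faddp -> {a+b+c+d, ...}
;; after which the scalar result is read from (endian-corrected) lane 0.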
3576 (define_expand "reduc_plus_scal_v4sf"
3577 [(set (match_operand:SF 0 "register_operand")
3578 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
3579 UNSPEC_FADDV))]
3580 "TARGET_SIMD"
3581 {
3582 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3583 rtx scratch = gen_reg_rtx (V4SFmode);
3584 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3585 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3586 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3587 DONE;
3588 })
3589
3590 (define_insn "clrsb<mode>2"
3591 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3592 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3593 "TARGET_SIMD"
3594 "cls\\t%0.<Vtype>, %1.<Vtype>"
3595 [(set_attr "type" "neon_cls<q>")]
3596 )
3597
3598 (define_insn "clz<mode>2"
3599 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3600 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3601 "TARGET_SIMD"
3602 "clz\\t%0.<Vtype>, %1.<Vtype>"
3603 [(set_attr "type" "neon_cls<q>")]
3604 )
3605
3606 (define_insn "popcount<mode>2"
3607 [(set (match_operand:VB 0 "register_operand" "=w")
3608 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3609 "TARGET_SIMD"
3610 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3611 [(set_attr "type" "neon_cnt<q>")]
3612 )
3613
3614 ;; 'across lanes' max and min ops.
3615
3616 ;; Template for outputting a scalar, so we can create __builtins which can be
3617 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3618 (define_expand "reduc_<optab>_scal_<mode>"
3619 [(match_operand:<VEL> 0 "register_operand")
3620 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3621 FMAXMINV)]
3622 "TARGET_SIMD"
3623 {
3624 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3625 rtx scratch = gen_reg_rtx (<MODE>mode);
3626 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3627 operands[1]));
3628 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3629 DONE;
3630 }
3631 )
3632
3633 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3634 [(match_operand:<VEL> 0 "register_operand")
3635 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3636 FMAXMINNMV)]
3637 "TARGET_SIMD"
3638 {
3639 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3640 DONE;
3641 }
3642 )
3643
3644 ;; Likewise for integer cases, signed and unsigned.
3645 (define_expand "reduc_<optab>_scal_<mode>"
3646 [(match_operand:<VEL> 0 "register_operand")
3647 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3648 MAXMINV)]
3649 "TARGET_SIMD"
3650 {
3651 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3652 rtx scratch = gen_reg_rtx (<MODE>mode);
3653 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3654 operands[1]));
3655 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3656 DONE;
3657 }
3658 )
3659
3660 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3661 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3662 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3663 MAXMINV))]
3664 "TARGET_SIMD"
3665 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3666 [(set_attr "type" "neon_reduc_minmax<q>")]
3667 )
3668
3669 (define_insn "aarch64_reduc_<optab>_internalv2si"
3670 [(set (match_operand:V2SI 0 "register_operand" "=w")
3671 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3672 MAXMINV))]
3673 "TARGET_SIMD"
3674 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3675 [(set_attr "type" "neon_reduc_minmax")]
3676 )
3677
3678 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3679 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3680 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3681 FMAXMINV))]
3682 "TARGET_SIMD"
3683 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3684 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3685 )
3686
3687 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3688 ;; allocation.
3689 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3690 ;; to select.
3691 ;;
3692 ;; Thus our BSL is of the form:
3693 ;; op0 = bsl (mask, op2, op3)
3694 ;; We can use any of:
3695 ;;
3696 ;; if (op0 = mask)
3697 ;; bsl op0, op2, op3
3698 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0 keeps its bits)
3699 ;; bit op0, op2, mask
3700 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0 keeps its bits)
3701 ;; bif op0, op3, mask
3702 ;;
3703 ;; This is the pattern to which the aarch64_simd_bsl<mode> expander expands.
3704 ;; Some forms of straight-line code may generate the equivalent form
3705 ;; in *aarch64_simd_bsl<mode>_alt.
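;;
;; The identity behind all three forms is, as a minimal C sketch for
;; illustration only:
;;   uint64_t bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     return (mask & a) | (~mask & b);  /* == ((a ^ b) & mask) ^ b */
;;   }
;; which is why the RTL below is written as ((op2 ^ op3) & op1) ^ op3.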
3706
3707 (define_insn "aarch64_simd_bsl<mode>_internal"
3708 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3709 (xor:VDQ_I
3710 (and:VDQ_I
3711 (xor:VDQ_I
3712 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
3713 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
3714 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3715 (match_dup:<V_INT_EQUIV> 3)
3716 ))]
3717 "TARGET_SIMD"
3718 "@
3719 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3720 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3721 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
3722 [(set_attr "type" "neon_bsl<q>")]
3723 )
3724
3725 ;; We need this form in addition to the above pattern to match the case
3726 ;; when combine tries merging three insns such that the second operand of
3727 ;; the outer XOR matches the second operand of the inner XOR rather than
3728 ;; the first. The two are equivalent but since recog doesn't try all
3729 ;; permutations of commutative operations, we have to have a separate pattern.
3730
3731 (define_insn "*aarch64_simd_bsl<mode>_alt"
3732 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3733 (xor:VDQ_I
3734 (and:VDQ_I
3735 (xor:VDQ_I
3736 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
3737 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
3738 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3739 (match_dup:<V_INT_EQUIV> 2)))]
3740 "TARGET_SIMD"
3741 "@
3742 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3743 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3744 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
3745 [(set_attr "type" "neon_bsl<q>")]
3746 )
3747
3748 ;; DImode is special, we want to avoid computing operations which are
3749 ;; more naturally computed in general purpose registers in the vector
3750 ;; registers. If we do that, we need to move all three operands from general
3751 ;; purpose registers to vector registers, then back again. However, we
3752 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3753 ;; optimizations based on the component operations of a BSL.
3754 ;;
3755 ;; That means we need a splitter back to the individual operations, if they
3756 ;; would be better calculated on the integer side.
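;;
;; On the general-purpose side the equivalent sequence is simply
;;   eor  x0, x2, x3
;;   and  x0, x0, x1
;;   eor  x0, x0, x3
;; which is what the splitters below emit (hence the length-12 alternative).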
3757
3758 (define_insn_and_split "aarch64_simd_bsldi_internal"
3759 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3760 (xor:DI
3761 (and:DI
3762 (xor:DI
3763 (match_operand:DI 3 "register_operand" "w,0,w,r")
3764 (match_operand:DI 2 "register_operand" "w,w,0,r"))
3765 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3766 (match_dup:DI 3)
3767 ))]
3768 "TARGET_SIMD"
3769 "@
3770 bsl\\t%0.8b, %2.8b, %3.8b
3771 bit\\t%0.8b, %2.8b, %1.8b
3772 bif\\t%0.8b, %3.8b, %1.8b
3773 #"
3774 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3775 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3776 {
3777 /* Split back to individual operations. If we're before reload, and
3778 able to create a temporary register, do so. If we're after reload,
3779 we've got an early-clobber destination register, so use that.
3780 Otherwise, we can't create pseudos and we can't yet guarantee that
3781 operands[0] is safe to write, so FAIL to split. */
3782
3783 rtx scratch;
3784 if (reload_completed)
3785 scratch = operands[0];
3786 else if (can_create_pseudo_p ())
3787 scratch = gen_reg_rtx (DImode);
3788 else
3789 FAIL;
3790
3791 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3792 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3793 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3794 DONE;
3795 }
3796 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3797 (set_attr "length" "4,4,4,12")]
3798 )
3799
3800 (define_insn_and_split "aarch64_simd_bsldi_alt"
3801 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3802 (xor:DI
3803 (and:DI
3804 (xor:DI
3805 (match_operand:DI 3 "register_operand" "w,w,0,r")
3806 (match_operand:DI 2 "register_operand" "w,0,w,r"))
3807 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3808 (match_dup:DI 2)
3809 ))]
3810 "TARGET_SIMD"
3811 "@
3812 bsl\\t%0.8b, %3.8b, %2.8b
3813 bit\\t%0.8b, %3.8b, %1.8b
3814 bif\\t%0.8b, %2.8b, %1.8b
3815 #"
3816 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3817 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3818 {
3819 /* Split back to individual operations. If we're before reload, and
3820 able to create a temporary register, do so. If we're after reload,
3821 we've got an early-clobber destination register, so use that.
3822 Otherwise, we can't create pseudos and we can't yet guarantee that
3823 operands[0] is safe to write, so FAIL to split. */
3824
3825 rtx scratch;
3826 if (reload_completed)
3827 scratch = operands[0];
3828 else if (can_create_pseudo_p ())
3829 scratch = gen_reg_rtx (DImode);
3830 else
3831 FAIL;
3832
3833 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3834 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3835 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3836 DONE;
3837 }
3838 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3839 (set_attr "length" "4,4,4,12")]
3840 )
3841
3842 (define_expand "aarch64_simd_bsl<mode>"
3843 [(match_operand:VALLDIF 0 "register_operand")
3844 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3845 (match_operand:VALLDIF 2 "register_operand")
3846 (match_operand:VALLDIF 3 "register_operand")]
3847 "TARGET_SIMD"
3848 {
3849 /* We can't alias operands together if they have different modes. */
3850 rtx tmp = operands[0];
3851 if (FLOAT_MODE_P (<MODE>mode))
3852 {
3853 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3854 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3855 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3856 }
3857 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3858 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3859 operands[1],
3860 operands[2],
3861 operands[3]));
3862 if (tmp != operands[0])
3863 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3864
3865 DONE;
3866 })
3867
3868 (define_expand "vcond_mask_<mode><v_int_equiv>"
3869 [(match_operand:VALLDI 0 "register_operand")
3870 (match_operand:VALLDI 1 "nonmemory_operand")
3871 (match_operand:VALLDI 2 "nonmemory_operand")
3872 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3873 "TARGET_SIMD"
3874 {
3875 /* If we have (a = (P) ? -1 : 0),
3876 then we can simply move the generated mask (result must be int). */
3877 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3878 && operands[2] == CONST0_RTX (<MODE>mode))
3879 emit_move_insn (operands[0], operands[3]);
3880 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3881 else if (operands[1] == CONST0_RTX (<MODE>mode)
3882 && operands[2] == CONSTM1_RTX (<MODE>mode))
3883 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3884 else
3885 {
3886 if (!REG_P (operands[1]))
3887 operands[1] = force_reg (<MODE>mode, operands[1]);
3888 if (!REG_P (operands[2]))
3889 operands[2] = force_reg (<MODE>mode, operands[2]);
3890 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3891 operands[1], operands[2]));
3892 }
3893
3894 DONE;
3895 })
3896
3897 ;; Patterns comparing two vectors to produce a mask.
3898
3899 (define_expand "vec_cmp<mode><mode>"
3900 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3901 (match_operator 1 "comparison_operator"
3902 [(match_operand:VSDQ_I_DI 2 "register_operand")
3903 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3904 "TARGET_SIMD"
3905 {
3906 rtx mask = operands[0];
3907 enum rtx_code code = GET_CODE (operands[1]);
3908
3909 switch (code)
3910 {
3911 case NE:
3912 case LE:
3913 case LT:
3914 case GE:
3915 case GT:
3916 case EQ:
3917 if (operands[3] == CONST0_RTX (<MODE>mode))
3918 break;
3919
3920 /* Fall through. */
3921 default:
3922 if (!REG_P (operands[3]))
3923 operands[3] = force_reg (<MODE>mode, operands[3]);
3924
3925 break;
3926 }
3927
3928 switch (code)
3929 {
3930 case LT:
3931 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3932 break;
3933
3934 case GE:
3935 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3936 break;
3937
3938 case LE:
3939 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3940 break;
3941
3942 case GT:
3943 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3944 break;
3945
3946 case LTU:
3947 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3948 break;
3949
3950 case GEU:
3951 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3952 break;
3953
3954 case LEU:
3955 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3956 break;
3957
3958 case GTU:
3959 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3960 break;
3961
3962 case NE:
3963 /* Handle NE as !EQ. */
3964 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3965 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3966 break;
3967
3968 case EQ:
3969 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3970 break;
3971
3972 default:
3973 gcc_unreachable ();
3974 }
3975
3976 DONE;
3977 })
3978
3979 (define_expand "vec_cmp<mode><v_int_equiv>"
3980 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3981 (match_operator 1 "comparison_operator"
3982 [(match_operand:VDQF 2 "register_operand")
3983 (match_operand:VDQF 3 "nonmemory_operand")]))]
3984 "TARGET_SIMD"
3985 {
3986 int use_zero_form = 0;
3987 enum rtx_code code = GET_CODE (operands[1]);
3988 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3989
3990 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3991
3992 switch (code)
3993 {
3994 case LE:
3995 case LT:
3996 case GE:
3997 case GT:
3998 case EQ:
3999 if (operands[3] == CONST0_RTX (<MODE>mode))
4000 {
4001 use_zero_form = 1;
4002 break;
4003 }
4004 /* Fall through. */
4005 default:
4006 if (!REG_P (operands[3]))
4007 operands[3] = force_reg (<MODE>mode, operands[3]);
4008
4009 break;
4010 }
4011
4012 switch (code)
4013 {
4014 case LT:
4015 if (use_zero_form)
4016 {
4017 comparison = gen_aarch64_cmlt<mode>;
4018 break;
4019 }
4020 /* Fall through. */
4021 case UNLT:
4022 std::swap (operands[2], operands[3]);
4023 /* Fall through. */
4024 case UNGT:
4025 case GT:
4026 comparison = gen_aarch64_cmgt<mode>;
4027 break;
4028 case LE:
4029 if (use_zero_form)
4030 {
4031 comparison = gen_aarch64_cmle<mode>;
4032 break;
4033 }
4034 /* Fall through. */
4035 case UNLE:
4036 std::swap (operands[2], operands[3]);
4037 /* Fall through. */
4038 case UNGE:
4039 case GE:
4040 comparison = gen_aarch64_cmge<mode>;
4041 break;
4042 case NE:
4043 case EQ:
4044 comparison = gen_aarch64_cmeq<mode>;
4045 break;
4046 case UNEQ:
4047 case ORDERED:
4048 case UNORDERED:
4049 case LTGT:
4050 break;
4051 default:
4052 gcc_unreachable ();
4053 }
4054
4055 switch (code)
4056 {
4057 case UNGE:
4058 case UNGT:
4059 case UNLE:
4060 case UNLT:
4061 {
4062 /* All of the above must not raise any FP exceptions. Thus we first
4063 check each operand for NaNs and force any elements containing NaN to
4064 zero before using them in the compare.
4065 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4066 (cm<cc> (isnan (a) ? 0.0 : a,
4067 isnan (b) ? 0.0 : b))
4068 We use the following transformations for doing the comparisons:
4069 a UNGE b -> a GE b
4070 a UNGT b -> a GT b
4071 a UNLE b -> b GE a
4072 a UNLT b -> b GT a. */
4073
4074 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4075 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4076 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4077 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4078 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4079 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4080 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4081 lowpart_subreg (<V_INT_EQUIV>mode,
4082 operands[2],
4083 <MODE>mode)));
4084 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4085 lowpart_subreg (<V_INT_EQUIV>mode,
4086 operands[3],
4087 <MODE>mode)));
4088 gcc_assert (comparison != NULL);
4089 emit_insn (comparison (operands[0],
4090 lowpart_subreg (<MODE>mode,
4091 tmp0, <V_INT_EQUIV>mode),
4092 lowpart_subreg (<MODE>mode,
4093 tmp1, <V_INT_EQUIV>mode)));
4094 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4095 }
4096 break;
4097
4098 case LT:
4099 case LE:
4100 case GT:
4101 case GE:
4102 case EQ:
4103 case NE:
4104 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4105 Since a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
4106 a GE b -> a GE b
4107 a GT b -> a GT b
4108 a LE b -> b GE a
4109 a LT b -> b GT a
4110 a EQ b -> a EQ b
4111 a NE b -> ~(a EQ b) */
4112 gcc_assert (comparison != NULL);
4113 emit_insn (comparison (operands[0], operands[2], operands[3]));
4114 if (code == NE)
4115 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4116 break;
4117
4118 case LTGT:
4119 /* LTGT is not guaranteed not to generate an FP exception, so take
4120 the faster route: ((a > b) || (b > a)). */
4121 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4122 operands[2], operands[3]));
4123 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4124 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4125 break;
4126
4127 case ORDERED:
4128 case UNORDERED:
4129 case UNEQ:
4130 /* cmeq (a, a) & cmeq (b, b). */
4131 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4132 operands[2], operands[2]));
4133 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4134 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4135
4136 if (code == UNORDERED)
4137 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4138 else if (code == UNEQ)
4139 {
4140 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4141 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4142 }
4143 break;
4144
4145 default:
4146 gcc_unreachable ();
4147 }
4148
4149 DONE;
4150 })
4151
4152 (define_expand "vec_cmpu<mode><mode>"
4153 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4154 (match_operator 1 "comparison_operator"
4155 [(match_operand:VSDQ_I_DI 2 "register_operand")
4156 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4157 "TARGET_SIMD"
4158 {
4159 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4160 operands[2], operands[3]));
4161 DONE;
4162 })
4163
4164 (define_expand "vcond<mode><mode>"
4165 [(set (match_operand:VALLDI 0 "register_operand")
4166 (if_then_else:VALLDI
4167 (match_operator 3 "comparison_operator"
4168 [(match_operand:VALLDI 4 "register_operand")
4169 (match_operand:VALLDI 5 "nonmemory_operand")])
4170 (match_operand:VALLDI 1 "nonmemory_operand")
4171 (match_operand:VALLDI 2 "nonmemory_operand")))]
4172 "TARGET_SIMD"
4173 {
4174 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4175 enum rtx_code code = GET_CODE (operands[3]);
4176
4177 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4178 and swap operands 1/2 in order to avoid the additional
4179 NOT instruction. */
4180 if (code == NE)
4181 {
4182 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4183 operands[4], operands[5]);
4184 std::swap (operands[1], operands[2]);
4185 }
4186 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4187 operands[4], operands[5]));
4188 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4189 operands[2], mask));
4190
4191 DONE;
4192 })
4193
4194 (define_expand "vcond<v_cmp_mixed><mode>"
4195 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4196 (if_then_else:<V_cmp_mixed>
4197 (match_operator 3 "comparison_operator"
4198 [(match_operand:VDQF_COND 4 "register_operand")
4199 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4200 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4201 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4202 "TARGET_SIMD"
4203 {
4204 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4205 enum rtx_code code = GET_CODE (operands[3]);
4206
4207 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4208 and swap operands 1/2 in order to avoid the additional
4209 NOT instruction. */
4210 if (code == NE)
4211 {
4212 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4213 operands[4], operands[5]);
4214 std::swap (operands[1], operands[2]);
4215 }
4216 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4217 operands[4], operands[5]));
4218 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4219 operands[0], operands[1],
4220 operands[2], mask));
4221
4222 DONE;
4223 })
4224
4225 (define_expand "vcondu<mode><mode>"
4226 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4227 (if_then_else:VSDQ_I_DI
4228 (match_operator 3 "comparison_operator"
4229 [(match_operand:VSDQ_I_DI 4 "register_operand")
4230 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4231 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4232 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4233 "TARGET_SIMD"
4234 {
4235 rtx mask = gen_reg_rtx (<MODE>mode);
4236 enum rtx_code code = GET_CODE (operands[3]);
4237
4238 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4239 and swap operands 1/2 in order to avoid the additional
4240 NOT instruction. */
4241 if (code == NE)
4242 {
4243 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244 operands[4], operands[5]);
4245 std::swap (operands[1], operands[2]);
4246 }
4247 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4248 operands[4], operands[5]));
4249 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4250 operands[2], mask));
4251 DONE;
4252 })
4253
4254 (define_expand "vcondu<mode><v_cmp_mixed>"
4255 [(set (match_operand:VDQF 0 "register_operand")
4256 (if_then_else:VDQF
4257 (match_operator 3 "comparison_operator"
4258 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4259 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4260 (match_operand:VDQF 1 "nonmemory_operand")
4261 (match_operand:VDQF 2 "nonmemory_operand")))]
4262 "TARGET_SIMD"
4263 {
4264 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4265 enum rtx_code code = GET_CODE (operands[3]);
4266
4267 /* NE is handled as !EQ in vec_cmp patterns, so we explicitly invert it
4268 and swap operands 1/2 in order to avoid the additional
4269 NOT instruction. */
4270 if (code == NE)
4271 {
4272 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4273 operands[4], operands[5]);
4274 std::swap (operands[1], operands[2]);
4275 }
4276 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4277 mask, operands[3],
4278 operands[4], operands[5]));
4279 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4280 operands[2], mask));
4281 DONE;
4282 })
4283
4284 ;; Patterns for AArch64 SIMD Intrinsics.
4285
4286 ;; Lane extraction with sign extension to general purpose register.
4287 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4288 [(set (match_operand:GPI 0 "register_operand" "=r")
4289 (sign_extend:GPI
4290 (vec_select:<VDQQH:VEL>
4291 (match_operand:VDQQH 1 "register_operand" "w")
4292 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4293 "TARGET_SIMD"
4294 {
4295 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4296 INTVAL (operands[2]));
4297 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4298 }
4299 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4300 )
4301
4302 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4303 [(set (match_operand:GPI 0 "register_operand" "=r")
4304 (zero_extend:GPI
4305 (vec_select:<VDQQH:VEL>
4306 (match_operand:VDQQH 1 "register_operand" "w")
4307 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4308 "TARGET_SIMD"
4309 {
4310 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4311 INTVAL (operands[2]));
4312 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4313 }
4314 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4315 )
4316
4317 ;; Lane extraction of a value; neither sign nor zero extension
4318 ;; is guaranteed, so the upper bits should be considered undefined.
4319 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4320 ;; Extracting lane zero is split into a simple move when it is between SIMD
4321 ;; registers or a store.
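;; For example (illustrative), vgetq_lane_s16 (v, 2) can match here and
;; emit "umov  w0, v0.h[2]" when the value is wanted in a general-purpose
;; register, or split to a simple move when lane 0 is extracted.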
4322 (define_insn_and_split "aarch64_get_lane<mode>"
4323 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4324 (vec_select:<VEL>
4325 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4326 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4327 "TARGET_SIMD"
4328 {
4329 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4330 switch (which_alternative)
4331 {
4332 case 0:
4333 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4334 case 1:
4335 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4336 case 2:
4337 return "st1\\t{%1.<Vetype>}[%2], %0";
4338 default:
4339 gcc_unreachable ();
4340 }
4341 }
4342 "&& reload_completed
4343 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4344 [(set (match_dup 0) (match_dup 1))]
4345 {
4346 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4347 }
4348 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4349 )
4350
4351 (define_insn "load_pair_lanes<mode>"
4352 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4353 (vec_concat:<VDBL>
4354 (match_operand:VDC 1 "memory_operand" "Utq")
4355 (match_operand:VDC 2 "memory_operand" "m")))]
4356 "TARGET_SIMD && !STRICT_ALIGNMENT
4357 && rtx_equal_p (XEXP (operands[2], 0),
4358 plus_constant (Pmode,
4359 XEXP (operands[1], 0),
4360 GET_MODE_SIZE (<MODE>mode)))"
4361 "ldr\\t%q0, %1"
4362 [(set_attr "type" "neon_load1_1reg_q")]
4363 )
4364
4365 (define_insn "store_pair_lanes<mode>"
4366 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4367 (vec_concat:<VDBL>
4368 (match_operand:VDC 1 "register_operand" "w, r")
4369 (match_operand:VDC 2 "register_operand" "w, r")))]
4370 "TARGET_SIMD"
4371 "@
4372 stp\\t%d1, %d2, %y0
4373 stp\\t%x1, %x2, %y0"
4374 [(set_attr "type" "neon_stp, store_16")]
4375 )
4376
4377 ;; In this insn, operand 1 should be the low part and operand 2 the high
4378 ;; part of the dest vector.
4379
4380 (define_insn "@aarch64_combinez<mode>"
4381 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4382 (vec_concat:<VDBL>
4383 (match_operand:VDC 1 "general_operand" "w,?r,m")
4384 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
4385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4386 "@
4387 mov\\t%0.8b, %1.8b
4388 fmov\t%d0, %1
4389 ldr\\t%d0, %1"
4390 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4391 (set_attr "arch" "simd,fp,simd")]
4392 )
4393
4394 (define_insn "@aarch64_combinez_be<mode>"
4395 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4396 (vec_concat:<VDBL>
4397 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
4398 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
4399 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4400 "@
4401 mov\\t%0.8b, %1.8b
4402 fmov\t%d0, %1
4403 ldr\\t%d0, %1"
4404 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4405 (set_attr "arch" "simd,fp,simd")]
4406 )
4407
4408 (define_expand "aarch64_combine<mode>"
4409 [(match_operand:<VDBL> 0 "register_operand")
4410 (match_operand:VDC 1 "register_operand")
4411 (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
4412 "TARGET_SIMD"
4413 {
4414 if (operands[2] == CONST0_RTX (<MODE>mode))
4415 {
4416 if (BYTES_BIG_ENDIAN)
4417 emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
4418 operands[2]));
4419 else
4420 emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
4421 operands[2]));
4422 }
4423 else
4424 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
4425 DONE;
4426 }
4427 )
4428
4429 (define_expand "@aarch64_simd_combine<mode>"
4430 [(match_operand:<VDBL> 0 "register_operand")
4431 (match_operand:VDC 1 "register_operand")
4432 (match_operand:VDC 2 "register_operand")]
4433 "TARGET_SIMD"
4434 {
4435 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
4436 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
4437 DONE;
4438 }
4439 [(set_attr "type" "multiple")]
4440 )
4441
4442 ;; <su><addsub>l<q>.
4443
4444 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4445 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4446 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4447 (match_operand:VQW 1 "register_operand" "w")
4448 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4449 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4450 (match_operand:VQW 2 "register_operand" "w")
4451 (match_dup 3)))))]
4452 "TARGET_SIMD"
4453 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4454 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4455 )
4456
4457 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4459 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4460 (match_operand:VQW 1 "register_operand" "w")
4461 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4462 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4463 (match_operand:VQW 2 "register_operand" "w")
4464 (match_dup 3)))))]
4465 "TARGET_SIMD"
4466 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4467 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4468 )
4469
4470 (define_expand "vec_widen_<su>addl_lo_<mode>"
4471 [(match_operand:<VWIDE> 0 "register_operand")
4472 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4473 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4474 "TARGET_SIMD"
4475 {
4476 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4477 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4478 operands[2], p));
4479 DONE;
4480 })
4481
4482 (define_expand "vec_widen_<su>addl_hi_<mode>"
4483 [(match_operand:<VWIDE> 0 "register_operand")
4484 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4485 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4486 "TARGET_SIMD"
4487 {
4488 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4489 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4490 operands[2], p));
4491 DONE;
4492 })
4493
4494 (define_expand "vec_widen_<su>subl_lo_<mode>"
4495 [(match_operand:<VWIDE> 0 "register_operand")
4496 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4497 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4498 "TARGET_SIMD"
4499 {
4500 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4501 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4502 operands[2], p));
4503 DONE;
4504 })
4505
4506 (define_expand "vec_widen_<su>subl_hi_<mode>"
4507 [(match_operand:<VWIDE> 0 "register_operand")
4508 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4509 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4510 "TARGET_SIMD"
4511 {
4512 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4513 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4514 operands[2], p));
4515 DONE;
4516 })
4517
4518 (define_expand "aarch64_saddl2<mode>"
4519 [(match_operand:<VWIDE> 0 "register_operand")
4520 (match_operand:VQW 1 "register_operand")
4521 (match_operand:VQW 2 "register_operand")]
4522 "TARGET_SIMD"
4523 {
4524 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4525 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4526 operands[2], p));
4527 DONE;
4528 })
4529
4530 (define_expand "aarch64_uaddl2<mode>"
4531 [(match_operand:<VWIDE> 0 "register_operand")
4532 (match_operand:VQW 1 "register_operand")
4533 (match_operand:VQW 2 "register_operand")]
4534 "TARGET_SIMD"
4535 {
4536 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4537 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4538 operands[2], p));
4539 DONE;
4540 })
4541
4542 (define_expand "aarch64_ssubl2<mode>"
4543 [(match_operand:<VWIDE> 0 "register_operand")
4544 (match_operand:VQW 1 "register_operand")
4545 (match_operand:VQW 2 "register_operand")]
4546 "TARGET_SIMD"
4547 {
4548 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4549 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4550 operands[2], p));
4551 DONE;
4552 })
4553
4554 (define_expand "aarch64_usubl2<mode>"
4555 [(match_operand:<VWIDE> 0 "register_operand")
4556 (match_operand:VQW 1 "register_operand")
4557 (match_operand:VQW 2 "register_operand")]
4558 "TARGET_SIMD"
4559 {
4560 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4561 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4562 operands[2], p));
4563 DONE;
4564 })
4565
4566 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4567 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4568 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4569 (match_operand:VD_BHSI 1 "register_operand" "w"))
4570 (ANY_EXTEND:<VWIDE>
4571 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4572 "TARGET_SIMD"
4573 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4574 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4575 )
4576
4577 ;; <su><addsub>w<q>.
4578
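;; For 128-bit inputs, widen_ssum/widen_usum accumulate in two halves;
;; illustratively, for V8HI summed into V4SI:
;;   saddw   v0.4s, v2.4s, v1.4h   // add the sign-extended low half
;;   saddw2  v0.4s, v0.4s, v1.8h   // then the high half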
4579 (define_expand "widen_ssum<mode>3"
4580 [(set (match_operand:<VDBLW> 0 "register_operand")
4581 (plus:<VDBLW> (sign_extend:<VDBLW>
4582 (match_operand:VQW 1 "register_operand"))
4583 (match_operand:<VDBLW> 2 "register_operand")))]
4584 "TARGET_SIMD"
4585 {
4586 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4587 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4588
4589 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4590 operands[1], p));
4591 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4592 DONE;
4593 }
4594 )
4595
4596 (define_expand "widen_ssum<mode>3"
4597 [(set (match_operand:<VWIDE> 0 "register_operand")
4598 (plus:<VWIDE> (sign_extend:<VWIDE>
4599 (match_operand:VD_BHSI 1 "register_operand"))
4600 (match_operand:<VWIDE> 2 "register_operand")))]
4601 "TARGET_SIMD"
4602 {
4603 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4604 DONE;
4605 })
4606
4607 (define_expand "widen_usum<mode>3"
4608 [(set (match_operand:<VDBLW> 0 "register_operand")
4609 (plus:<VDBLW> (zero_extend:<VDBLW>
4610 (match_operand:VQW 1 "register_operand"))
4611 (match_operand:<VDBLW> 2 "register_operand")))]
4612 "TARGET_SIMD"
4613 {
4614 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4615 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4616
4617 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4618 operands[1], p));
4619 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4620 DONE;
4621 }
4622 )
4623
4624 (define_expand "widen_usum<mode>3"
4625 [(set (match_operand:<VWIDE> 0 "register_operand")
4626 (plus:<VWIDE> (zero_extend:<VWIDE>
4627 (match_operand:VD_BHSI 1 "register_operand"))
4628 (match_operand:<VWIDE> 2 "register_operand")))]
4629 "TARGET_SIMD"
4630 {
4631 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4632 DONE;
4633 })
4634
4635 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4637 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4638 (ANY_EXTEND:<VWIDE>
4639 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4640 "TARGET_SIMD"
4641 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4642 [(set_attr "type" "neon_sub_widen")]
4643 )
4644
4645 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4646 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4647 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4648 (ANY_EXTEND:<VWIDE>
4649 (vec_select:<VHALF>
4650 (match_operand:VQW 2 "register_operand" "w")
4651 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4652 "TARGET_SIMD"
4653 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4654 [(set_attr "type" "neon_sub_widen")]
4655 )
4656
4657 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4658 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4659 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4660 (ANY_EXTEND:<VWIDE>
4661 (vec_select:<VHALF>
4662 (match_operand:VQW 2 "register_operand" "w")
4663 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4664 "TARGET_SIMD"
4665 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4666 [(set_attr "type" "neon_sub_widen")]
4667 )
4668
4669 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4670 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4671 (plus:<VWIDE>
4672 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4673 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4674 "TARGET_SIMD"
4675 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4676 [(set_attr "type" "neon_add_widen")]
4677 )
4678
4679 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4681 (plus:<VWIDE>
4682 (ANY_EXTEND:<VWIDE>
4683 (vec_select:<VHALF>
4684 (match_operand:VQW 2 "register_operand" "w")
4685 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4686 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4687 "TARGET_SIMD"
4688 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4689 [(set_attr "type" "neon_add_widen")]
4690 )
4691
4692 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4694 (plus:<VWIDE>
4695 (ANY_EXTEND:<VWIDE>
4696 (vec_select:<VHALF>
4697 (match_operand:VQW 2 "register_operand" "w")
4698 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4699 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4700 "TARGET_SIMD"
4701 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4702 [(set_attr "type" "neon_add_widen")]
4703 )
4704
4705 (define_expand "aarch64_saddw2<mode>"
4706 [(match_operand:<VWIDE> 0 "register_operand")
4707 (match_operand:<VWIDE> 1 "register_operand")
4708 (match_operand:VQW 2 "register_operand")]
4709 "TARGET_SIMD"
4710 {
4711 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4712 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
4713 operands[2], p));
4714 DONE;
4715 })
4716
4717 (define_expand "aarch64_uaddw2<mode>"
4718 [(match_operand:<VWIDE> 0 "register_operand")
4719 (match_operand:<VWIDE> 1 "register_operand")
4720 (match_operand:VQW 2 "register_operand")]
4721 "TARGET_SIMD"
4722 {
4723 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4724 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
4725 operands[2], p));
4726 DONE;
4727 })
4728
4729
4730 (define_expand "aarch64_ssubw2<mode>"
4731 [(match_operand:<VWIDE> 0 "register_operand")
4732 (match_operand:<VWIDE> 1 "register_operand")
4733 (match_operand:VQW 2 "register_operand")]
4734 "TARGET_SIMD"
4735 {
4736 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4737 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
4738 operands[2], p));
4739 DONE;
4740 })
4741
4742 (define_expand "aarch64_usubw2<mode>"
4743 [(match_operand:<VWIDE> 0 "register_operand")
4744 (match_operand:<VWIDE> 1 "register_operand")
4745 (match_operand:VQW 2 "register_operand")]
4746 "TARGET_SIMD"
4747 {
4748 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
4750 operands[2], p));
4751 DONE;
4752 })
4753
4754 ;; <su><r>h<addsub>.
4755
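;; The halving forms compute (a + b) >> 1 ("floor", [us]hadd) and
;; (a + b + 1) >> 1 ("ceil", [us]rhadd) without losing the carry of the
;; intermediate sum; e.g. (illustrative) a rounding byte average
;;   avg[i] = (a[i] + b[i] + 1) >> 1;
;; over unsigned vectors emits "urhadd  v0.16b, v1.16b, v2.16b".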
4756 (define_expand "<u>avg<mode>3_floor"
4757 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4758 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4759 (match_operand:VDQ_BHSI 2 "register_operand")]
4760 HADD))]
4761 "TARGET_SIMD"
4762 )
4763
4764 (define_expand "<u>avg<mode>3_ceil"
4765 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4766 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4767 (match_operand:VDQ_BHSI 2 "register_operand")]
4768 RHADD))]
4769 "TARGET_SIMD"
4770 )
4771
4772 (define_insn "aarch64_<sur>h<addsub><mode>"
4773 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4774 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
4775 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
4776 HADDSUB))]
4777 "TARGET_SIMD"
4778 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4779 [(set_attr "type" "neon_<addsub>_halve<q>")]
4780 )
4781
4782 ;; <r><addsub>hn<q>.
4783
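;; These narrow by keeping the most significant half of each (optionally
;; rounded) sum or difference; e.g. (illustrative) vaddhn_s64 (a, b) emits
;;   addhn  v0.2s, v1.2d, v2.2d
;; where each 32-bit result lane is (a + b) >> 32.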
4784 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
4785 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4786 (vec_concat:<VNARROWQ2>
4787 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4788 (match_operand:VQN 2 "register_operand" "w")]
4789 ADDSUBHN)
4790 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
4791 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4792 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4793 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4794 )
4795
4796 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
4797 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4798 (vec_concat:<VNARROWQ2>
4799 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
4800 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4801 (match_operand:VQN 2 "register_operand" "w")]
4802 ADDSUBHN)))]
4803 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4804 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4805 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4806 )
4807
4808 (define_expand "aarch64_<sur><addsub>hn<mode>"
4809 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4810 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
4811 (match_operand:VQN 2 "register_operand")]
4812 ADDSUBHN))]
4813 "TARGET_SIMD"
4814 {
4815 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
4816 if (BYTES_BIG_ENDIAN)
4817 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
4818 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4819 else
4820 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
4821 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4822
4823 /* The intrinsic expects a narrow result, so emit a subreg that will get
4824 optimized away as appropriate. */
4825 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
4826 <VNARROWQ2>mode));
4827 DONE;
4828 }
4829 )
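;; As a sketch of the expansion above: on little-endian, vaddhn_s16 (a, b)
;; matches the _insn_le pattern with a zero vector concatenated into the
;; high half of the V16QI temporary, e.g.
;;
;;   addhn v0.8b, v0.8h, v1.8h
;;
;; and the subreg move then hands the caller the V8QI low half for free.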
4830
4831 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
4832 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4833 (vec_concat:<VNARROWQ2>
4834 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4835 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4836 (match_operand:VQN 3 "register_operand" "w")]
4837 ADDSUBHN)))]
4838 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4839 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4840 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4841 )
4842
4843 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_be"
4844 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4845 (vec_concat:<VNARROWQ2>
4846 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4847 (match_operand:VQN 3 "register_operand" "w")]
4848 ADDSUBHN)
4849 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4850 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4851 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4852 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4853 )
4854
4855 (define_expand "aarch64_<sur><addsub>hn2<mode>"
4856 [(match_operand:<VNARROWQ2> 0 "register_operand")
4857 (match_operand:<VNARROWQ> 1 "register_operand")
4858 (unspec [(match_operand:VQN 2 "register_operand")
4859 (match_operand:VQN 3 "register_operand")]
4860 ADDSUBHN)]
4861 "TARGET_SIMD"
4862 {
4863 if (BYTES_BIG_ENDIAN)
4864 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_be (operands[0],
4865 operands[1], operands[2], operands[3]));
4866 else
4867 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_le (operands[0],
4868 operands[1], operands[2], operands[3]));
4869 DONE;
4870 }
4871 )
4872
4873 ;; pmul.
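;; PMUL is a polynomial (carry-less) multiply: each pair of bytes is
;; multiplied as polynomials over GF(2), i.e. partial products are XORed
;; rather than added.  For example 0x03 * 0x03 = 0x05, since
;; (x + 1) * (x + 1) = x^2 + 1 once coefficients are reduced mod 2.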
4874
4875 (define_insn "aarch64_pmul<mode>"
4876 [(set (match_operand:VB 0 "register_operand" "=w")
4877 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
4878 (match_operand:VB 2 "register_operand" "w")]
4879 UNSPEC_PMUL))]
4880 "TARGET_SIMD"
4881 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4882 [(set_attr "type" "neon_mul_<Vetype><q>")]
4883 )
4884
4885 (define_insn "aarch64_pmullv8qi"
4886 [(set (match_operand:V8HI 0 "register_operand" "=w")
4887 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
4888 (match_operand:V8QI 2 "register_operand" "w")]
4889 UNSPEC_PMULL))]
4890 "TARGET_SIMD"
4891 "pmull\\t%0.8h, %1.8b, %2.8b"
4892 [(set_attr "type" "neon_mul_b_long")]
4893 )
4894
4895 (define_insn "aarch64_pmull_hiv16qi_insn"
4896 [(set (match_operand:V8HI 0 "register_operand" "=w")
4897 (unspec:V8HI
4898 [(vec_select:V8QI
4899 (match_operand:V16QI 1 "register_operand" "w")
4900 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
4901 (vec_select:V8QI
4902 (match_operand:V16QI 2 "register_operand" "w")
4903 (match_dup 3))]
4904 UNSPEC_PMULL))]
4905 "TARGET_SIMD"
4906 "pmull2\\t%0.8h, %1.16b, %2.16b"
4907 [(set_attr "type" "neon_mul_b_long")]
4908 )
4909
4910 (define_expand "aarch64_pmull_hiv16qi"
4911 [(match_operand:V8HI 0 "register_operand")
4912 (match_operand:V16QI 1 "register_operand")
4913 (match_operand:V16QI 2 "register_operand")]
4914 "TARGET_SIMD"
4915 {
4916 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
4917 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
4918 operands[2], p));
4919 DONE;
4920 }
4921 )
4922
4923 ;; fmulx.
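;; FMULX behaves like FMUL except that (+/-0) * (+/-inf) returns +/-2.0
;; (with the usual sign rules) instead of the default NaN, which is what
;; the reciprocal and reciprocal-square-root step sequences rely on.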
4924
4925 (define_insn "aarch64_fmulx<mode>"
4926 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
4927 (unspec:VHSDF_HSDF
4928 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
4929 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
4930 UNSPEC_FMULX))]
4931 "TARGET_SIMD"
4932 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4933 [(set_attr "type" "neon_fp_mul_<stype>")]
4934 )
4935
4936 ;; vmulxq_lane_f32 and vmulx_laneq_f32
4937
4938 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
4939 [(set (match_operand:VDQSF 0 "register_operand" "=w")
4940 (unspec:VDQSF
4941 [(match_operand:VDQSF 1 "register_operand" "w")
4942 (vec_duplicate:VDQSF
4943 (vec_select:<VEL>
4944 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
4945 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4946 UNSPEC_FMULX))]
4947 "TARGET_SIMD"
4948 {
4949 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
4950 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4951 }
4952 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
4953 )
4954
4955 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
4956
4957 (define_insn "*aarch64_mulx_elt<mode>"
4958 [(set (match_operand:VDQF 0 "register_operand" "=w")
4959 (unspec:VDQF
4960 [(match_operand:VDQF 1 "register_operand" "w")
4961 (vec_duplicate:VDQF
4962 (vec_select:<VEL>
4963 (match_operand:VDQF 2 "register_operand" "w")
4964 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4965 UNSPEC_FMULX))]
4966 "TARGET_SIMD"
4967 {
4968 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4969 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4970 }
4971 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
4972 )
4973
4974 ;; vmulxq_lane
4975
4976 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
4977 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4978 (unspec:VHSDF
4979 [(match_operand:VHSDF 1 "register_operand" "w")
4980 (vec_duplicate:VHSDF
4981 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
4982 UNSPEC_FMULX))]
4983 "TARGET_SIMD"
4984 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
4985 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
4986 )
4987
4988 ;; vmulxs_lane_f32, vmulxs_laneq_f32
4989 ;; vmulxd_lane_f64 == vmulx_lane_f64
4990 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
4991
4992 (define_insn "*aarch64_vgetfmulx<mode>"
4993 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4994 (unspec:<VEL>
4995 [(match_operand:<VEL> 1 "register_operand" "w")
4996 (vec_select:<VEL>
4997 (match_operand:VDQF 2 "register_operand" "w")
4998 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4999 UNSPEC_FMULX))]
5000 "TARGET_SIMD"
5001 {
5002 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5003 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5004 }
5005 [(set_attr "type" "fmul<Vetype>")]
5006 )
5007 ;; <su>q<addsub>
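;; Saturating add/subtract: results are clamped to the range of the element
;; type instead of wrapping, e.g. uqadd on u8 lanes gives 200 + 100 = 255
;; and sqsub on s8 lanes gives (-100) - 100 = -128.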
5008
5009 (define_insn "aarch64_<su_optab>q<addsub><mode>"
5010 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5011 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5012 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5013 "TARGET_SIMD"
5014 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5015 [(set_attr "type" "neon_q<addsub><q>")]
5016 )
5017
5018 ;; suqadd and usqadd
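;; These add an operand of the opposite signedness to the accumulator and
;; saturate to the accumulator's range; the instruction reads and writes
;; the same register, hence the "0" tie on operand 1 below.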
5019
5020 (define_insn "aarch64_<sur>qadd<mode>"
5021 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5022 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5023 (match_operand:VSDQ_I 2 "register_operand" "w")]
5024 USSUQADD))]
5025 "TARGET_SIMD"
5026 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5027 [(set_attr "type" "neon_qadd<q>")]
5028 )
5029
5030 ;; sqmovn and uqmovn
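;; Saturating narrow: each element is truncated to half width, clamping to
;; the narrow range, e.g. sqxtn maps the s16 value 300 to s8 127, and
;; uqxtn maps the u16 value 300 to u8 255.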
5031
5032 (define_insn "aarch64_<su>qmovn<mode>"
5033 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5034 (SAT_TRUNC:<VNARROWQ>
5035 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5036 "TARGET_SIMD"
5037 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5038 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5039 )
5040
5041 (define_insn "aarch64_<su>qmovn<mode>_insn_le"
5042 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5043 (vec_concat:<VNARROWQ2>
5044 (SAT_TRUNC:<VNARROWQ>
5045 (match_operand:VQN 1 "register_operand" "w"))
5046 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5047 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5048 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5049 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5050 )
5051
5052 (define_insn "aarch64_<su>qmovn<mode>_insn_be"
5053 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5054 (vec_concat:<VNARROWQ2>
5055 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5056 (SAT_TRUNC:<VNARROWQ>
5057 (match_operand:VQN 1 "register_operand" "w"))))]
5058 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5059 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5060 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5061 )
5062
5063 (define_expand "aarch64_<su>qmovn<mode>"
5064 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5065 (SAT_TRUNC:<VNARROWQ>
5066 (match_operand:VQN 1 "register_operand")))]
5067 "TARGET_SIMD"
5068 {
5069 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5070 if (BYTES_BIG_ENDIAN)
5071 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
5072 CONST0_RTX (<VNARROWQ>mode)));
5073 else
5074 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
5075 CONST0_RTX (<VNARROWQ>mode)));
5076
5077 /* The intrinsic expects a narrow result, so emit a subreg that will get
5078 optimized away as appropriate. */
5079 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5080 <VNARROWQ2>mode));
5081 DONE;
5082 }
5083 )
5084
5085 (define_insn "aarch64_<su>qxtn2<mode>_le"
5086 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5087 (vec_concat:<VNARROWQ2>
5088 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5089 (SAT_TRUNC:<VNARROWQ>
5090 (match_operand:VQN 2 "register_operand" "w"))))]
5091 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5092 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5093 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5094 )
5095
5096 (define_insn "aarch64_<su>qxtn2<mode>_be"
5097 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5098 (vec_concat:<VNARROWQ2>
5099 (SAT_TRUNC:<VNARROWQ>
5100 (match_operand:VQN 2 "register_operand" "w"))
5101 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5102 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5103 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5104 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5105 )
5106
5107 (define_expand "aarch64_<su>qxtn2<mode>"
5108 [(match_operand:<VNARROWQ2> 0 "register_operand")
5109 (match_operand:<VNARROWQ> 1 "register_operand")
5110 (SAT_TRUNC:<VNARROWQ>
5111 (match_operand:VQN 2 "register_operand"))]
5112 "TARGET_SIMD"
5113 {
5114 if (BYTES_BIG_ENDIAN)
5115 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5116 operands[2]));
5117 else
5118 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5119 operands[2]));
5120 DONE;
5121 }
5122 )
5123
5124 ;; sqmovun
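;; sqxtun narrows a signed source to an unsigned result, clamping at both
;; ends, e.g. s16 -5 becomes u8 0 and s16 300 becomes u8 255.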
5125
5126 (define_insn "aarch64_sqmovun<mode>"
5127 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5128 (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
5129 UNSPEC_SQXTUN))]
5130 "TARGET_SIMD"
5131 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5132 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5133 )
5134
5135 (define_insn "aarch64_sqmovun<mode>_insn_le"
5136 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5137 (vec_concat:<VNARROWQ2>
5138 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5139 UNSPEC_SQXTUN)
5140 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5141 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5142 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5143 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5144 )
5145
5146 (define_insn "aarch64_sqmovun<mode>_insn_be"
5147 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5148 (vec_concat:<VNARROWQ2>
5149 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5150 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5151 UNSPEC_SQXTUN)))]
5152 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5153 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5154 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5155 )
5156
5157 (define_expand "aarch64_sqmovun<mode>"
5158 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5159 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
5160 UNSPEC_SQXTUN))]
5161 "TARGET_SIMD"
5162 {
5163 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5164 if (BYTES_BIG_ENDIAN)
5165 emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
5166 CONST0_RTX (<VNARROWQ>mode)));
5167 else
5168 emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
5169 CONST0_RTX (<VNARROWQ>mode)));
5170
5171 /* The intrinsic expects a narrow result, so emit a subreg that will get
5172 optimized away as appropriate. */
5173 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5174 <VNARROWQ2>mode));
5175 DONE;
5176 }
5177 )
5178
5179 (define_insn "aarch64_sqxtun2<mode>_le"
5180 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5181 (vec_concat:<VNARROWQ2>
5182 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5183 (unspec:<VNARROWQ>
5184 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
5185 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5186 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5187 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5188 )
5189
5190 (define_insn "aarch64_sqxtun2<mode>_be"
5191 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5192 (vec_concat:<VNARROWQ2>
5193 (unspec:<VNARROWQ>
5194 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
5195 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5196 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5197 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5198 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5199 )
5200
5201 (define_expand "aarch64_sqxtun2<mode>"
5202 [(match_operand:<VNARROWQ2> 0 "register_operand")
5203 (match_operand:<VNARROWQ> 1 "register_operand")
5204 (unspec:<VNARROWQ>
5205 [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
5206 "TARGET_SIMD"
5207 {
5208 if (BYTES_BIG_ENDIAN)
5209 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5210 operands[2]));
5211 else
5212 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5213 operands[2]));
5214 DONE;
5215 }
5216 )
5217
5218 ;; <su>q<absneg>
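;; Saturating abs/negate differ from plain abs/neg only for the most
;; negative value, e.g. sqabs (-128) = sqneg (-128) = 127 on s8 lanes.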
5219
5220 (define_insn "aarch64_s<optab><mode>"
5221 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5222 (UNQOPS:VSDQ_I
5223 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5224 "TARGET_SIMD"
5225 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5226 [(set_attr "type" "neon_<optab><q>")]
5227 )
5228
5229 ;; sq<r>dmulh.
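;; Saturating (rounding) doubling multiply returning the high half.
;; A rough C model for 16-bit lanes, where "ssat16" stands for a
;; hypothetical clamp to [-32768, 32767]:
;;
;;   int16_t sqdmulh (int16_t a, int16_t b)
;;   {
;;     int64_t t = 2 * (int64_t) a * b;  /* doubling product */
;;     /* sqrdmulh also adds the rounding constant 1 << 15 here.  */
;;     return ssat16 (t >> 16);          /* high half, saturated */
;;   }
;;
;; Saturation only triggers for a == b == -32768.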
5230
5231 (define_insn "aarch64_sq<r>dmulh<mode>"
5232 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5233 (unspec:VSDQ_HSI
5234 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5235 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5236 VQDMULH))]
5237 "TARGET_SIMD"
5238 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5239 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5240 )
5241
5242 (define_insn "aarch64_sq<r>dmulh_n<mode>"
5243 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5244 (unspec:VDQHS
5245 [(match_operand:VDQHS 1 "register_operand" "w")
5246 (vec_duplicate:VDQHS
5247 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5248 VQDMULH))]
5249 "TARGET_SIMD"
5250 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5251 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5252 )
5253
5254 ;; sq<r>dmulh_lane
5255
5256 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5257 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5258 (unspec:VDQHS
5259 [(match_operand:VDQHS 1 "register_operand" "w")
5260 (vec_select:<VEL>
5261 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5262 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5263 VQDMULH))]
5264 "TARGET_SIMD"
5265 "*
5266 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5267 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5268 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5269 )
5270
5271 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5272 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5273 (unspec:VDQHS
5274 [(match_operand:VDQHS 1 "register_operand" "w")
5275 (vec_select:<VEL>
5276 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5277 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5278 VQDMULH))]
5279 "TARGET_SIMD"
5280 "*
5281 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5282 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5283 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5284 )
5285
5286 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5287 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5288 (unspec:SD_HSI
5289 [(match_operand:SD_HSI 1 "register_operand" "w")
5290 (vec_select:<VEL>
5291 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5292 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5293 VQDMULH))]
5294 "TARGET_SIMD"
5295 "*
5296 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5297 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5298 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5299 )
5300
5301 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5302 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5303 (unspec:SD_HSI
5304 [(match_operand:SD_HSI 1 "register_operand" "w")
5305 (vec_select:<VEL>
5306 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5307 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5308 VQDMULH))]
5309 "TARGET_SIMD"
5310 "*
5311 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5312 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5313 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5314 )
5315
5316 ;; sqrdml[as]h.
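;; Rounding doubling multiply-accumulate (or -subtract) returning the high
;; half; roughly, per lane of width esize:
;;
;;   sqrdmlah: sat (((acc << esize) + 2*a*b + (1 << (esize-1))) >> esize)
;;   sqrdmlsh: sat (((acc << esize) - 2*a*b + (1 << (esize-1))) >> esize)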
5317
5318 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
5319 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5320 (unspec:VSDQ_HSI
5321 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5322 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5323 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5324 SQRDMLH_AS))]
5325 "TARGET_SIMD_RDMA"
5326 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5327 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5328 )
5329
5330 ;; sqrdml[as]h_lane.
5331
5332 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5333 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5334 (unspec:VDQHS
5335 [(match_operand:VDQHS 1 "register_operand" "0")
5336 (match_operand:VDQHS 2 "register_operand" "w")
5337 (vec_select:<VEL>
5338 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5339 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5340 SQRDMLH_AS))]
5341 "TARGET_SIMD_RDMA"
5342 {
5343 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5344 return
5345 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5346 }
5347 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5348 )
5349
5350 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5351 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5352 (unspec:SD_HSI
5353 [(match_operand:SD_HSI 1 "register_operand" "0")
5354 (match_operand:SD_HSI 2 "register_operand" "w")
5355 (vec_select:<VEL>
5356 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5357 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5358 SQRDMLH_AS))]
5359 "TARGET_SIMD_RDMA"
5360 {
5361 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5362 return
5363 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5364 }
5365 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5366 )
5367
5368 ;; sqrdml[as]h_laneq.
5369
5370 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5371 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5372 (unspec:VDQHS
5373 [(match_operand:VDQHS 1 "register_operand" "0")
5374 (match_operand:VDQHS 2 "register_operand" "w")
5375 (vec_select:<VEL>
5376 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5377 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5378 SQRDMLH_AS))]
5379 "TARGET_SIMD_RDMA"
5380 {
5381 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5382 return
5383 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5384 }
5385 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5386 )
5387
5388 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5389 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5390 (unspec:SD_HSI
5391 [(match_operand:SD_HSI 1 "register_operand" "0")
5392 (match_operand:SD_HSI 2 "register_operand" "w")
5393 (vec_select:<VEL>
5394 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5395 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5396 SQRDMLH_AS))]
5397 "TARGET_SIMD_RDMA"
5398 {
5399 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5400 return
5401 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5402 }
5403 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5404 )
5405
5406 ;; vqdml[sa]l
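;; Saturating doubling multiply-accumulate long; the RTL below models this
;; as acc' = ss_plus/ss_minus (acc, ss_ashift (sx (a) * sx (b), 1)), i.e.
;; both the doubling shift and the accumulation saturate in the wide mode.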
5407
5408 (define_insn "aarch64_sqdmlal<mode>"
5409 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5410 (ss_plus:<VWIDE>
5411 (ss_ashift:<VWIDE>
5412 (mult:<VWIDE>
5413 (sign_extend:<VWIDE>
5414 (match_operand:VSD_HSI 2 "register_operand" "w"))
5415 (sign_extend:<VWIDE>
5416 (match_operand:VSD_HSI 3 "register_operand" "w")))
5417 (const_int 1))
5418 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5419 "TARGET_SIMD"
5420 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5421 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5422 )
5423
5424 (define_insn "aarch64_sqdmlsl<mode>"
5425 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5426 (ss_minus:<VWIDE>
5427 (match_operand:<VWIDE> 1 "register_operand" "0")
5428 (ss_ashift:<VWIDE>
5429 (mult:<VWIDE>
5430 (sign_extend:<VWIDE>
5431 (match_operand:VSD_HSI 2 "register_operand" "w"))
5432 (sign_extend:<VWIDE>
5433 (match_operand:VSD_HSI 3 "register_operand" "w")))
5434 (const_int 1))))]
5435 "TARGET_SIMD"
5436 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5437 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5438 )
5439
5440 ;; vqdml[sa]l_lane
5441
5442 (define_insn "aarch64_sqdmlal_lane<mode>"
5443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5444 (ss_plus:<VWIDE>
5445 (ss_ashift:<VWIDE>
5446 (mult:<VWIDE>
5447 (sign_extend:<VWIDE>
5448 (match_operand:VD_HSI 2 "register_operand" "w"))
5449 (vec_duplicate:<VWIDE>
5450 (sign_extend:<VWIDE_S>
5451 (vec_select:<VEL>
5452 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5453 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5454 ))
5455 (const_int 1))
5456 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5457 "TARGET_SIMD"
5458 {
5459 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5460 return
5461 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5462 }
5463 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5464 )
5465
5466 (define_insn "aarch64_sqdmlsl_lane<mode>"
5467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5468 (ss_minus:<VWIDE>
5469 (match_operand:<VWIDE> 1 "register_operand" "0")
5470 (ss_ashift:<VWIDE>
5471 (mult:<VWIDE>
5472 (sign_extend:<VWIDE>
5473 (match_operand:VD_HSI 2 "register_operand" "w"))
5474 (vec_duplicate:<VWIDE>
5475 (sign_extend:<VWIDE_S>
5476 (vec_select:<VEL>
5477 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5478 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5479 ))
5480 (const_int 1))))]
5481 "TARGET_SIMD"
5482 {
5483 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5484 return
5485 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5486 }
5487 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5488 )
5489
5490
5491 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5493 (ss_minus:<VWIDE>
5494 (match_operand:<VWIDE> 1 "register_operand" "0")
5495 (ss_ashift:<VWIDE>
5496 (mult:<VWIDE>
5497 (sign_extend:<VWIDE>
5498 (match_operand:VD_HSI 2 "register_operand" "w"))
5499 (vec_duplicate:<VWIDE>
5500 (sign_extend:<VWIDE_S>
5501 (vec_select:<VEL>
5502 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5503 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5504 ))
5505 (const_int 1))))]
5506 "TARGET_SIMD"
5507 {
5508 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5509 return
5510 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5511 }
5512 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5513 )
5514
5515 (define_insn "aarch64_sqdmlal_laneq<mode>"
5516 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5517 (ss_plus:<VWIDE>
5518 (ss_ashift:<VWIDE>
5519 (mult:<VWIDE>
5520 (sign_extend:<VWIDE>
5521 (match_operand:VD_HSI 2 "register_operand" "w"))
5522 (vec_duplicate:<VWIDE>
5523 (sign_extend:<VWIDE_S>
5524 (vec_select:<VEL>
5525 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5526 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5527 ))
5528 (const_int 1))
5529 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5530 "TARGET_SIMD"
5531 {
5532 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5533 return
5534 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5535 }
5536 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5537 )
5538
5539
5540 (define_insn "aarch64_sqdmlal_lane<mode>"
5541 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5542 (ss_plus:<VWIDE>
5543 (ss_ashift:<VWIDE>
5544 (mult:<VWIDE>
5545 (sign_extend:<VWIDE>
5546 (match_operand:SD_HSI 2 "register_operand" "w"))
5547 (sign_extend:<VWIDE>
5548 (vec_select:<VEL>
5549 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5550 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5551 )
5552 (const_int 1))
5553 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5554 "TARGET_SIMD"
5555 {
5556 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5557 return
5558 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5559 }
5560 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5561 )
5562
5563 (define_insn "aarch64_sqdmlsl_lane<mode>"
5564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5565 (ss_minus:<VWIDE>
5566 (match_operand:<VWIDE> 1 "register_operand" "0")
5567 (ss_ashift:<VWIDE>
5568 (mult:<VWIDE>
5569 (sign_extend:<VWIDE>
5570 (match_operand:SD_HSI 2 "register_operand" "w"))
5571 (sign_extend:<VWIDE>
5572 (vec_select:<VEL>
5573 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5574 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5575 )
5576 (const_int 1))))]
5577 "TARGET_SIMD"
5578 {
5579 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5580 return
5581 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5582 }
5583 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5584 )
5585
5586
5587 (define_insn "aarch64_sqdmlal_laneq<mode>"
5588 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5589 (ss_plus:<VWIDE>
5590 (ss_ashift:<VWIDE>
5591 (mult:<VWIDE>
5592 (sign_extend:<VWIDE>
5593 (match_operand:SD_HSI 2 "register_operand" "w"))
5594 (sign_extend:<VWIDE>
5595 (vec_select:<VEL>
5596 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5597 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5598 )
5599 (const_int 1))
5600 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5601 "TARGET_SIMD"
5602 {
5603 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5604 return
5605 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5606 }
5607 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5608 )
5609
5610 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5611 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5612 (ss_minus:<VWIDE>
5613 (match_operand:<VWIDE> 1 "register_operand" "0")
5614 (ss_ashift:<VWIDE>
5615 (mult:<VWIDE>
5616 (sign_extend:<VWIDE>
5617 (match_operand:SD_HSI 2 "register_operand" "w"))
5618 (sign_extend:<VWIDE>
5619 (vec_select:<VEL>
5620 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5621 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5622 )
5623 (const_int 1))))]
5624 "TARGET_SIMD"
5625 {
5626 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5627 return
5628 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5629 }
5630 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5631 )
5632
5633 ;; vqdml[sa]l_n
5634
5635 (define_insn "aarch64_sqdmlsl_n<mode>"
5636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5637 (ss_minus:<VWIDE>
5638 (match_operand:<VWIDE> 1 "register_operand" "0")
5639 (ss_ashift:<VWIDE>
5640 (mult:<VWIDE>
5641 (sign_extend:<VWIDE>
5642 (match_operand:VD_HSI 2 "register_operand" "w"))
5643 (vec_duplicate:<VWIDE>
5644 (sign_extend:<VWIDE_S>
5645 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5646 (const_int 1))))]
5647 "TARGET_SIMD"
5648 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5649 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5650 )
5651
5652 (define_insn "aarch64_sqdmlal_n<mode>"
5653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5654 (ss_plus:<VWIDE>
5655 (ss_ashift:<VWIDE>
5656 (mult:<VWIDE>
5657 (sign_extend:<VWIDE>
5658 (match_operand:VD_HSI 2 "register_operand" "w"))
5659 (vec_duplicate:<VWIDE>
5660 (sign_extend:<VWIDE_S>
5661 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5662 (const_int 1))
5663 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5664 "TARGET_SIMD"
5665 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5666 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5667 )
5668
5669
5670 ;; sqdml[as]l2
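;; The "2" forms operate on the high half of their 128-bit inputs.  The
;; _internal patterns take an extra vect_par_cnst_hi_half parallel that
;; selects those lanes; the expanders below construct it with
;; aarch64_simd_vect_par_cnst_half (..., true), so e.g. vqdmlal_high_s16
;; multiplies lanes 4..7 of each V8HI source.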
5671
5672 (define_insn "aarch64_sqdmlal2<mode>_internal"
5673 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5674 (ss_plus:<VWIDE>
5675 (ss_ashift:<VWIDE>
5676 (mult:<VWIDE>
5677 (sign_extend:<VWIDE>
5678 (vec_select:<VHALF>
5679 (match_operand:VQ_HSI 2 "register_operand" "w")
5680 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5681 (sign_extend:<VWIDE>
5682 (vec_select:<VHALF>
5683 (match_operand:VQ_HSI 3 "register_operand" "w")
5684 (match_dup 4))))
5685 (const_int 1))
5686 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5687 "TARGET_SIMD"
5688 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5689 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5690 )
5691
5692 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5694 (ss_minus:<VWIDE>
5695 (match_operand:<VWIDE> 1 "register_operand" "0")
5696 (ss_ashift:<VWIDE>
5697 (mult:<VWIDE>
5698 (sign_extend:<VWIDE>
5699 (vec_select:<VHALF>
5700 (match_operand:VQ_HSI 2 "register_operand" "w")
5701 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5702 (sign_extend:<VWIDE>
5703 (vec_select:<VHALF>
5704 (match_operand:VQ_HSI 3 "register_operand" "w")
5705 (match_dup 4))))
5706 (const_int 1))))]
5707 "TARGET_SIMD"
5708 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5709 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5710 )
5711
5712 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5713 [(match_operand:<VWIDE> 0 "register_operand")
5714 (SBINQOPS:<VWIDE>
5715 (match_operand:<VWIDE> 1 "register_operand")
5716 (match_dup 1))
5717 (match_operand:VQ_HSI 2 "register_operand")
5718 (match_operand:VQ_HSI 3 "register_operand")]
5719 "TARGET_SIMD"
5720 {
5721 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5722 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5723 operands[1], operands[2],
5724 operands[3], p));
5725 DONE;
5726 })
5727
5728 ;; vqdml[sa]l2_lane
5729
5730 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5731 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5732 (ss_minus:<VWIDE>
5733 (match_operand:<VWIDE> 1 "register_operand" "0")
5734 (ss_ashift:<VWIDE>
5735 (mult:<VWIDE>
5736 (sign_extend:<VWIDE>
5737 (vec_select:<VHALF>
5738 (match_operand:VQ_HSI 2 "register_operand" "w")
5739 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5740 (vec_duplicate:<VWIDE>
5741 (sign_extend:<VWIDE_S>
5742 (vec_select:<VEL>
5743 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5744 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5745 ))))
5746 (const_int 1))))]
5747 "TARGET_SIMD"
5748 {
5749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5750 return
5751 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5752 }
5753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5754 )
5755
5756 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5758 (ss_plus:<VWIDE>
5759 (ss_ashift:<VWIDE>
5760 (mult:<VWIDE>
5761 (sign_extend:<VWIDE>
5762 (vec_select:<VHALF>
5763 (match_operand:VQ_HSI 2 "register_operand" "w")
5764 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5765 (vec_duplicate:<VWIDE>
5766 (sign_extend:<VWIDE_S>
5767 (vec_select:<VEL>
5768 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5769 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5770 ))))
5771 (const_int 1))
5772 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5773 "TARGET_SIMD"
5774 {
5775 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5776 return
5777 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5778 }
5779 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5780 )
5781
5782 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5783 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5784 (ss_minus:<VWIDE>
5785 (match_operand:<VWIDE> 1 "register_operand" "0")
5786 (ss_ashift:<VWIDE>
5787 (mult:<VWIDE>
5788 (sign_extend:<VWIDE>
5789 (vec_select:<VHALF>
5790 (match_operand:VQ_HSI 2 "register_operand" "w")
5791 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5792 (vec_duplicate:<VWIDE>
5793 (sign_extend:<VWIDE_S>
5794 (vec_select:<VEL>
5795 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5796 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5797 ))))
5798 (const_int 1))))]
5799 "TARGET_SIMD"
5800 {
5801 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5802 return
5803 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5804 }
5805 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5806 )
5807
5808 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5810 (ss_plus:<VWIDE>
5811 (ss_ashift:<VWIDE>
5812 (mult:<VWIDE>
5813 (sign_extend:<VWIDE>
5814 (vec_select:<VHALF>
5815 (match_operand:VQ_HSI 2 "register_operand" "w")
5816 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5817 (vec_duplicate:<VWIDE>
5818 (sign_extend:<VWIDE_S>
5819 (vec_select:<VEL>
5820 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5822 ))))
5823 (const_int 1))
5824 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5825 "TARGET_SIMD"
5826 {
5827 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5828 return
5829 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5830 }
5831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5832 )
5833
5834 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
5835 [(match_operand:<VWIDE> 0 "register_operand")
5836 (SBINQOPS:<VWIDE>
5837 (match_operand:<VWIDE> 1 "register_operand")
5838 (match_dup 1))
5839 (match_operand:VQ_HSI 2 "register_operand")
5840 (match_operand:<VCOND> 3 "register_operand")
5841 (match_operand:SI 4 "immediate_operand")]
5842 "TARGET_SIMD"
5843 {
5844 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5845 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
5846 operands[1], operands[2],
5847 operands[3], operands[4], p));
5848 DONE;
5849 })
5850
5851 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
5852 [(match_operand:<VWIDE> 0 "register_operand")
5853 (SBINQOPS:<VWIDE>
5854 (match_operand:<VWIDE> 1 "register_operand")
5855 (match_dup 1))
5856 (match_operand:VQ_HSI 2 "register_operand")
5857 (match_operand:<VCONQ> 3 "register_operand")
5858 (match_operand:SI 4 "immediate_operand")]
5859 "TARGET_SIMD"
5860 {
5861 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5862 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
5863 operands[1], operands[2],
5864 operands[3], operands[4], p));
5865 DONE;
5866 })
5867
5868 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
5869 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5870 (ss_minus:<VWIDE>
5871 (match_operand:<VWIDE> 1 "register_operand" "0")
5872 (ss_ashift:<VWIDE>
5873 (mult:<VWIDE>
5874 (sign_extend:<VWIDE>
5875 (vec_select:<VHALF>
5876 (match_operand:VQ_HSI 2 "register_operand" "w")
5877 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5878 (vec_duplicate:<VWIDE>
5879 (sign_extend:<VWIDE_S>
5880 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5881 (const_int 1))))]
5882 "TARGET_SIMD"
5883 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5884 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5885 )
5886
5887 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
5888 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5889 (ss_plus:<VWIDE>
5890 (ss_ashift:<VWIDE>
5891 (mult:<VWIDE>
5892 (sign_extend:<VWIDE>
5893 (vec_select:<VHALF>
5894 (match_operand:VQ_HSI 2 "register_operand" "w")
5895 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5896 (vec_duplicate:<VWIDE>
5897 (sign_extend:<VWIDE_S>
5898 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5899 (const_int 1))
5900 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5901 "TARGET_SIMD"
5902 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5903 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5904 )
5905
5906 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
5907 [(match_operand:<VWIDE> 0 "register_operand")
5908 (SBINQOPS:<VWIDE>
5909 (match_operand:<VWIDE> 1 "register_operand")
5910 (match_dup 1))
5911 (match_operand:VQ_HSI 2 "register_operand")
5912 (match_operand:<VEL> 3 "register_operand")]
5913 "TARGET_SIMD"
5914 {
5915 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5916 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
5917 operands[1], operands[2],
5918 operands[3], p));
5919 DONE;
5920 })
5921
5922 ;; vqdmull
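;; Saturating doubling multiply long: per lane, sat (2 * sx (a) * sx (b))
;; in the double-width mode.  Only the most negative value in both inputs
;; can saturate, e.g. sqdmull on s16 gives 2 * -32768 * -32768 -> 0x7fffffff.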
5923
5924 (define_insn "aarch64_sqdmull<mode>"
5925 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5926 (ss_ashift:<VWIDE>
5927 (mult:<VWIDE>
5928 (sign_extend:<VWIDE>
5929 (match_operand:VSD_HSI 1 "register_operand" "w"))
5930 (sign_extend:<VWIDE>
5931 (match_operand:VSD_HSI 2 "register_operand" "w")))
5932 (const_int 1)))]
5933 "TARGET_SIMD"
5934 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5935 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
5936 )
5937
5938 ;; vqdmull_lane
5939
5940 (define_insn "aarch64_sqdmull_lane<mode>"
5941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5942 (ss_ashift:<VWIDE>
5943 (mult:<VWIDE>
5944 (sign_extend:<VWIDE>
5945 (match_operand:VD_HSI 1 "register_operand" "w"))
5946 (vec_duplicate:<VWIDE>
5947 (sign_extend:<VWIDE_S>
5948 (vec_select:<VEL>
5949 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5950 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5951 ))
5952 (const_int 1)))]
5953 "TARGET_SIMD"
5954 {
5955 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5956 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5957 }
5958 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
5959 )
5960
5961 (define_insn "aarch64_sqdmull_laneq<mode>"
5962 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5963 (ss_ashift:<VWIDE>
5964 (mult:<VWIDE>
5965 (sign_extend:<VWIDE>
5966 (match_operand:VD_HSI 1 "register_operand" "w"))
5967 (vec_duplicate:<VWIDE>
5968 (sign_extend:<VWIDE_S>
5969 (vec_select:<VEL>
5970 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5971 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5972 ))
5973 (const_int 1)))]
5974 "TARGET_SIMD"
5975 {
5976 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5977 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5978 }
5979 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
5980 )
5981
5982 (define_insn "aarch64_sqdmull_lane<mode>"
5983 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5984 (ss_ashift:<VWIDE>
5985 (mult:<VWIDE>
5986 (sign_extend:<VWIDE>
5987 (match_operand:SD_HSI 1 "register_operand" "w"))
5988 (sign_extend:<VWIDE>
5989 (vec_select:<VEL>
5990 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5991 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
5992 ))
5993 (const_int 1)))]
5994 "TARGET_SIMD"
5995 {
5996 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5997 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5998 }
5999 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6000 )
6001
6002 (define_insn "aarch64_sqdmull_laneq<mode>"
6003 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6004 (ss_ashift:<VWIDE>
6005 (mult:<VWIDE>
6006 (sign_extend:<VWIDE>
6007 (match_operand:SD_HSI 1 "register_operand" "w"))
6008 (sign_extend:<VWIDE>
6009 (vec_select:<VEL>
6010 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6011 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6012 ))
6013 (const_int 1)))]
6014 "TARGET_SIMD"
6015 {
6016 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6017 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6018 }
6019 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6020 )
6021
6022 ;; vqdmull_n
6023
6024 (define_insn "aarch64_sqdmull_n<mode>"
6025 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6026 (ss_ashift:<VWIDE>
6027 (mult:<VWIDE>
6028 (sign_extend:<VWIDE>
6029 (match_operand:VD_HSI 1 "register_operand" "w"))
6030 (vec_duplicate:<VWIDE>
6031 (sign_extend:<VWIDE_S>
6032 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6033 )
6034 (const_int 1)))]
6035 "TARGET_SIMD"
6036 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6037 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6038 )
6039
6040 ;; vqdmull2
6041
6042 (define_insn "aarch64_sqdmull2<mode>_internal"
6043 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6044 (ss_ashift:<VWIDE>
6045 (mult:<VWIDE>
6046 (sign_extend:<VWIDE>
6047 (vec_select:<VHALF>
6048 (match_operand:VQ_HSI 1 "register_operand" "w")
6049 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6050 (sign_extend:<VWIDE>
6051 (vec_select:<VHALF>
6052 (match_operand:VQ_HSI 2 "register_operand" "w")
6053 (match_dup 3)))
6054 )
6055 (const_int 1)))]
6056 "TARGET_SIMD"
6057 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6058 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6059 )
6060
6061 (define_expand "aarch64_sqdmull2<mode>"
6062 [(match_operand:<VWIDE> 0 "register_operand")
6063 (match_operand:VQ_HSI 1 "register_operand")
6064 (match_operand:VQ_HSI 2 "register_operand")]
6065 "TARGET_SIMD"
6066 {
6067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6068 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6069 operands[2], p));
6070 DONE;
6071 })
6072
6073 ;; vqdmull2_lane
6074
6075 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6076 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6077 (ss_ashift:<VWIDE>
6078 (mult:<VWIDE>
6079 (sign_extend:<VWIDE>
6080 (vec_select:<VHALF>
6081 (match_operand:VQ_HSI 1 "register_operand" "w")
6082 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6083 (vec_duplicate:<VWIDE>
6084 (sign_extend:<VWIDE_S>
6085 (vec_select:<VEL>
6086 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6087 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6088 ))
6089 (const_int 1)))]
6090 "TARGET_SIMD"
6091 {
6092 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6093 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6094 }
6095 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6096 )
6097
6098 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6099 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6100 (ss_ashift:<VWIDE>
6101 (mult:<VWIDE>
6102 (sign_extend:<VWIDE>
6103 (vec_select:<VHALF>
6104 (match_operand:VQ_HSI 1 "register_operand" "w")
6105 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6106 (vec_duplicate:<VWIDE>
6107 (sign_extend:<VWIDE_S>
6108 (vec_select:<VEL>
6109 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6110 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6111 ))
6112 (const_int 1)))]
6113 "TARGET_SIMD"
6114 {
6115 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6116 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6117 }
6118 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6119 )
6120
6121 (define_expand "aarch64_sqdmull2_lane<mode>"
6122 [(match_operand:<VWIDE> 0 "register_operand")
6123 (match_operand:VQ_HSI 1 "register_operand")
6124 (match_operand:<VCOND> 2 "register_operand")
6125 (match_operand:SI 3 "immediate_operand")]
6126 "TARGET_SIMD"
6127 {
6128 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6129 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6130 operands[2], operands[3],
6131 p));
6132 DONE;
6133 })
6134
6135 (define_expand "aarch64_sqdmull2_laneq<mode>"
6136 [(match_operand:<VWIDE> 0 "register_operand")
6137 (match_operand:VQ_HSI 1 "register_operand")
6138 (match_operand:<VCONQ> 2 "register_operand")
6139 (match_operand:SI 3 "immediate_operand")]
6140 "TARGET_SIMD"
6141 {
6142 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6143 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6144 operands[2], operands[3],
6145 p));
6146 DONE;
6147 })
6148
6149 ;; vqdmull2_n
6150
6151 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6152 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6153 (ss_ashift:<VWIDE>
6154 (mult:<VWIDE>
6155 (sign_extend:<VWIDE>
6156 (vec_select:<VHALF>
6157 (match_operand:VQ_HSI 1 "register_operand" "w")
6158 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6159 (vec_duplicate:<VWIDE>
6160 (sign_extend:<VWIDE_S>
6161 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6162 )
6163 (const_int 1)))]
6164 "TARGET_SIMD"
6165 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6167 )
6168
6169 (define_expand "aarch64_sqdmull2_n<mode>"
6170 [(match_operand:<VWIDE> 0 "register_operand")
6171 (match_operand:VQ_HSI 1 "register_operand")
6172 (match_operand:<VEL> 2 "register_operand")]
6173 "TARGET_SIMD"
6174 {
6175 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6176 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6177 operands[2], p));
6178 DONE;
6179 })
6180
6181 ;; vshl
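;; Register-controlled shifts: each lane is shifted by the signed count in
;; the low byte of the corresponding lane of operand 2, and a negative
;; count shifts right instead, e.g. sshl of -16 by a count of -3 yields -2.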
6182
6183 (define_insn "aarch64_<sur>shl<mode>"
6184 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6185 (unspec:VSDQ_I_DI
6186 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6187 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6188 VSHL))]
6189 "TARGET_SIMD"
6190 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6191 [(set_attr "type" "neon_shift_reg<q>")]
6192 )
6193
6194
6195 ;; vqshl
6196
6197 (define_insn "aarch64_<sur>q<r>shl<mode>"
6198 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6199 (unspec:VSDQ_I
6200 [(match_operand:VSDQ_I 1 "register_operand" "w")
6201 (match_operand:VSDQ_I 2 "register_operand" "w")]
6202 VQSHL))]
6203 "TARGET_SIMD"
6204 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6205 [(set_attr "type" "neon_sat_shift_reg<q>")]
6206 )
6207
6208 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
6209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6210 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6211 (match_operand:SI 2
6212 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6213 VSHLL))]
6214 "TARGET_SIMD"
6215 {
6216 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
6217 emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
6218 p, operands[2]));
6219 DONE;
6220 }
6221 )
6222
6223 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
6224 [(set (match_operand:<VWIDE> 0 "register_operand")
6225 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6226 (match_operand:SI 2
6227 "immediate_operand" "i")]
6228 VSHLL))]
6229 "TARGET_SIMD"
6230 {
6231 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6232 emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
6233 p, operands[2]));
6234 DONE;
6235 }
6236 )
6237
6238 ;; vshll_n
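;; USHLL/SSHLL only encode shift amounts up to one less than the element
;; size; a shift by exactly the element size is only available as the
;; untyped SHLL form, hence the GET_MODE_UNIT_BITSIZE checks below, e.g.
;;
;;   ushll v0.8h, v1.8b, 7    /* valid */
;;   shll  v0.8h, v1.8b, 8    /* shift == element size */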
6239
6240 (define_insn "aarch64_<sur>shll<mode>_internal"
6241 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6242 (unspec:<VWIDE> [(vec_select:<VHALF>
6243 (match_operand:VQW 1 "register_operand" "w")
6244 (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
6245 (match_operand:SI 3
6246 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6247 VSHLL))]
6248 "TARGET_SIMD"
6249 {
6250 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6251 return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6252 else
6253 return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6254 }
6255 [(set_attr "type" "neon_shift_imm_long")]
6256 )
6257
6258 (define_insn "aarch64_<sur>shll2<mode>_internal"
6259 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6260 (unspec:<VWIDE> [(vec_select:<VHALF>
6261 (match_operand:VQW 1 "register_operand" "w")
6262 (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
6263 (match_operand:SI 3
6264 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6265 VSHLL))]
6266 "TARGET_SIMD"
6267 {
6268 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6269 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6270 else
6271 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6272 }
6273 [(set_attr "type" "neon_shift_imm_long")]
6274 )
6275
6276 (define_insn "aarch64_<sur>shll_n<mode>"
6277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6278 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
6279 (match_operand:SI 2
6280 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6281 VSHLL))]
6282 "TARGET_SIMD"
6283 {
6284 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6285 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6286 else
6287 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6288 }
6289 [(set_attr "type" "neon_shift_imm_long")]
6290 )
6291
6292 ;; vshll_high_n
6293
6294 (define_insn "aarch64_<sur>shll2_n<mode>"
6295 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6296 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6297 (match_operand:SI 2 "immediate_operand" "i")]
6298 VSHLL))]
6299 "TARGET_SIMD"
6300 {
6301 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6302 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6303 else
6304 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6305 }
6306 [(set_attr "type" "neon_shift_imm_long")]
6307 )
6308
6309 ;; vrshr_n
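;; Rounding shift right: per lane, (x + (1 << (n-1))) >> n, so e.g.
;; urshr #2 maps 7 to (7 + 2) >> 2 = 2 where a plain ushr gives 1.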
6310
6311 (define_insn "aarch64_<sur>shr_n<mode>"
6312 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6313 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6314 (match_operand:SI 2
6315 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6316 VRSHR_N))]
6317 "TARGET_SIMD"
6318 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6319 [(set_attr "type" "neon_sat_shift_imm<q>")]
6320 )
6321
6322 ;; v(r)sra_n
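;; Shift right and accumulate: d' = d + (x >> n); the rounding forms add
;; (1 << (n-1)) to x before shifting.  Operand 1 is tied to the output
;; since the destination is both read and written.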
6323
6324 (define_insn "aarch64_<sur>sra_n<mode>"
6325 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6326 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6327 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6328 (match_operand:SI 3
6329 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6330 VSRA))]
6331 "TARGET_SIMD"
6332 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6333 [(set_attr "type" "neon_shift_acc<q>")]
6334 )
6335
6336 ;; vs<lr>i_n
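;; Shift and insert: the shifted source is merged into the destination,
;; which keeps the bits the shift vacates.  Per lane:
;;
;;   sli #n: d' = (x << n) | (d & ((1 << n) - 1))   -- low n bits of d kept
;;   sri #n: d' = (x >> n) | (top n bits of d)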
6337
6338 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6339 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6340 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6341 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6342 (match_operand:SI 3
6343 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6344 VSLRI))]
6345 "TARGET_SIMD"
6346 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6347 [(set_attr "type" "neon_shift_imm<q>")]
6348 )
6349
6350 ;; vqshl(u)
6351
6352 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6353 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6354 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6355 (match_operand:SI 2
6356 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6357 VQSHL_N))]
6358 "TARGET_SIMD"
6359 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6360 [(set_attr "type" "neon_sat_shift_imm<q>")]
6361 )
6362
6363
6364 ;; vq(r)shr(u)n_n
6365
6366 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
6367 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6368 (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
6369 (match_operand:SI 2
6370 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6371 VQSHRN_N))]
6372 "TARGET_SIMD"
6373 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6374 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6375 )
6376
6377 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
6378 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6379 (vec_concat:<VNARROWQ2>
6380 (unspec:<VNARROWQ>
6381 [(match_operand:VQN 1 "register_operand" "w")
6382 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6383 VQSHRN_N)
6384 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
6385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6386 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6387 [(set_attr "type" "neon_shift_imm_narrow_q")]
6388 )
6389
6390 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
6391 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6392 (vec_concat:<VNARROWQ2>
6393 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
6394 (unspec:<VNARROWQ>
6395 [(match_operand:VQN 1 "register_operand" "w")
6396 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6397 VQSHRN_N)))]
6398 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6399 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6400 [(set_attr "type" "neon_shift_imm_narrow_q")]
6401 )
6402
6403 (define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
6404 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6405 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
6406 (match_operand:SI 2
6407 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6408 VQSHRN_N))]
6409 "TARGET_SIMD"
6410 {
6411 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6412 INTVAL (operands[2]));
6413 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
6414 if (BYTES_BIG_ENDIAN)
6415 emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
6416 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6417 else
6418 emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
6419 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6420
6421 /* The intrinsic expects a narrow result, so emit a subreg that will get
6422 optimized away as appropriate. */
6423 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
6424 <VNARROWQ2>mode));
6425 DONE;
6426 }
6427 )
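
;; A usage sketch for the expander above (assumes <arm_neon.h>;
;; function name is ours):
;;
;;   #include <arm_neon.h>
;;
;;   int16x4_t
;;   narrow (int32x4_t x)
;;   {
;;     /* Saturating rounding shift right and narrow by 5: expands via
;;        the _insn_le/_insn_be patterns into a 128-bit temporary with
;;        a zero high half, then takes the low 64-bit subreg; expected
;;        to emit "sqrshrn v0.4h, v1.4s, #5".  */
;;     return vqrshrn_n_s32 (x, 5);
;;   }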
6428
6429 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
6430 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6431 (vec_concat:<VNARROWQ2>
6432 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6433 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6434 (match_operand:VQN 3
6435 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6436 VQSHRN_N)))]
6437 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6438 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6439 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6440 )
6441
6442 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
6443 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6444 (vec_concat:<VNARROWQ2>
6445 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6446 (match_operand:VQN 3
6447 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6448 VQSHRN_N)
6449 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6450 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6451 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6452 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6453 )
6454
6455 (define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
6456 [(match_operand:<VNARROWQ2> 0 "register_operand")
6457 (match_operand:<VNARROWQ> 1 "register_operand")
6458 (unspec:<VNARROWQ>
6459 [(match_operand:VQN 2 "register_operand")
6460 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6461 VQSHRN_N)]
6462 "TARGET_SIMD"
6463 {
6464 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6465 INTVAL (operands[3]));
6466
6467 if (BYTES_BIG_ENDIAN)
6468 emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
6469 operands[1], operands[2], operands[3]));
6470 else
6471 emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
6472 operands[1], operands[2], operands[3]));
6473 DONE;
6474 }
6475 )
6476
6477
6478 ;; cm(eq|ge|gt|lt|le)
6479 ;; Note that we have constraints for both Dz and Z, as different expanders
6480 ;; have different ideas of what should be passed to this pattern.
6481
6482 (define_insn "aarch64_cm<optab><mode>"
6483 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6484 (neg:<V_INT_EQUIV>
6485 (COMPARISONS:<V_INT_EQUIV>
6486 (match_operand:VDQ_I 1 "register_operand" "w,w")
6487 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6488 )))]
6489 "TARGET_SIMD"
6490 "@
6491 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6492 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
6493 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
6494 )
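
;; A minimal usage sketch (our function names; assumes <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;
;;   uint32x4_t eq  (int32x4_t a, int32x4_t b) { return vceqq_s32 (a, b); }
;;   uint32x4_t eqz (int32x4_t a)              { return vceqzq_s32 (a); }
;;
;; eq is expected to use the first alternative, "cmeq v0.4s, v0.4s,
;; v1.4s", and eqz the ZDz alternative, "cmeq v0.4s, v0.4s, #0".  The
;; neg wrapper in the RTL reflects that a true lane is all-ones (-1),
;; not 1.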
6495
6496 (define_insn_and_split "aarch64_cm<optab>di"
6497 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
6498 (neg:DI
6499 (COMPARISONS:DI
6500 (match_operand:DI 1 "register_operand" "w,w,r")
6501 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
6502 )))
6503 (clobber (reg:CC CC_REGNUM))]
6504 "TARGET_SIMD"
6505 "#"
6506 "&& reload_completed"
6507 [(set (match_operand:DI 0 "register_operand")
6508 (neg:DI
6509 (COMPARISONS:DI
6510 (match_operand:DI 1 "register_operand")
6511 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6512 )))]
6513 {
6514 /* If we are in the general purpose register file,
6515 we split to a sequence of comparison and store. */
6516 if (GP_REGNUM_P (REGNO (operands[0]))
6517 && GP_REGNUM_P (REGNO (operands[1])))
6518 {
6519 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
6520 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6521 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6522 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6523 DONE;
6524 }
6525 /* Otherwise, we expand to a similar pattern which does not
6526 clobber CC_REGNUM. */
6527 }
6528 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
6529 )
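
;; A scalar C sketch showing both paths (names are ours):
;;
;;   #include <stdint.h>
;;
;;   int64_t cmp (int64_t x, int64_t y)
;;   {
;;     /* -(x == y).  If register allocation picks general registers,
;;        the split above emits "cmp x0, x1; csetm x0, eq" via
;;        gen_cstoredi_neg; in FP/SIMD registers the *aarch64_cm<optab>di
;;        pattern below gives a single "cmeq d0, d0, d1".  */
;;     return -(int64_t) (x == y);
;;   }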
6530
6531 (define_insn "*aarch64_cm<optab>di"
6532 [(set (match_operand:DI 0 "register_operand" "=w,w")
6533 (neg:DI
6534 (COMPARISONS:DI
6535 (match_operand:DI 1 "register_operand" "w,w")
6536 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6537 )))]
6538 "TARGET_SIMD && reload_completed"
6539 "@
6540 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
6541 cm<optab>\t%d0, %d1, #0"
6542 [(set_attr "type" "neon_compare, neon_compare_zero")]
6543 )
6544
6545 ;; cm(hs|hi)
6546
6547 (define_insn "aarch64_cm<optab><mode>"
6548 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6549 (neg:<V_INT_EQUIV>
6550 (UCOMPARISONS:<V_INT_EQUIV>
6551 (match_operand:VDQ_I 1 "register_operand" "w")
6552 (match_operand:VDQ_I 2 "register_operand" "w")
6553 )))]
6554 "TARGET_SIMD"
6555 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6556 [(set_attr "type" "neon_compare<q>")]
6557 )
6558
6559 (define_insn_and_split "aarch64_cm<optab>di"
6560 [(set (match_operand:DI 0 "register_operand" "=w,r")
6561 (neg:DI
6562 (UCOMPARISONS:DI
6563 (match_operand:DI 1 "register_operand" "w,r")
6564 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
6565 )))
6566 (clobber (reg:CC CC_REGNUM))]
6567 "TARGET_SIMD"
6568 "#"
6569 "&& reload_completed"
6570 [(set (match_operand:DI 0 "register_operand")
6571 (neg:DI
6572 (UCOMPARISONS:DI
6573 (match_operand:DI 1 "register_operand")
6574 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6575 )))]
6576 {
6577 /* If we are in the general purpose register file,
6578 we split to a sequence of comparison and store. */
6579 if (GP_REGNUM_P (REGNO (operands[0]))
6580 && GP_REGNUM_P (REGNO (operands[1])))
6581 {
6582 machine_mode mode = CCmode;
6583 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6584 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6585 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6586 DONE;
6587 }
6588 /* Otherwise, we expand to a similar pattern which does not
6589 clobber CC_REGNUM. */
6590 }
6591 [(set_attr "type" "neon_compare,multiple")]
6592 )
6593
6594 (define_insn "*aarch64_cm<optab>di"
6595 [(set (match_operand:DI 0 "register_operand" "=w")
6596 (neg:DI
6597 (UCOMPARISONS:DI
6598 (match_operand:DI 1 "register_operand" "w")
6599 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
6600 )))]
6601 "TARGET_SIMD && reload_completed"
6602 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
6603 [(set_attr "type" "neon_compare")]
6604 )
6605
6606 ;; cmtst
6607
6608 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
6609 ;; we don't have any insns using ne, and aarch64_vcond outputs
6610 ;; not (neg (eq (and x y) 0))
6611 ;; which is rewritten by simplify_rtx as
6612 ;; plus (eq (and x y) 0) -1.
6613
6614 (define_insn "aarch64_cmtst<mode>"
6615 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6616 (plus:<V_INT_EQUIV>
6617 (eq:<V_INT_EQUIV>
6618 (and:VDQ_I
6619 (match_operand:VDQ_I 1 "register_operand" "w")
6620 (match_operand:VDQ_I 2 "register_operand" "w"))
6621 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
6622 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
6623 ]
6624 "TARGET_SIMD"
6625 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6626 [(set_attr "type" "neon_tst<q>")]
6627 )
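
;; For example (a sketch assuming <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;
;;   uint32x4_t any_bits (int32x4_t a, int32x4_t b)
;;   {
;;     /* Lanes become all-ones where (a & b) != 0; combine should
;;        canonicalize the RTL into the plus/eq form above, giving a
;;        single "cmtst v0.4s, v0.4s, v1.4s".  */
;;     return vtstq_s32 (a, b);
;;   }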
6628
6629 ;; One can also get a cmtst by combining a not (ne (eq x 0)),
6630 ;; in which case we rewrite it to a comparison of the operand
6631 ;; against itself.
6632
6633 (define_insn "*aarch64_cmtst_same_<mode>"
6634 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6635 (plus:<V_INT_EQUIV>
6636 (eq:<V_INT_EQUIV>
6637 (match_operand:VDQ_I 1 "register_operand" "w")
6638 (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
6639 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
6640 ]
6641 "TARGET_SIMD"
6642 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
6643 [(set_attr "type" "neon_tst<q>")]
6644 )
6645
6646 (define_insn_and_split "aarch64_cmtstdi"
6647 [(set (match_operand:DI 0 "register_operand" "=w,r")
6648 (neg:DI
6649 (ne:DI
6650 (and:DI
6651 (match_operand:DI 1 "register_operand" "w,r")
6652 (match_operand:DI 2 "register_operand" "w,r"))
6653 (const_int 0))))
6654 (clobber (reg:CC CC_REGNUM))]
6655 "TARGET_SIMD"
6656 "#"
6657 "&& reload_completed"
6658 [(set (match_operand:DI 0 "register_operand")
6659 (neg:DI
6660 (ne:DI
6661 (and:DI
6662 (match_operand:DI 1 "register_operand")
6663 (match_operand:DI 2 "register_operand"))
6664 (const_int 0))))]
6665 {
6666 /* If we are in the general purpose register file,
6667 we split to a sequence of comparison and store. */
6668 if (GP_REGNUM_P (REGNO (operands[0]))
6669 && GP_REGNUM_P (REGNO (operands[1])))
6670 {
6671 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
6672 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
6673 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
6674 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
6675 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6676 DONE;
6677 }
6678 /* Otherwise, we expand to a similar pattern which does not
6679 clobber CC_REGNUM. */
6680 }
6681 [(set_attr "type" "neon_tst,multiple")]
6682 )
6683
6684 (define_insn "*aarch64_cmtstdi"
6685 [(set (match_operand:DI 0 "register_operand" "=w")
6686 (neg:DI
6687 (ne:DI
6688 (and:DI
6689 (match_operand:DI 1 "register_operand" "w")
6690 (match_operand:DI 2 "register_operand" "w"))
6691 (const_int 0))))]
6692 "TARGET_SIMD"
6693 "cmtst\t%d0, %d1, %d2"
6694 [(set_attr "type" "neon_tst")]
6695 )
6696
6697 ;; fcm(eq|ge|gt|le|lt)
6698
6699 (define_insn "aarch64_cm<optab><mode>"
6700 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6701 (neg:<V_INT_EQUIV>
6702 (COMPARISONS:<V_INT_EQUIV>
6703 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
6704 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
6705 )))]
6706 "TARGET_SIMD"
6707 "@
6708 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6709 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
6710 [(set_attr "type" "neon_fp_compare_<stype><q>")]
6711 )
6712
6713 ;; fac(ge|gt)
6714 ;; Note that we can also handle what would be fac(le|lt) by
6715 ;; generating fac(ge|gt) with the operands swapped.
6716
6717 (define_insn "aarch64_fac<optab><mode>"
6718 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6719 (neg:<V_INT_EQUIV>
6720 (FAC_COMPARISONS:<V_INT_EQUIV>
6721 (abs:VHSDF_HSDF
6722 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
6723 (abs:VHSDF_HSDF
6724 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
6725 )))]
6726 "TARGET_SIMD"
6727 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6728 [(set_attr "type" "neon_fp_compare_<stype><q>")]
6729 )
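
;; For illustration (a sketch; assumes <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;
;;   uint32x4_t agt (float32x4_t a, float32x4_t b) { return vcagtq_f32 (a, b); }
;;   uint32x4_t alt (float32x4_t a, float32x4_t b) { return vcaltq_f32 (a, b); }
;;
;; agt compares |a| > |b| and emits "facgt v0.4s, v0.4s, v1.4s"; alt
;; reuses the same pattern with the operands swapped via the
;; <cmp_1>/<cmp_2> selectors.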
6730
6731 ;; addp
6732
6733 (define_insn "aarch64_addp<mode>"
6734 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
6735 (unspec:VDQ_I
6736 [(match_operand:VDQ_I 1 "register_operand" "w")
6737 (match_operand:VDQ_I 2 "register_operand" "w")]
6738 UNSPEC_ADDP))]
6739 "TARGET_SIMD"
6740 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6741 [(set_attr "type" "neon_reduc_add<q>")]
6742 )
6743
6744 (define_insn "aarch64_addpdi"
6745 [(set (match_operand:DI 0 "register_operand" "=w")
6746 (unspec:DI
6747 [(match_operand:V2DI 1 "register_operand" "w")]
6748 UNSPEC_ADDP))]
6749 "TARGET_SIMD"
6750 "addp\t%d0, %1.2d"
6751 [(set_attr "type" "neon_reduc_add")]
6752 )
6753
6754 ;; sqrt
6755
6756 (define_expand "sqrt<mode>2"
6757 [(set (match_operand:VHSDF 0 "register_operand")
6758 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
6759 "TARGET_SIMD"
6760 {
6761 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
6762 DONE;
6763 })
6764
6765 (define_insn "*sqrt<mode>2"
6766 [(set (match_operand:VHSDF 0 "register_operand" "=w")
6767 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
6768 "TARGET_SIMD"
6769 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
6770 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
6771 )
6772
6773 ;; Patterns for vector struct loads and stores.
6774
6775 (define_insn "aarch64_simd_ld2<vstruct_elt>"
6776 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
6777 (unspec:VSTRUCT_2Q [
6778 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
6779 UNSPEC_LD2))]
6780 "TARGET_SIMD"
6781 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6782 [(set_attr "type" "neon_load2_2reg<q>")]
6783 )
6784
6785 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
6786 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6787 (unspec:VSTRUCT_2QD [
6788 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6789 UNSPEC_LD2_DUP))]
6790 "TARGET_SIMD"
6791 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6792 [(set_attr "type" "neon_load2_all_lanes<q>")]
6793 )
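
;; A usage sketch (assumes <arm_neon.h>; the function name is ours):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4x2_t dup_pair (const int32_t *p)
;;   {
;;     /* Load p[0] and p[1] and replicate each across one vector of
;;        the pair: "ld2r {v0.4s, v1.4s}, [x0]".  */
;;     return vld2q_dup_s32 (p);
;;   }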
6794
6795 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6796 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6797 (unspec:VSTRUCT_2QD [
6798 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6799 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
6800 (match_operand:SI 3 "immediate_operand" "i")]
6801 UNSPEC_LD2_LANE))]
6802 "TARGET_SIMD"
6803 {
6804 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6805 INTVAL (operands[3]));
6806 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
6807 }
6808 [(set_attr "type" "neon_load2_one_lane")]
6809 )
6810
6811 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6812 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
6813 (unspec:VSTRUCT_2Q [
6814 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
6815 UNSPEC_LD2))]
6816 "TARGET_SIMD"
6817 {
6818 if (BYTES_BIG_ENDIAN)
6819 {
6820 rtx tmp = gen_reg_rtx (<MODE>mode);
6821 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6822 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6823 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
6824 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6825 }
6826 else
6827 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
6828 DONE;
6829 })
6830
6831 (define_insn "aarch64_simd_st2<vstruct_elt>"
6832 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
6833 (unspec:VSTRUCT_2Q [
6834 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
6835 UNSPEC_ST2))]
6836 "TARGET_SIMD"
6837 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6838 [(set_attr "type" "neon_store2_2reg<q>")]
6839 )
6840
6841 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6842 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6843 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6844 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
6845 (match_operand:SI 2 "immediate_operand" "i")]
6846 UNSPEC_ST2_LANE))]
6847 "TARGET_SIMD"
6848 {
6849 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6850 INTVAL (operands[2]));
6851 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
6852 }
6853 [(set_attr "type" "neon_store2_one_lane<q>")]
6854 )
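
;; Concretely, aarch64_endian_lane_rtx maps GCC lane n to n on
;; little-endian and to (nunits - 1 - n) on big-endian.  E.g. for
;; vst2q_lane_s32 (p, v, 1) (a sketch assuming <arm_neon.h>) the
;; printed lane is 1 on LE but 2 on BE, so the element stored to
;; memory is the same either way.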
6855
6856 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6857 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
6858 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
6859 UNSPEC_ST2))]
6860 "TARGET_SIMD"
6861 {
6862 if (BYTES_BIG_ENDIAN)
6863 {
6864 rtx tmp = gen_reg_rtx (<MODE>mode);
6865 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6866 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6867 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6868 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
6869 }
6870 else
6871 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
6872 DONE;
6873 })
6874
6875 (define_insn "aarch64_simd_ld3<vstruct_elt>"
6876 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
6877 (unspec:VSTRUCT_3Q [
6878 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
6879 UNSPEC_LD3))]
6880 "TARGET_SIMD"
6881 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6882 [(set_attr "type" "neon_load3_3reg<q>")]
6883 )
6884
6885 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
6886 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6887 (unspec:VSTRUCT_3QD [
6888 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6889 UNSPEC_LD3_DUP))]
6890 "TARGET_SIMD"
6891 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6892 [(set_attr "type" "neon_load3_all_lanes<q>")]
6893 )
6894
6895 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6896 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6897 (unspec:VSTRUCT_3QD [
6898 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6899 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
6900 (match_operand:SI 3 "immediate_operand" "i")]
6901 UNSPEC_LD3_LANE))]
6902 "TARGET_SIMD"
6903 {
6904 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6905 INTVAL (operands[3]));
6906 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
6907 }
6908 [(set_attr "type" "neon_load3_one_lane")]
6909 )
6910
6911 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6912 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
6913 (unspec:VSTRUCT_3Q [
6914 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
6915 UNSPEC_LD3))]
6916 "TARGET_SIMD"
6917 {
6918 if (BYTES_BIG_ENDIAN)
6919 {
6920 rtx tmp = gen_reg_rtx (<MODE>mode);
6921 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6922 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6923 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
6924 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6925 }
6926 else
6927 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
6928 DONE;
6929 })
6930
6931 (define_insn "aarch64_simd_st3<vstruct_elt>"
6932 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
6933 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
6934 UNSPEC_ST3))]
6935 "TARGET_SIMD"
6936 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6937 [(set_attr "type" "neon_store3_3reg<q>")]
6938 )
6939
6940 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6941 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6942 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6943 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
6944 (match_operand:SI 2 "immediate_operand" "i")]
6945 UNSPEC_ST3_LANE))]
6946 "TARGET_SIMD"
6947 {
6948 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6949 INTVAL (operands[2]));
6950 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
6951 }
6952 [(set_attr "type" "neon_store3_one_lane<q>")]
6953 )
6954
6955 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6956 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
6957 (unspec:VSTRUCT_3Q [
6958 (match_operand:VSTRUCT_3Q 1 "register_operand")]
6959 UNSPEC_ST3))]
6960 "TARGET_SIMD"
6961 {
6962 if (BYTES_BIG_ENDIAN)
6963 {
6964 rtx tmp = gen_reg_rtx (<MODE>mode);
6965 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6966 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6967 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6968 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
6969 }
6970 else
6971 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
6972 DONE;
6973 })
6974
6975 (define_insn "aarch64_simd_ld4<vstruct_elt>"
6976 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
6977 (unspec:VSTRUCT_4Q [
6978 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
6979 UNSPEC_LD4))]
6980 "TARGET_SIMD"
6981 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6982 [(set_attr "type" "neon_load4_4reg<q>")]
6983 )
6984
6985 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
6986 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6987 (unspec:VSTRUCT_4QD [
6988 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6989 UNSPEC_LD4_DUP))]
6990 "TARGET_SIMD"
6991 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6992 [(set_attr "type" "neon_load4_all_lanes<q>")]
6993 )
6994
6995 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6996 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6997 (unspec:VSTRUCT_4QD [
6998 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6999 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7000 (match_operand:SI 3 "immediate_operand" "i")]
7001 UNSPEC_LD4_LANE))]
7002 "TARGET_SIMD"
7003 {
7004 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7005 INTVAL (operands[3]));
7006 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7007 }
7008 [(set_attr "type" "neon_load4_one_lane")]
7009 )
7010
7011 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7012 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7013 (unspec:VSTRUCT_4Q [
7014 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7015 UNSPEC_LD4))]
7016 "TARGET_SIMD"
7017 {
7018 if (BYTES_BIG_ENDIAN)
7019 {
7020 rtx tmp = gen_reg_rtx (<MODE>mode);
7021 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7022 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7023 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7024 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7025 }
7026 else
7027 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7028 DONE;
7029 })
7030
7031 (define_insn "aarch64_simd_st4<vstruct_elt>"
7032 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7033 (unspec:VSTRUCT_4Q [
7034 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7035 UNSPEC_ST4))]
7036 "TARGET_SIMD"
7037 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7038 [(set_attr "type" "neon_store4_4reg<q>")]
7039 )
7040
7041 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7042 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7043 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7044 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7045 (match_operand:SI 2 "immediate_operand" "i")]
7046 UNSPEC_ST4_LANE))]
7047 "TARGET_SIMD"
7048 {
7049 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7050 INTVAL (operands[2]));
7051 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7052 }
7053 [(set_attr "type" "neon_store4_one_lane<q>")]
7054 )
7055
7056 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7057 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7058 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7059 UNSPEC_ST4))]
7060 "TARGET_SIMD"
7061 {
7062 if (BYTES_BIG_ENDIAN)
7063 {
7064 rtx tmp = gen_reg_rtx (<MODE>mode);
7065 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7066 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7067 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7068 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7069 }
7070 else
7071 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7072 DONE;
7073 })
7074
7075 (define_insn_and_split "aarch64_rev_reglist<mode>"
7076 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7077 (unspec:VSTRUCT_QD
7078 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7079 (match_operand:V16QI 2 "register_operand" "w")]
7080 UNSPEC_REV_REGLIST))]
7081 "TARGET_SIMD"
7082 "#"
7083 "&& reload_completed"
7084 [(const_int 0)]
7085 {
7086 int i;
7087 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7088 for (i = 0; i < nregs; i++)
7089 {
7090 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7091 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7092 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7093 }
7094 DONE;
7095 }
7096 [(set_attr "type" "neon_tbl1_q")
7097 (set_attr "length" "<insn_count>")]
7098 )
7099
7100 ;; Reload patterns for AdvSIMD register list operands.
7101
7102 (define_expand "mov<mode>"
7103 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7104 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7105 "TARGET_SIMD"
7106 {
7107 if (can_create_pseudo_p ())
7108 {
7109 if (GET_CODE (operands[0]) != REG)
7110 operands[1] = force_reg (<MODE>mode, operands[1]);
7111 }
7112 })
7113
7114 (define_expand "mov<mode>"
7115 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7116 (match_operand:VSTRUCT 1 "general_operand"))]
7117 "TARGET_SIMD"
7118 {
7119 if (can_create_pseudo_p ())
7120 {
7121 if (GET_CODE (operands[0]) != REG)
7122 operands[1] = force_reg (<MODE>mode, operands[1]);
7123 }
7124 })
7125
7126 (define_expand "movv8di"
7127 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7128 (match_operand:V8DI 1 "general_operand"))]
7129 "TARGET_SIMD"
7130 {
7131 if (can_create_pseudo_p () && MEM_P (operands[0]))
7132 operands[1] = force_reg (V8DImode, operands[1]);
7133 })
7134
7135 (define_expand "aarch64_ld1x3<vstruct_elt>"
7136 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7137 (match_operand:DI 1 "register_operand")]
7138 "TARGET_SIMD"
7139 {
7140 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7141 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7142 DONE;
7143 })
7144
7145 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7146 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7147 (unspec:VSTRUCT_3QD
7148 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7149 UNSPEC_LD1))]
7150 "TARGET_SIMD"
7151 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7152 [(set_attr "type" "neon_load1_3reg<q>")]
7153 )
7154
7155 (define_expand "aarch64_ld1x4<vstruct_elt>"
7156 [(match_operand:VSTRUCT_4QD 0 "register_operand")
7157 (match_operand:DI 1 "register_operand")]
7158 "TARGET_SIMD"
7159 {
7160 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7161 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7162 DONE;
7163 })
7164
7165 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7166 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7167 (unspec:VSTRUCT_4QD
7168 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7169 UNSPEC_LD1))]
7170 "TARGET_SIMD"
7171 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7172 [(set_attr "type" "neon_load1_4reg<q>")]
7173 )
7174
7175 (define_expand "aarch64_st1x2<vstruct_elt>"
7176 [(match_operand:DI 0 "register_operand")
7177 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7178 "TARGET_SIMD"
7179 {
7180 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7181 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7182 DONE;
7183 })
7184
7185 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7186 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7187 (unspec:VSTRUCT_2QD
7188 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7189 UNSPEC_ST1))]
7190 "TARGET_SIMD"
7191 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7192 [(set_attr "type" "neon_store1_2reg<q>")]
7193 )
7194
7195 (define_expand "aarch64_st1x3<vstruct_elt>"
7196 [(match_operand:DI 0 "register_operand")
7197 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7198 "TARGET_SIMD"
7199 {
7200 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7201 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7202 DONE;
7203 })
7204
7205 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7206 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7207 (unspec:VSTRUCT_3QD
7208 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7209 UNSPEC_ST1))]
7210 "TARGET_SIMD"
7211 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7212 [(set_attr "type" "neon_store1_3reg<q>")]
7213 )
7214
7215 (define_expand "aarch64_st1x4<vstruct_elt>"
7216 [(match_operand:DI 0 "register_operand")
7217 (match_operand:VSTRUCT_4QD 1 "register_operand")]
7218 "TARGET_SIMD"
7219 {
7220 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7221 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7222 DONE;
7223 })
7224
7225 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7226 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7227 (unspec:VSTRUCT_4QD
7228 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7229 UNSPEC_ST1))]
7230 "TARGET_SIMD"
7231 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7232 [(set_attr "type" "neon_store1_4reg<q>")]
7233 )
7234
7235 (define_insn "*aarch64_mov<mode>"
7236 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7237 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7238 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7239 && (register_operand (operands[0], <MODE>mode)
7240 || register_operand (operands[1], <MODE>mode))"
7241 "@
7242 #
7243 st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7244 ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7245 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7246 neon_load<nregs>_<nregs>reg_q")
7247 (set_attr "length" "<insn_count>,4,4")]
7248 )
7249
7250 (define_insn "*aarch64_mov<mode>"
7251 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7252 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7253 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7254 && (register_operand (operands[0], <MODE>mode)
7255 || register_operand (operands[1], <MODE>mode))"
7256 "@
7257 #
7258 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7259 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7260 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7261 neon_load<nregs>_<nregs>reg_q")
7262 (set_attr "length" "<insn_count>,4,4")]
7263 )
7264
7265 (define_insn "*aarch64_movv8di"
7266 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7267 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7268 "!BYTES_BIG_ENDIAN
7269 && (register_operand (operands[0], V8DImode)
7270 || register_operand (operands[1], V8DImode))"
7271 "#"
7272 [(set_attr "type" "multiple,multiple,multiple")
7273 (set_attr "length" "32,16,16")]
7274 )
7275
7276 (define_insn "aarch64_be_ld1<mode>"
7277 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7278 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7279 "aarch64_simd_struct_operand" "Utv")]
7280 UNSPEC_LD1))]
7281 "TARGET_SIMD"
7282 "ld1\\t{%0<Vmtype>}, %1"
7283 [(set_attr "type" "neon_load1_1reg<q>")]
7284 )
7285
7286 (define_insn "aarch64_be_st1<mode>"
7287 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7288 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7289 UNSPEC_ST1))]
7290 "TARGET_SIMD"
7291 "st1\\t{%1<Vmtype>}, %0"
7292 [(set_attr "type" "neon_store1_1reg<q>")]
7293 )
7294
7295 (define_insn "*aarch64_be_mov<mode>"
7296 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7297 (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
7298 "TARGET_SIMD && BYTES_BIG_ENDIAN
7299 && (register_operand (operands[0], <MODE>mode)
7300 || register_operand (operands[1], <MODE>mode))"
7301 "@
7302 #
7303 stp\\t%d1, %R1, %0
7304 ldp\\t%d0, %R0, %1"
7305 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7306 (set_attr "length" "8,4,4")]
7307 )
7308
7309 (define_insn "*aarch64_be_mov<mode>"
7310 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7311 (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
7312 "TARGET_SIMD && BYTES_BIG_ENDIAN
7313 && (register_operand (operands[0], <MODE>mode)
7314 || register_operand (operands[1], <MODE>mode))"
7315 "@
7316 #
7317 stp\\t%q1, %R1, %0
7318 ldp\\t%q0, %R0, %1"
7319 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7320 (set_attr "length" "8,4,4")]
7321 )
7322
7323 (define_insn "*aarch64_be_movoi"
7324 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7325 (match_operand:OI 1 "general_operand" " w,w,m"))]
7326 "TARGET_SIMD && BYTES_BIG_ENDIAN
7327 && (register_operand (operands[0], OImode)
7328 || register_operand (operands[1], OImode))"
7329 "@
7330 #
7331 stp\\t%q1, %R1, %0
7332 ldp\\t%q0, %R0, %1"
7333 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7334 (set_attr "length" "8,4,4")]
7335 )
7336
7337 (define_insn "*aarch64_be_mov<mode>"
7338 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7339 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
7340 "TARGET_SIMD && BYTES_BIG_ENDIAN
7341 && (register_operand (operands[0], <MODE>mode)
7342 || register_operand (operands[1], <MODE>mode))"
7343 "#"
7344 [(set_attr "type" "multiple")
7345 (set_attr "length" "12,8,8")]
7346 )
7347
7348 (define_insn "*aarch64_be_movci"
7349 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
7350 (match_operand:CI 1 "general_operand" " w,w,o"))]
7351 "TARGET_SIMD && BYTES_BIG_ENDIAN
7352 && (register_operand (operands[0], CImode)
7353 || register_operand (operands[1], CImode))"
7354 "#"
7355 [(set_attr "type" "multiple")
7356 (set_attr "length" "12,8,8")]
7357 )
7358
7359 (define_insn "*aarch64_be_mov<mode>"
7360 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
7361 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
7362 "TARGET_SIMD && BYTES_BIG_ENDIAN
7363 && (register_operand (operands[0], <MODE>mode)
7364 || register_operand (operands[1], <MODE>mode))"
7365 "#"
7366 [(set_attr "type" "multiple")
7367 (set_attr "length" "16,8,8")]
7368 )
7369
7370 (define_insn "*aarch64_be_movxi"
7371 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
7372 (match_operand:XI 1 "general_operand" " w,w,o"))]
7373 "TARGET_SIMD && BYTES_BIG_ENDIAN
7374 && (register_operand (operands[0], XImode)
7375 || register_operand (operands[1], XImode))"
7376 "#"
7377 [(set_attr "type" "multiple")
7378 (set_attr "length" "16,8,8")]
7379 )
7380
7381 (define_split
7382 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
7383 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
7384 "TARGET_SIMD && reload_completed"
7385 [(const_int 0)]
7386 {
7387 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
7388 DONE;
7389 })
7390
7391 (define_split
7392 [(set (match_operand:OI 0 "register_operand")
7393 (match_operand:OI 1 "register_operand"))]
7394 "TARGET_SIMD && reload_completed"
7395 [(const_int 0)]
7396 {
7397 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
7398 DONE;
7399 })
7400
7401 (define_split
7402 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
7403 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
7404 "TARGET_SIMD && reload_completed"
7405 [(const_int 0)]
7406 {
7407 if (register_operand (operands[0], <MODE>mode)
7408 && register_operand (operands[1], <MODE>mode))
7409 {
7410 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
7411 DONE;
7412 }
7413 else if (BYTES_BIG_ENDIAN)
7414 {
7415 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7416 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7417 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7418 <MODE>mode, 0),
7419 simplify_gen_subreg (pair_mode, operands[1],
7420 <MODE>mode, 0));
7421 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
7422 simplify_gen_subreg (<VSTRUCT_ELT>mode,
7423 operands[0],
7424 <MODE>mode,
7425 2 * elt_size)),
7426 gen_lowpart (<VSTRUCT_ELT>mode,
7427 simplify_gen_subreg (<VSTRUCT_ELT>mode,
7428 operands[1],
7429 <MODE>mode,
7430 2 * elt_size)));
7431 DONE;
7432 }
7433 else
7434 FAIL;
7435 })
7436
7437 (define_split
7438 [(set (match_operand:CI 0 "nonimmediate_operand")
7439 (match_operand:CI 1 "general_operand"))]
7440 "TARGET_SIMD && reload_completed"
7441 [(const_int 0)]
7442 {
7443 if (register_operand (operands[0], CImode)
7444 && register_operand (operands[1], CImode))
7445 {
7446 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
7447 DONE;
7448 }
7449 else if (BYTES_BIG_ENDIAN)
7450 {
7451 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
7452 simplify_gen_subreg (OImode, operands[1], CImode, 0));
7453 emit_move_insn (gen_lowpart (V16QImode,
7454 simplify_gen_subreg (TImode, operands[0],
7455 CImode, 32)),
7456 gen_lowpart (V16QImode,
7457 simplify_gen_subreg (TImode, operands[1],
7458 CImode, 32)));
7459 DONE;
7460 }
7461 else
7462 FAIL;
7463 })
7464
7465 (define_split
7466 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
7467 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
7468 "TARGET_SIMD && reload_completed"
7469 [(const_int 0)]
7470 {
7471 if (register_operand (operands[0], <MODE>mode)
7472 && register_operand (operands[1], <MODE>mode))
7473 {
7474 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
7475 DONE;
7476 }
7477 else if (BYTES_BIG_ENDIAN)
7478 {
7479 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7480 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7481 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7482 <MODE>mode, 0),
7483 simplify_gen_subreg (pair_mode, operands[1],
7484 <MODE>mode, 0));
7485 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7486 <MODE>mode, 2 * elt_size),
7487 simplify_gen_subreg (pair_mode, operands[1],
7488 <MODE>mode, 2 * elt_size));
7489 DONE;
7490 }
7491 else
7492 FAIL;
7493 })
7494
7495 (define_split
7496 [(set (match_operand:XI 0 "nonimmediate_operand")
7497 (match_operand:XI 1 "general_operand"))]
7498 "TARGET_SIMD && reload_completed"
7499 [(const_int 0)]
7500 {
7501 if (register_operand (operands[0], XImode)
7502 && register_operand (operands[1], XImode))
7503 {
7504 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
7505 DONE;
7506 }
7507 else if (BYTES_BIG_ENDIAN)
7508 {
7509 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
7510 simplify_gen_subreg (OImode, operands[1], XImode, 0));
7511 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
7512 simplify_gen_subreg (OImode, operands[1], XImode, 32));
7513 DONE;
7514 }
7515 else
7516 FAIL;
7517 })
7518
7519 (define_split
7520 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7521 (match_operand:V8DI 1 "general_operand"))]
7522 "TARGET_SIMD && reload_completed"
7523 [(const_int 0)]
7524 {
7525 if (register_operand (operands[0], V8DImode)
7526 && register_operand (operands[1], V8DImode))
7527 {
7528 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
7529 DONE;
7530 }
7531 else if ((register_operand (operands[0], V8DImode)
7532 && memory_operand (operands[1], V8DImode))
7533 || (memory_operand (operands[0], V8DImode)
7534 && register_operand (operands[1], V8DImode)))
7535 {
7536 for (int offset = 0; offset < 64; offset += 16)
7537 emit_move_insn (simplify_gen_subreg (TImode, operands[0],
7538 V8DImode, offset),
7539 simplify_gen_subreg (TImode, operands[1],
7540 V8DImode, offset));
7541 DONE;
7542 }
7543 else
7544 FAIL;
7545 })
7546
7547 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
7548 [(match_operand:VSTRUCT_QD 0 "register_operand")
7549 (match_operand:DI 1 "register_operand")]
7550 "TARGET_SIMD"
7551 {
7552 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7553 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7554
7555 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
7556 DONE;
7557 })
7558
7559 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7560 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
7561 (unspec:VSTRUCT_2DNX [
7562 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
7563 UNSPEC_LD2_DREG))]
7564 "TARGET_SIMD"
7565 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7566 [(set_attr "type" "neon_load2_2reg<q>")]
7567 )
7568
7569 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7570 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
7571 (unspec:VSTRUCT_2DX [
7572 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
7573 UNSPEC_LD2_DREG))]
7574 "TARGET_SIMD"
7575 "ld1\\t{%S0.1d - %T0.1d}, %1"
7576 [(set_attr "type" "neon_load1_2reg<q>")]
7577 )
7578
7579 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7580 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
7581 (unspec:VSTRUCT_3DNX [
7582 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
7583 UNSPEC_LD3_DREG))]
7584 "TARGET_SIMD"
7585 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7586 [(set_attr "type" "neon_load3_3reg<q>")]
7587 )
7588
7589 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7590 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
7591 (unspec:VSTRUCT_3DX [
7592 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
7593 UNSPEC_LD3_DREG))]
7594 "TARGET_SIMD"
7595 "ld1\\t{%S0.1d - %U0.1d}, %1"
7596 [(set_attr "type" "neon_load1_3reg<q>")]
7597 )
7598
7599 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7600 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
7601 (unspec:VSTRUCT_4DNX [
7602 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
7603 UNSPEC_LD4_DREG))]
7604 "TARGET_SIMD"
7605 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7606 [(set_attr "type" "neon_load4_4reg<q>")]
7607 )
7608
7609 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7610 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
7611 (unspec:VSTRUCT_4DX [
7612 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
7613 UNSPEC_LD4_DREG))]
7614 "TARGET_SIMD"
7615 "ld1\\t{%S0.1d - %V0.1d}, %1"
7616 [(set_attr "type" "neon_load1_4reg<q>")]
7617 )
7618
7619 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7620 [(match_operand:VSTRUCT_D 0 "register_operand")
7621 (match_operand:DI 1 "register_operand")]
7622 "TARGET_SIMD"
7623 {
7624 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7625 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
7626 DONE;
7627 })
7628
7629 (define_expand "aarch64_ld1<VALL_F16:mode>"
7630 [(match_operand:VALL_F16 0 "register_operand")
7631 (match_operand:DI 1 "register_operand")]
7632 "TARGET_SIMD"
7633 {
7634 machine_mode mode = <VALL_F16:MODE>mode;
7635 rtx mem = gen_rtx_MEM (mode, operands[1]);
7636
7637 if (BYTES_BIG_ENDIAN)
7638 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
7639 else
7640 emit_move_insn (operands[0], mem);
7641 DONE;
7642 })
7643
7644 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7645 [(match_operand:VSTRUCT_Q 0 "register_operand")
7646 (match_operand:DI 1 "register_operand")]
7647 "TARGET_SIMD"
7648 {
7649 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7650 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
7651 DONE;
7652 })
7653
7654 (define_expand "aarch64_ld1x2<vstruct_elt>"
7655 [(match_operand:VSTRUCT_2QD 0 "register_operand")
7656 (match_operand:DI 1 "register_operand")]
7657 "TARGET_SIMD"
7658 {
7659 machine_mode mode = <MODE>mode;
7660 rtx mem = gen_rtx_MEM (mode, operands[1]);
7661
7662 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
7663 DONE;
7664 })
7665
7666 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
7667 [(match_operand:VSTRUCT_QD 0 "register_operand")
7668 (match_operand:DI 1 "register_operand")
7669 (match_operand:VSTRUCT_QD 2 "register_operand")
7670 (match_operand:SI 3 "immediate_operand")]
7671 "TARGET_SIMD"
7672 {
7673 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7674 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7675
7676 aarch64_simd_lane_bounds (operands[3], 0,
7677 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7678 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
7679 mem, operands[2], operands[3]));
7680 DONE;
7681 })
7682
7683 ;; Permuted-store expanders for neon intrinsics.
7684
7685 ;; Permute instructions
7686
7687 ;; vec_perm support
7688
7689 (define_expand "vec_perm<mode>"
7690 [(match_operand:VB 0 "register_operand")
7691 (match_operand:VB 1 "register_operand")
7692 (match_operand:VB 2 "register_operand")
7693 (match_operand:VB 3 "register_operand")]
7694 "TARGET_SIMD"
7695 {
7696 aarch64_expand_vec_perm (operands[0], operands[1],
7697 operands[2], operands[3], <nunits>);
7698 DONE;
7699 })
7700
7701 (define_insn "aarch64_qtbl1<mode>"
7702 [(set (match_operand:VB 0 "register_operand" "=w")
7703 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
7704 (match_operand:VB 2 "register_operand" "w")]
7705 UNSPEC_TBL))]
7706 "TARGET_SIMD"
7707 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
7708 [(set_attr "type" "neon_tbl1<q>")]
7709 )
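
;; For example (a sketch assuming <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t permute (uint8x16_t table, uint8x16_t idx)
;;   {
;;     /* Byte-wise gather; out-of-range indices produce 0.  Expected
;;        to emit "tbl v0.16b, {v0.16b}, v1.16b".  */
;;     return vqtbl1q_u8 (table, idx);
;;   }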
7710
7711 (define_insn "aarch64_qtbx1<mode>"
7712 [(set (match_operand:VB 0 "register_operand" "=w")
7713 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7714 (match_operand:V16QI 2 "register_operand" "w")
7715 (match_operand:VB 3 "register_operand" "w")]
7716 UNSPEC_TBX))]
7717 "TARGET_SIMD"
7718 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
7719 [(set_attr "type" "neon_tbl1<q>")]
7720 )
7721
7722 ;; Two source registers.
7723
7724 (define_insn "aarch64_qtbl2<mode>"
7725 [(set (match_operand:VB 0 "register_operand" "=w")
7726 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
7727 (match_operand:VB 2 "register_operand" "w")]
7728 UNSPEC_TBL))]
7729 "TARGET_SIMD"
7730 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
7731 [(set_attr "type" "neon_tbl2")]
7732 )
7733
7734 (define_insn "aarch64_qtbx2<mode>"
7735 [(set (match_operand:VB 0 "register_operand" "=w")
7736 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7737 (match_operand:V2x16QI 2 "register_operand" "w")
7738 (match_operand:VB 3 "register_operand" "w")]
7739 UNSPEC_TBX))]
7740 "TARGET_SIMD"
7741 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
7742 [(set_attr "type" "neon_tbl2")]
7743 )
7744
7745 ;; Three source registers.
7746
7747 (define_insn "aarch64_qtbl3<mode>"
7748 [(set (match_operand:VB 0 "register_operand" "=w")
7749 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
7750 (match_operand:VB 2 "register_operand" "w")]
7751 UNSPEC_TBL))]
7752 "TARGET_SIMD"
7753 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
7754 [(set_attr "type" "neon_tbl3")]
7755 )
7756
7757 (define_insn "aarch64_qtbx3<mode>"
7758 [(set (match_operand:VB 0 "register_operand" "=w")
7759 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7760 (match_operand:V3x16QI 2 "register_operand" "w")
7761 (match_operand:VB 3 "register_operand" "w")]
7762 UNSPEC_TBX))]
7763 "TARGET_SIMD"
7764 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
7765 [(set_attr "type" "neon_tbl3")]
7766 )
7767
7768 ;; Four source registers.
7769
7770 (define_insn "aarch64_qtbl4<mode>"
7771 [(set (match_operand:VB 0 "register_operand" "=w")
7772 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
7773 (match_operand:VB 2 "register_operand" "w")]
7774 UNSPEC_TBL))]
7775 "TARGET_SIMD"
7776 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
7777 [(set_attr "type" "neon_tbl4")]
7778 )
7779
7780 (define_insn "aarch64_qtbx4<mode>"
7781 [(set (match_operand:VB 0 "register_operand" "=w")
7782 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7783 (match_operand:V4x16QI 2 "register_operand" "w")
7784 (match_operand:VB 3 "register_operand" "w")]
7785 UNSPEC_TBX))]
7786 "TARGET_SIMD"
7787 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
7788 [(set_attr "type" "neon_tbl4")]
7789 )
7790
7791 (define_insn_and_split "aarch64_combinev16qi"
7792 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
7793 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
7794 (match_operand:V16QI 2 "register_operand" "w")]
7795 UNSPEC_CONCAT))]
7796 "TARGET_SIMD"
7797 "#"
7798 "&& reload_completed"
7799 [(const_int 0)]
7800 {
7801 aarch64_split_combinev16qi (operands);
7802 DONE;
7803 }
7804 [(set_attr "type" "multiple")]
7805 )
7806
7807 ;; This instruction's pattern is generated directly by
7808 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7809 ;; need corresponding changes there.
7810 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
7811 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7812 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7813 (match_operand:VALL_F16 2 "register_operand" "w")]
7814 PERMUTE))]
7815 "TARGET_SIMD"
7816 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7817 [(set_attr "type" "neon_permute<q>")]
7818 )
7819
7820 ;; This instruction's pattern is generated directly by
7821 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7822 ;; need corresponding changes there. Note that the immediate (third)
7823 ;; operand is a lane index, not a byte index.
7824 (define_insn "aarch64_ext<mode>"
7825 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7826 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7827 (match_operand:VALL_F16 2 "register_operand" "w")
7828 (match_operand:SI 3 "immediate_operand" "i")]
7829 UNSPEC_EXT))]
7830 "TARGET_SIMD"
7831 {
7832 operands[3] = GEN_INT (INTVAL (operands[3])
7833 * GET_MODE_UNIT_SIZE (<MODE>mode));
7834 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
7835 }
7836 [(set_attr "type" "neon_ext<q>")]
7837 )
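
;; A worked example of the conversion above: for V4SImode the unit
;; size is 4, so vextq_s32 (a, b, 3) (a sketch assuming <arm_neon.h>)
;; turns lane index 3 into byte offset 12 and emits
;; "ext v0.16b, v0.16b, v1.16b, #12".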
7838
7839 ;; This instruction's pattern is generated directly by
7840 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7841 ;; need corresponding changes there.
7842 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
7843 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7844 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
7845 REVERSE))]
7846 "TARGET_SIMD"
7847 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
7848 [(set_attr "type" "neon_rev<q>")]
7849 )
7850
7851 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7852 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
7853 (unspec:VSTRUCT_2DNX [
7854 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
7855 UNSPEC_ST2))]
7856 "TARGET_SIMD"
7857 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7858 [(set_attr "type" "neon_store2_2reg")]
7859 )
7860
7861 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7862 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
7863 (unspec:VSTRUCT_2DX [
7864 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
7865 UNSPEC_ST2))]
7866 "TARGET_SIMD"
7867 "st1\\t{%S1.1d - %T1.1d}, %0"
7868 [(set_attr "type" "neon_store1_2reg")]
7869 )
7870
7871 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7872 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
7873 (unspec:VSTRUCT_3DNX [
7874 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
7875 UNSPEC_ST3))]
7876 "TARGET_SIMD"
7877 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7878 [(set_attr "type" "neon_store3_3reg")]
7879 )
7880
7881 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7882 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
7883 (unspec:VSTRUCT_3DX [
7884 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
7885 UNSPEC_ST3))]
7886 "TARGET_SIMD"
7887 "st1\\t{%S1.1d - %U1.1d}, %0"
7888 [(set_attr "type" "neon_store1_3reg")]
7889 )
7890
7891 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7892 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
7893 (unspec:VSTRUCT_4DNX [
7894 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
7895 UNSPEC_ST4))]
7896 "TARGET_SIMD"
7897 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7898 [(set_attr "type" "neon_store4_4reg")]
7899 )
7900
7901 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7902 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
7903 (unspec:VSTRUCT_4DX [
7904 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
7905 UNSPEC_ST4))]
7906 "TARGET_SIMD"
7907 "st1\\t{%S1.1d - %V1.1d}, %0"
7908 [(set_attr "type" "neon_store1_4reg")]
7909 )
7910
7911 (define_expand "aarch64_st<nregs><vstruct_elt>"
7912 [(match_operand:DI 0 "register_operand")
7913 (match_operand:VSTRUCT_D 1 "register_operand")]
7914 "TARGET_SIMD"
7915 {
7916 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7917 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
7918 DONE;
7919 })
7920
7921 (define_expand "aarch64_st<nregs><vstruct_elt>"
7922 [(match_operand:DI 0 "register_operand")
7923 (match_operand:VSTRUCT_Q 1 "register_operand")]
7924 "TARGET_SIMD"
7925 {
7926 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7927 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
7928 DONE;
7929 })
7930
7931 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
7932 [(match_operand:DI 0 "register_operand")
7933 (match_operand:VSTRUCT_QD 1 "register_operand")
7934 (match_operand:SI 2 "immediate_operand")]
7935 "TARGET_SIMD"
7936 {
7937 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7938 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7939
7940 aarch64_simd_lane_bounds (operands[2], 0,
7941 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7942 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
7943 operands[1], operands[2]));
7944 DONE;
7945 })
7946
7947 (define_expand "aarch64_st1<VALL_F16:mode>"
7948 [(match_operand:DI 0 "register_operand")
7949 (match_operand:VALL_F16 1 "register_operand")]
7950 "TARGET_SIMD"
7951 {
7952 machine_mode mode = <VALL_F16:MODE>mode;
7953 rtx mem = gen_rtx_MEM (mode, operands[0]);
7954
7955 if (BYTES_BIG_ENDIAN)
7956 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
7957 else
7958 emit_move_insn (mem, operands[1]);
7959 DONE;
7960 })
7961
7962 ;; Standard pattern name vec_init<mode><Vel>.
7963
7964 (define_expand "vec_init<mode><Vel>"
7965 [(match_operand:VALL_F16 0 "register_operand")
7966 (match_operand 1 "" "")]
7967 "TARGET_SIMD"
7968 {
7969 aarch64_expand_vector_init (operands[0], operands[1]);
7970 DONE;
7971 })
7972
7973 (define_expand "vec_init<mode><Vhalf>"
7974 [(match_operand:VQ_NO2E 0 "register_operand")
7975 (match_operand 1 "" "")]
7976 "TARGET_SIMD"
7977 {
7978 aarch64_expand_vector_init (operands[0], operands[1]);
7979 DONE;
7980 })
7981
7982 (define_insn "*aarch64_simd_ld1r<mode>"
7983 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7984 (vec_duplicate:VALL_F16
7985 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
7986 "TARGET_SIMD"
7987 "ld1r\\t{%0.<Vtype>}, %1"
7988 [(set_attr "type" "neon_load1_all_lanes")]
7989 )
7990
7991 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
7992 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7993 (unspec:VSTRUCT_2QD [
7994 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
7995 UNSPEC_LD1))]
7996 "TARGET_SIMD"
7997 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7998 [(set_attr "type" "neon_load1_2reg<q>")]
7999 )
8000
8001
8002 (define_insn "@aarch64_frecpe<mode>"
8003 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8004 (unspec:VHSDF_HSDF
8005 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8006 UNSPEC_FRECPE))]
8007 "TARGET_SIMD"
8008 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8009 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8010 )
8011
8012 (define_insn "aarch64_frecpx<mode>"
8013 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8014 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8015 UNSPEC_FRECPX))]
8016 "TARGET_SIMD"
8017 "frecpx\t%<s>0, %<s>1"
8018 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8019 )
8020
8021 (define_insn "@aarch64_frecps<mode>"
8022 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8023 (unspec:VHSDF_HSDF
8024 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8025 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8026 UNSPEC_FRECPS))]
8027 "TARGET_SIMD"
8028 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8029 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8030 )
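
;; frecps (a, b) computes 2.0 - a * b, so the usual use is
;; Newton-Raphson refinement of an frecpe estimate of 1/d.  A sketch
;; (assumes <arm_neon.h>; the function name is ours):
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t recip (float32x4_t d)
;;   {
;;     float32x4_t x = vrecpeq_f32 (d);        /* initial estimate */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* x *= 2 - d*x */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* refine once more */
;;     return x;
;;   }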
8031
8032 (define_insn "aarch64_urecpe<mode>"
8033 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8034 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8035 UNSPEC_URECPE))]
8036 "TARGET_SIMD"
8037 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8038 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8039
8040 ;; Standard pattern name vec_extract<mode><Vel>.
8041
8042 (define_expand "vec_extract<mode><Vel>"
8043 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8044 (match_operand:VALL_F16 1 "register_operand")
8045 (match_operand:SI 2 "immediate_operand")]
8046 "TARGET_SIMD"
8047 {
8048 emit_insn
8049 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8050 DONE;
8051 })
8052
8053 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8054 (define_expand "vec_extract<mode><Vhalf>"
8055 [(match_operand:<VHALF> 0 "register_operand")
8056 (match_operand:VQMOV_NO2E 1 "register_operand")
8057 (match_operand 2 "immediate_operand")]
8058 "TARGET_SIMD"
8059 {
8060 int start = INTVAL (operands[2]);
8061 if (start != 0 && start != <nunits> / 2)
8062 FAIL;
8063 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8064 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8065 DONE;
8066 })
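
;; E.g. vget_low_s32 and vget_high_s32 (a sketch assuming
;; <arm_neon.h>) correspond to start 0 and <nunits>/2 respectively;
;; any other start makes the expander FAIL.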
8067
8068 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8069 (define_expand "vec_extractv2dfv1df"
8070 [(match_operand:V1DF 0 "register_operand")
8071 (match_operand:V2DF 1 "register_operand")
8072 (match_operand 2 "immediate_operand")]
8073 "TARGET_SIMD"
8074 {
8075 /* V1DF is rarely used by other patterns, so it is better to hide
8076 it in a subreg destination of a normal DF op. */
8077 rtx scalar0 = gen_lowpart (DFmode, operands[0]);
8078 emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
8079 DONE;
8080 })
8081
8082 ;; aes
8083
8084 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8085 [(set (match_operand:V16QI 0 "register_operand" "=w")
8086 (unspec:V16QI
8087 [(xor:V16QI
8088 (match_operand:V16QI 1 "register_operand" "%0")
8089 (match_operand:V16QI 2 "register_operand" "w"))]
8090 CRYPTO_AES))]
8091 "TARGET_SIMD && TARGET_AES"
8092 "aes<aes_op>\\t%0.16b, %2.16b"
8093 [(set_attr "type" "crypto_aese")]
8094 )
8095
8096 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8097 [(set (match_operand:V16QI 0 "register_operand" "=w")
8098 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8099 CRYPTO_AESMC))]
8100 "TARGET_SIMD && TARGET_AES"
8101 "aes<aesmc_op>\\t%0.16b, %1.16b"
8102 [(set_attr "type" "crypto_aesmc")]
8103 )
8104
8105 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8106 ;; and enforce the register dependency without scheduling or register
8107 ;; allocation messing up the order or introducing moves in between.
8108 ;; Mash the two together during combine.
8109
8110 (define_insn "*aarch64_crypto_aese_fused"
8111 [(set (match_operand:V16QI 0 "register_operand" "=w")
8112 (unspec:V16QI
8113 [(unspec:V16QI
8114 [(xor:V16QI
8115 (match_operand:V16QI 1 "register_operand" "%0")
8116 (match_operand:V16QI 2 "register_operand" "w"))]
8117 UNSPEC_AESE)]
8118 UNSPEC_AESMC))]
8119 "TARGET_SIMD && TARGET_AES
8120 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8121 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8122 [(set_attr "type" "crypto_aese")
8123 (set_attr "length" "8")]
8124 )
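
;; A minimal C sketch of the fusable sequence (assuming <arm_neon.h>;
;; the function name is illustrative only):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t aes_round (uint8x16_t state, uint8x16_t key)
;;   {
;;     /* AESE followed immediately by AESMC; combine can merge the
;;        pair into the fused pattern above when
;;        AARCH64_FUSE_AES_AESMC is enabled.  */
;;     return vaesmcq_u8 (vaeseq_u8 (state, key));
;;   }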
8125
8126 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8127 ;; and enforce the register dependency without scheduling or register
8128 ;; allocation messing up the order or introducing moves in between.
8129 ;; Mash the two together during combine.
8130
8131 (define_insn "*aarch64_crypto_aesd_fused"
8132 [(set (match_operand:V16QI 0 "register_operand" "=w")
8133 (unspec:V16QI
8134 [(unspec:V16QI
8135 [(xor:V16QI
8136 (match_operand:V16QI 1 "register_operand" "%0")
8137 (match_operand:V16QI 2 "register_operand" "w"))]
8138 UNSPEC_AESD)]
8139 UNSPEC_AESIMC))]
8140 "TARGET_SIMD && TARGET_AES
8141 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8142 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8143 [(set_attr "type" "crypto_aese")
8144 (set_attr "length" "8")]
8145 )
8146
8147 ;; sha1
8148
8149 (define_insn "aarch64_crypto_sha1hsi"
8150 [(set (match_operand:SI 0 "register_operand" "=w")
8151 (unspec:SI [(match_operand:SI 1
8152 "register_operand" "w")]
8153 UNSPEC_SHA1H))]
8154 "TARGET_SIMD && TARGET_SHA2"
8155 "sha1h\\t%s0, %s1"
8156 [(set_attr "type" "crypto_sha1_fast")]
8157 )
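
;; For reference, a minimal use of the SHA1H fixed rotate through its
;; ACLE intrinsic (assuming <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;   /* SHA1 fixed rotate of the hash value E.  */
;;   uint32_t sha1_rot (uint32_t e) { return vsha1h_u32 (e); }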
8158
8159 (define_insn "aarch64_crypto_sha1hv4si"
8160 [(set (match_operand:SI 0 "register_operand" "=w")
8161 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8162 (parallel [(const_int 0)]))]
8163 UNSPEC_SHA1H))]
8164 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8165 "sha1h\\t%s0, %s1"
8166 [(set_attr "type" "crypto_sha1_fast")]
8167 )
8168
8169 (define_insn "aarch64_be_crypto_sha1hv4si"
8170 [(set (match_operand:SI 0 "register_operand" "=w")
8171 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8172 (parallel [(const_int 3)]))]
8173 UNSPEC_SHA1H))]
8174 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
8175 "sha1h\\t%s0, %s1"
8176 [(set_attr "type" "crypto_sha1_fast")]
8177 )
8178
8179 (define_insn "aarch64_crypto_sha1su1v4si"
8180 [(set (match_operand:V4SI 0 "register_operand" "=w")
8181 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8182 (match_operand:V4SI 2 "register_operand" "w")]
8183 UNSPEC_SHA1SU1))]
8184 "TARGET_SIMD && TARGET_SHA2"
8185 "sha1su1\\t%0.4s, %2.4s"
8186 [(set_attr "type" "crypto_sha1_fast")]
8187 )
8188
8189 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8190 [(set (match_operand:V4SI 0 "register_operand" "=w")
8191 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8192 (match_operand:SI 2 "register_operand" "w")
8193 (match_operand:V4SI 3 "register_operand" "w")]
8194 CRYPTO_SHA1))]
8195 "TARGET_SIMD && TARGET_SHA2"
8196 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8197 [(set_attr "type" "crypto_sha1_slow")]
8198 )
8199
8200 (define_insn "aarch64_crypto_sha1su0v4si"
8201 [(set (match_operand:V4SI 0 "register_operand" "=w")
8202 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8203 (match_operand:V4SI 2 "register_operand" "w")
8204 (match_operand:V4SI 3 "register_operand" "w")]
8205 UNSPEC_SHA1SU0))]
8206 "TARGET_SIMD && TARGET_SHA2"
8207 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8208 [(set_attr "type" "crypto_sha1_xor")]
8209 )
8210
8211 ;; sha256
8212
8213 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8214 [(set (match_operand:V4SI 0 "register_operand" "=w")
8215 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8216 (match_operand:V4SI 2 "register_operand" "w")
8217 (match_operand:V4SI 3 "register_operand" "w")]
8218 CRYPTO_SHA256))]
8219 "TARGET_SIMD && TARGET_SHA2"
8220 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8221 [(set_attr "type" "crypto_sha256_slow")]
8222 )
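
;; Illustrative C for the SHA256 hash-update instruction (assuming
;; <arm_neon.h>; the variable names are ours):
;;
;;   #include <arm_neon.h>
;;   uint32x4_t sha256_step (uint32x4_t abcd, uint32x4_t efgh,
;;                           uint32x4_t wk)
;;   {
;;     return vsha256hq_u32 (abcd, efgh, wk);  /* sha256h  */
;;   }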
8223
8224 (define_insn "aarch64_crypto_sha256su0v4si"
8225 [(set (match_operand:V4SI 0 "register_operand" "=w")
8226 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8227 (match_operand:V4SI 2 "register_operand" "w")]
8228 UNSPEC_SHA256SU0))]
8229 "TARGET_SIMD && TARGET_SHA2"
8230 "sha256su0\\t%0.4s, %2.4s"
8231 [(set_attr "type" "crypto_sha256_fast")]
8232 )
8233
8234 (define_insn "aarch64_crypto_sha256su1v4si"
8235 [(set (match_operand:V4SI 0 "register_operand" "=w")
8236 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8237 (match_operand:V4SI 2 "register_operand" "w")
8238 (match_operand:V4SI 3 "register_operand" "w")]
8239 UNSPEC_SHA256SU1))]
8240 "TARGET_SIMD && TARGET_SHA2"
8241 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8242 [(set_attr "type" "crypto_sha256_slow")]
8243 )
8244
8245 ;; sha512
8246
8247 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8248 [(set (match_operand:V2DI 0 "register_operand" "=w")
8249 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8250 (match_operand:V2DI 2 "register_operand" "w")
8251 (match_operand:V2DI 3 "register_operand" "w")]
8252 CRYPTO_SHA512))]
8253 "TARGET_SIMD && TARGET_SHA3"
8254 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8255 [(set_attr "type" "crypto_sha512")]
8256 )
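
;; Illustrative C (assuming <arm_neon.h> and a target with +sha3):
;;
;;   #include <arm_neon.h>
;;   uint64x2_t sha512_step (uint64x2_t a, uint64x2_t b, uint64x2_t c)
;;   {
;;     return vsha512hq_u64 (a, b, c);  /* sha512h  */
;;   }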
8257
8258 (define_insn "aarch64_crypto_sha512su0qv2di"
8259 [(set (match_operand:V2DI 0 "register_operand" "=w")
8260 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8261 (match_operand:V2DI 2 "register_operand" "w")]
8262 UNSPEC_SHA512SU0))]
8263 "TARGET_SIMD && TARGET_SHA3"
8264 "sha512su0\\t%0.2d, %2.2d"
8265 [(set_attr "type" "crypto_sha512")]
8266 )
8267
8268 (define_insn "aarch64_crypto_sha512su1qv2di"
8269 [(set (match_operand:V2DI 0 "register_operand" "=w")
8270 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8271 (match_operand:V2DI 2 "register_operand" "w")
8272 (match_operand:V2DI 3 "register_operand" "w")]
8273 UNSPEC_SHA512SU1))]
8274 "TARGET_SIMD && TARGET_SHA3"
8275 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8276 [(set_attr "type" "crypto_sha512")]
8277 )
8278
8279 ;; sha3
8280
8281 (define_insn "eor3q<mode>4"
8282 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8283 (xor:VQ_I
8284 (xor:VQ_I
8285 (match_operand:VQ_I 2 "register_operand" "w")
8286 (match_operand:VQ_I 3 "register_operand" "w"))
8287 (match_operand:VQ_I 1 "register_operand" "w")))]
8288 "TARGET_SIMD && TARGET_SHA3"
8289 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8290 [(set_attr "type" "crypto_sha3")]
8291 )
8292
8293 (define_insn "aarch64_rax1qv2di"
8294 [(set (match_operand:V2DI 0 "register_operand" "=w")
8295 (xor:V2DI
8296 (rotate:V2DI
8297 (match_operand:V2DI 2 "register_operand" "w")
8298 (const_int 1))
8299 (match_operand:V2DI 1 "register_operand" "w")))]
8300 "TARGET_SIMD && TARGET_SHA3"
8301 "rax1\\t%0.2d, %1.2d, %2.2d"
8302 [(set_attr "type" "crypto_sha3")]
8303 )
8304
8305 (define_insn "aarch64_xarqv2di"
8306 [(set (match_operand:V2DI 0 "register_operand" "=w")
8307 (rotatert:V2DI
8308 (xor:V2DI
8309 (match_operand:V2DI 1 "register_operand" "%w")
8310 (match_operand:V2DI 2 "register_operand" "w"))
8311 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8312 "TARGET_SIMD && TARGET_SHA3"
8313 "xar\\t%0.2d, %1.2d, %2.2d, %3"
8314 [(set_attr "type" "crypto_sha3")]
8315 )
8316
8317 (define_insn "bcaxq<mode>4"
8318 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8319 (xor:VQ_I
8320 (and:VQ_I
8321 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8322 (match_operand:VQ_I 2 "register_operand" "w"))
8323 (match_operand:VQ_I 1 "register_operand" "w")))]
8324 "TARGET_SIMD && TARGET_SHA3"
8325 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8326 [(set_attr "type" "crypto_sha3")]
8327 )
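
;; A short C sketch of the four SHA3 helper operations (assuming
;; <arm_neon.h> and +sha3; function names are illustrative):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t e3 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
;;   { return veor3q_u8 (a, b, c); }            /* eor3: a ^ b ^ c      */
;;   uint64x2_t rx (uint64x2_t a, uint64x2_t b)
;;   { return vrax1q_u64 (a, b); }              /* rax1: a ^ rol(b,1)   */
;;   uint64x2_t xr (uint64x2_t a, uint64x2_t b)
;;   { return vxarq_u64 (a, b, 17); }           /* xar: ror(a ^ b, 17)  */
;;   uint8x16_t bc (uint8x16_t a, uint8x16_t b, uint8x16_t c)
;;   { return vbcaxq_u8 (a, b, c); }            /* bcax: a ^ (b & ~c)   */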
8328
8329 ;; SM3
8330
8331 (define_insn "aarch64_sm3ss1qv4si"
8332 [(set (match_operand:V4SI 0 "register_operand" "=w")
8333 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8334 (match_operand:V4SI 2 "register_operand" "w")
8335 (match_operand:V4SI 3 "register_operand" "w")]
8336 UNSPEC_SM3SS1))]
8337 "TARGET_SIMD && TARGET_SM4"
8338 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8339 [(set_attr "type" "crypto_sm3")]
8340 )
8341
8343 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
8344 [(set (match_operand:V4SI 0 "register_operand" "=w")
8345 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8346 (match_operand:V4SI 2 "register_operand" "w")
8347 (match_operand:V4SI 3 "register_operand" "w")
8348 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
8349 CRYPTO_SM3TT))]
8350 "TARGET_SIMD && TARGET_SM4"
8351 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
8352 [(set_attr "type" "crypto_sm3")]
8353 )
8354
8355 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
8356 [(set (match_operand:V4SI 0 "register_operand" "=w")
8357 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8358 (match_operand:V4SI 2 "register_operand" "w")
8359 (match_operand:V4SI 3 "register_operand" "w")]
8360 CRYPTO_SM3PART))]
8361 "TARGET_SIMD && TARGET_SM4"
8362 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
8363 [(set_attr "type" "crypto_sm3")]
8364 )
8365
8366 ;; SM4
8367
8368 (define_insn "aarch64_sm4eqv4si"
8369 [(set (match_operand:V4SI 0 "register_operand" "=w")
8370 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8371 (match_operand:V4SI 2 "register_operand" "w")]
8372 UNSPEC_SM4E))]
8373 "TARGET_SIMD && TARGET_SM4"
8374 "sm4e\\t%0.4s, %2.4s"
8375 [(set_attr "type" "crypto_sm4")]
8376 )
8377
8378 (define_insn "aarch64_sm4ekeyqv4si"
8379 [(set (match_operand:V4SI 0 "register_operand" "=w")
8380 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8381 (match_operand:V4SI 2 "register_operand" "w")]
8382 UNSPEC_SM4EKEY))]
8383 "TARGET_SIMD && TARGET_SM4"
8384 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
8385 [(set_attr "type" "crypto_sm4")]
8386 )
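
;; Illustrative C (assuming <arm_neon.h> and a target with +sm4):
;;
;;   #include <arm_neon.h>
;;   uint32x4_t sm4_round (uint32x4_t state, uint32x4_t key)
;;   { return vsm4eq_u32 (state, key); }           /* sm4e     */
;;   uint32x4_t sm4_key (uint32x4_t a, uint32x4_t b)
;;   { return vsm4ekeyq_u32 (a, b); }              /* sm4ekey  */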
8387
8388 ;; fp16fml
8389
8390 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
8391 [(set (match_operand:VDQSF 0 "register_operand")
8392 (unspec:VDQSF
8393 [(match_operand:VDQSF 1 "register_operand")
8394 (match_operand:<VFMLA_W> 2 "register_operand")
8395 (match_operand:<VFMLA_W> 3 "register_operand")]
8396 VFMLA16_LOW))]
8397 "TARGET_F16FML"
8398 {
8399 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8400 <nunits> * 2, false);
8401 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8402 <nunits> * 2, false);
8403
8404 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
8405 operands[1],
8406 operands[2],
8407 operands[3],
8408 p1, p2));
8409 DONE;
8411 })
8412
8413 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
8414 [(set (match_operand:VDQSF 0 "register_operand")
8415 (unspec:VDQSF
8416 [(match_operand:VDQSF 1 "register_operand")
8417 (match_operand:<VFMLA_W> 2 "register_operand")
8418 (match_operand:<VFMLA_W> 3 "register_operand")]
8419 VFMLA16_HIGH))]
8420 "TARGET_F16FML"
8421 {
8422 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8423 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8424
8425 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
8426 operands[1],
8427 operands[2],
8428 operands[3],
8429 p1, p2));
8430 DONE;
8431 })
8432
8433 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
8434 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8435 (fma:VDQSF
8436 (float_extend:VDQSF
8437 (vec_select:<VFMLA_SEL_W>
8438 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8439 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
8440 (float_extend:VDQSF
8441 (vec_select:<VFMLA_SEL_W>
8442 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8443 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8444 (match_operand:VDQSF 1 "register_operand" "0")))]
8445 "TARGET_F16FML"
8446 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8447 [(set_attr "type" "neon_fp_mul_s")]
8448 )
8449
8450 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
8451 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8452 (fma:VDQSF
8453 (float_extend:VDQSF
8454 (neg:<VFMLA_SEL_W>
8455 (vec_select:<VFMLA_SEL_W>
8456 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8457 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
8458 (float_extend:VDQSF
8459 (vec_select:<VFMLA_SEL_W>
8460 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8461 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8462 (match_operand:VDQSF 1 "register_operand" "0")))]
8463 "TARGET_F16FML"
8464 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8465 [(set_attr "type" "neon_fp_mul_s")]
8466 )
8467
8468 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
8469 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8470 (fma:VDQSF
8471 (float_extend:VDQSF
8472 (vec_select:<VFMLA_SEL_W>
8473 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8474 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
8475 (float_extend:VDQSF
8476 (vec_select:<VFMLA_SEL_W>
8477 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8478 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8479 (match_operand:VDQSF 1 "register_operand" "0")))]
8480 "TARGET_F16FML"
8481 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8482 [(set_attr "type" "neon_fp_mul_s")]
8483 )
8484
8485 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
8486 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8487 (fma:VDQSF
8488 (float_extend:VDQSF
8489 (neg:<VFMLA_SEL_W>
8490 (vec_select:<VFMLA_SEL_W>
8491 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8492 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
8493 (float_extend:VDQSF
8494 (vec_select:<VFMLA_SEL_W>
8495 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8496 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8497 (match_operand:VDQSF 1 "register_operand" "0")))]
8498 "TARGET_F16FML"
8499 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8500 [(set_attr "type" "neon_fp_mul_s")]
8501 )
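
;; Illustrative C for the widening multiply-accumulate (assuming
;; <arm_neon.h> and +fp16fml; function names are ours):
;;
;;   #include <arm_neon.h>
;;   /* fmlal: widen the low halves of a and b to f32 and accumulate.  */
;;   float32x2_t mla_lo (float32x2_t r, float16x4_t a, float16x4_t b)
;;   { return vfmlal_low_f16 (r, a, b); }
;;   /* fmlal2: the same for the high halves.  */
;;   float32x4_t mla_hi (float32x4_t r, float16x8_t a, float16x8_t b)
;;   { return vfmlalq_high_f16 (r, a, b); }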
8502
8503 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
8504 [(set (match_operand:V2SF 0 "register_operand")
8505 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8506 (match_operand:V4HF 2 "register_operand")
8507 (match_operand:V4HF 3 "register_operand")
8508 (match_operand:SI 4 "aarch64_imm2")]
8509 VFMLA16_LOW))]
8510 "TARGET_F16FML"
8511 {
8512 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
8513 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8514
8515 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
8516 operands[1],
8517 operands[2],
8518 operands[3],
8519 p1, lane));
8520 DONE;
8521 }
8522 )
8523
8524 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
8525 [(set (match_operand:V2SF 0 "register_operand")
8526 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8527 (match_operand:V4HF 2 "register_operand")
8528 (match_operand:V4HF 3 "register_operand")
8529 (match_operand:SI 4 "aarch64_imm2")]
8530 VFMLA16_HIGH))]
8531 "TARGET_F16FML"
8532 {
8533 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8534 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8535
8536 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
8537 operands[1],
8538 operands[2],
8539 operands[3],
8540 p1, lane));
8541 DONE;
8542 })
8543
8544 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
8545 [(set (match_operand:V2SF 0 "register_operand" "=w")
8546 (fma:V2SF
8547 (float_extend:V2SF
8548 (vec_select:V2HF
8549 (match_operand:V4HF 2 "register_operand" "w")
8550 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8551 (float_extend:V2SF
8552 (vec_duplicate:V2HF
8553 (vec_select:HF
8554 (match_operand:V4HF 3 "register_operand" "x")
8555 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8556 (match_operand:V2SF 1 "register_operand" "0")))]
8557 "TARGET_F16FML"
8558 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8559 [(set_attr "type" "neon_fp_mul_s")]
8560 )
8561
8562 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
8563 [(set (match_operand:V2SF 0 "register_operand" "=w")
8564 (fma:V2SF
8565 (float_extend:V2SF
8566 (neg:V2HF
8567 (vec_select:V2HF
8568 (match_operand:V4HF 2 "register_operand" "w")
8569 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8570 (float_extend:V2SF
8571 (vec_duplicate:V2HF
8572 (vec_select:HF
8573 (match_operand:V4HF 3 "register_operand" "x")
8574 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8575 (match_operand:V2SF 1 "register_operand" "0")))]
8576 "TARGET_F16FML"
8577 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8578 [(set_attr "type" "neon_fp_mul_s")]
8579 )
8580
8581 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
8582 [(set (match_operand:V2SF 0 "register_operand" "=w")
8583 (fma:V2SF
8584 (float_extend:V2SF
8585 (vec_select:V2HF
8586 (match_operand:V4HF 2 "register_operand" "w")
8587 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8588 (float_extend:V2SF
8589 (vec_duplicate:V2HF
8590 (vec_select:HF
8591 (match_operand:V4HF 3 "register_operand" "x")
8592 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8593 (match_operand:V2SF 1 "register_operand" "0")))]
8594 "TARGET_F16FML"
8595 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8596 [(set_attr "type" "neon_fp_mul_s")]
8597 )
8598
8599 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
8600 [(set (match_operand:V2SF 0 "register_operand" "=w")
8601 (fma:V2SF
8602 (float_extend:V2SF
8603 (neg:V2HF
8604 (vec_select:V2HF
8605 (match_operand:V4HF 2 "register_operand" "w")
8606 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8607 (float_extend:V2SF
8608 (vec_duplicate:V2HF
8609 (vec_select:HF
8610 (match_operand:V4HF 3 "register_operand" "x")
8611 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8612 (match_operand:V2SF 1 "register_operand" "0")))]
8613 "TARGET_F16FML"
8614 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8615 [(set_attr "type" "neon_fp_mul_s")]
8616 )
8617
8618 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
8619 [(set (match_operand:V4SF 0 "register_operand")
8620 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8621 (match_operand:V8HF 2 "register_operand")
8622 (match_operand:V8HF 3 "register_operand")
8623 (match_operand:SI 4 "aarch64_lane_imm3")]
8624 VFMLA16_LOW))]
8625 "TARGET_F16FML"
8626 {
8627 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
8628 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8629
8630 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
8631 operands[1],
8632 operands[2],
8633 operands[3],
8634 p1, lane));
8635 DONE;
8636 })
8637
8638 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
8639 [(set (match_operand:V4SF 0 "register_operand")
8640 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8641 (match_operand:V8HF 2 "register_operand")
8642 (match_operand:V8HF 3 "register_operand")
8643 (match_operand:SI 4 "aarch64_lane_imm3")]
8644 VFMLA16_HIGH))]
8645 "TARGET_F16FML"
8646 {
8647 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8648 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8649
8650 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
8651 operands[1],
8652 operands[2],
8653 operands[3],
8654 p1, lane));
8655 DONE;
8656 })
8657
8658 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
8659 [(set (match_operand:V4SF 0 "register_operand" "=w")
8660 (fma:V4SF
8661 (float_extend:V4SF
8662 (vec_select:V4HF
8663 (match_operand:V8HF 2 "register_operand" "w")
8664 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8665 (float_extend:V4SF
8666 (vec_duplicate:V4HF
8667 (vec_select:HF
8668 (match_operand:V8HF 3 "register_operand" "x")
8669 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8670 (match_operand:V4SF 1 "register_operand" "0")))]
8671 "TARGET_F16FML"
8672 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8673 [(set_attr "type" "neon_fp_mul_s")]
8674 )
8675
8676 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
8677 [(set (match_operand:V4SF 0 "register_operand" "=w")
8678 (fma:V4SF
8679 (float_extend:V4SF
8680 (neg:V4HF
8681 (vec_select:V4HF
8682 (match_operand:V8HF 2 "register_operand" "w")
8683 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8684 (float_extend:V4SF
8685 (vec_duplicate:V4HF
8686 (vec_select:HF
8687 (match_operand:V8HF 3 "register_operand" "x")
8688 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8689 (match_operand:V4SF 1 "register_operand" "0")))]
8690 "TARGET_F16FML"
8691 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8692 [(set_attr "type" "neon_fp_mul_s")]
8693 )
8694
8695 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
8696 [(set (match_operand:V4SF 0 "register_operand" "=w")
8697 (fma:V4SF
8698 (float_extend:V4SF
8699 (vec_select:V4HF
8700 (match_operand:V8HF 2 "register_operand" "w")
8701 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8702 (float_extend:V4SF
8703 (vec_duplicate:V4HF
8704 (vec_select:HF
8705 (match_operand:V8HF 3 "register_operand" "x")
8706 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8707 (match_operand:V4SF 1 "register_operand" "0")))]
8708 "TARGET_F16FML"
8709 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8710 [(set_attr "type" "neon_fp_mul_s")]
8711 )
8712
8713 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
8714 [(set (match_operand:V4SF 0 "register_operand" "=w")
8715 (fma:V4SF
8716 (float_extend:V4SF
8717 (neg:V4HF
8718 (vec_select:V4HF
8719 (match_operand:V8HF 2 "register_operand" "w")
8720 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8721 (float_extend:V4SF
8722 (vec_duplicate:V4HF
8723 (vec_select:HF
8724 (match_operand:V8HF 3 "register_operand" "x")
8725 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8726 (match_operand:V4SF 1 "register_operand" "0")))]
8727 "TARGET_F16FML"
8728 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8729 [(set_attr "type" "neon_fp_mul_s")]
8730 )
8731
8732 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
8733 [(set (match_operand:V2SF 0 "register_operand")
8734 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8735 (match_operand:V4HF 2 "register_operand")
8736 (match_operand:V8HF 3 "register_operand")
8737 (match_operand:SI 4 "aarch64_lane_imm3")]
8738 VFMLA16_LOW))]
8739 "TARGET_F16FML"
8740 {
8741 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
8742 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8743
8744 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
8745 operands[1],
8746 operands[2],
8747 operands[3],
8748 p1, lane));
8749 DONE;
8751 })
8752
8753 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
8754 [(set (match_operand:V2SF 0 "register_operand")
8755 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8756 (match_operand:V4HF 2 "register_operand")
8757 (match_operand:V8HF 3 "register_operand")
8758 (match_operand:SI 4 "aarch64_lane_imm3")]
8759 VFMLA16_HIGH))]
8760 "TARGET_F16FML"
8761 {
8762 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8763 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8764
8765 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
8766 operands[1],
8767 operands[2],
8768 operands[3],
8769 p1, lane));
8770 DONE;
8772 })
8773
8774 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
8775 [(set (match_operand:V2SF 0 "register_operand" "=w")
8776 (fma:V2SF
8777 (float_extend:V2SF
8778 (vec_select:V2HF
8779 (match_operand:V4HF 2 "register_operand" "w")
8780 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8781 (float_extend:V2SF
8782 (vec_duplicate:V2HF
8783 (vec_select:HF
8784 (match_operand:V8HF 3 "register_operand" "x")
8785 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8786 (match_operand:V2SF 1 "register_operand" "0")))]
8787 "TARGET_F16FML"
8788 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8789 [(set_attr "type" "neon_fp_mul_s")]
8790 )
8791
8792 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
8793 [(set (match_operand:V2SF 0 "register_operand" "=w")
8794 (fma:V2SF
8795 (float_extend:V2SF
8796 (neg:V2HF
8797 (vec_select:V2HF
8798 (match_operand:V4HF 2 "register_operand" "w")
8799 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8800 (float_extend:V2SF
8801 (vec_duplicate:V2HF
8802 (vec_select:HF
8803 (match_operand:V8HF 3 "register_operand" "x")
8804 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8805 (match_operand:V2SF 1 "register_operand" "0")))]
8806 "TARGET_F16FML"
8807 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8808 [(set_attr "type" "neon_fp_mul_s")]
8809 )
8810
8811 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
8812 [(set (match_operand:V2SF 0 "register_operand" "=w")
8813 (fma:V2SF
8814 (float_extend:V2SF
8815 (vec_select:V2HF
8816 (match_operand:V4HF 2 "register_operand" "w")
8817 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8818 (float_extend:V2SF
8819 (vec_duplicate:V2HF
8820 (vec_select:HF
8821 (match_operand:V8HF 3 "register_operand" "x")
8822 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8823 (match_operand:V2SF 1 "register_operand" "0")))]
8824 "TARGET_F16FML"
8825 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8826 [(set_attr "type" "neon_fp_mul_s")]
8827 )
8828
8829 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
8830 [(set (match_operand:V2SF 0 "register_operand" "=w")
8831 (fma:V2SF
8832 (float_extend:V2SF
8833 (neg:V2HF
8834 (vec_select:V2HF
8835 (match_operand:V4HF 2 "register_operand" "w")
8836 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8837 (float_extend:V2SF
8838 (vec_duplicate:V2HF
8839 (vec_select:HF
8840 (match_operand:V8HF 3 "register_operand" "x")
8841 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8842 (match_operand:V2SF 1 "register_operand" "0")))]
8843 "TARGET_F16FML"
8844 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8845 [(set_attr "type" "neon_fp_mul_s")]
8846 )
8847
8848 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
8849 [(set (match_operand:V4SF 0 "register_operand")
8850 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8851 (match_operand:V8HF 2 "register_operand")
8852 (match_operand:V4HF 3 "register_operand")
8853 (match_operand:SI 4 "aarch64_imm2")]
8854 VFMLA16_LOW))]
8855 "TARGET_F16FML"
8856 {
8857 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
8858 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8859
8860 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
8861 operands[1],
8862 operands[2],
8863 operands[3],
8864 p1, lane));
8865 DONE;
8866 })
8867
8868 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
8869 [(set (match_operand:V4SF 0 "register_operand")
8870 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8871 (match_operand:V8HF 2 "register_operand")
8872 (match_operand:V4HF 3 "register_operand")
8873 (match_operand:SI 4 "aarch64_imm2")]
8874 VFMLA16_HIGH))]
8875 "TARGET_F16FML"
8876 {
8877 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8878 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8879
8880 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
8881 operands[1],
8882 operands[2],
8883 operands[3],
8884 p1, lane));
8885 DONE;
8886 })
8887
8888 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
8889 [(set (match_operand:V4SF 0 "register_operand" "=w")
8890 (fma:V4SF
8891 (float_extend:V4SF
8892 (vec_select:V4HF
8893 (match_operand:V8HF 2 "register_operand" "w")
8894 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8895 (float_extend:V4SF
8896 (vec_duplicate:V4HF
8897 (vec_select:HF
8898 (match_operand:V4HF 3 "register_operand" "x")
8899 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8900 (match_operand:V4SF 1 "register_operand" "0")))]
8901 "TARGET_F16FML"
8902 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8903 [(set_attr "type" "neon_fp_mul_s")]
8904 )
8905
8906 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
8907 [(set (match_operand:V4SF 0 "register_operand" "=w")
8908 (fma:V4SF
8909 (float_extend:V4SF
8910 (neg:V4HF
8911 (vec_select:V4HF
8912 (match_operand:V8HF 2 "register_operand" "w")
8913 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8914 (float_extend:V4SF
8915 (vec_duplicate:V4HF
8916 (vec_select:HF
8917 (match_operand:V4HF 3 "register_operand" "x")
8918 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8919 (match_operand:V4SF 1 "register_operand" "0")))]
8920 "TARGET_F16FML"
8921 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8922 [(set_attr "type" "neon_fp_mul_s")]
8923 )
8924
8925 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
8926 [(set (match_operand:V4SF 0 "register_operand" "=w")
8927 (fma:V4SF
8928 (float_extend:V4SF
8929 (vec_select:V4HF
8930 (match_operand:V8HF 2 "register_operand" "w")
8931 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8932 (float_extend:V4SF
8933 (vec_duplicate:V4HF
8934 (vec_select:HF
8935 (match_operand:V4HF 3 "register_operand" "x")
8936 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8937 (match_operand:V4SF 1 "register_operand" "0")))]
8938 "TARGET_F16FML"
8939 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8940 [(set_attr "type" "neon_fp_mul_s")]
8941 )
8942
8943 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
8944 [(set (match_operand:V4SF 0 "register_operand" "=w")
8945 (fma:V4SF
8946 (float_extend:V4SF
8947 (neg:V4HF
8948 (vec_select:V4HF
8949 (match_operand:V8HF 2 "register_operand" "w")
8950 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8951 (float_extend:V4SF
8952 (vec_duplicate:V4HF
8953 (vec_select:HF
8954 (match_operand:V4HF 3 "register_operand" "x")
8955 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8956 (match_operand:V4SF 1 "register_operand" "0")))]
8957 "TARGET_F16FML"
8958 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8959 [(set_attr "type" "neon_fp_mul_s")]
8960 )
8961
8962 ;; pmull
8963
8964 (define_insn "aarch64_crypto_pmulldi"
8965 [(set (match_operand:TI 0 "register_operand" "=w")
8966 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
8967 (match_operand:DI 2 "register_operand" "w")]
8968 UNSPEC_PMULL))]
8969 "TARGET_SIMD && TARGET_AES"
8970 "pmull\\t%0.1q, %1.1d, %2.1d"
8971 [(set_attr "type" "crypto_pmull")]
8972 )
8973
8974 (define_insn "aarch64_crypto_pmullv2di"
8975 [(set (match_operand:TI 0 "register_operand" "=w")
8976 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
8977 (match_operand:V2DI 2 "register_operand" "w")]
8978 UNSPEC_PMULL2))]
8979 "TARGET_SIMD && TARGET_AES"
8980 "pmull2\\t%0.1q, %1.2d, %2.2d"
8981 [(set_attr "type" "crypto_pmull")]
8982 )
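
;; Illustrative C (assuming <arm_neon.h> and +aes, which provides the
;; 64x64->128 carry-less multiply):
;;
;;   #include <arm_neon.h>
;;   poly128_t clmul (poly64_t a, poly64_t b)
;;   { return vmull_p64 (a, b); }                   /* pmull   */
;;   poly128_t clmul_hi (poly64x2_t a, poly64x2_t b)
;;   { return vmull_high_p64 (a, b); }              /* pmull2  */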
8983
8984 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
8985 (define_insn "<optab><Vnarrowq><mode>2"
8986 [(set (match_operand:VQN 0 "register_operand" "=w")
8987 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
8988 "TARGET_SIMD"
8989 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
8990 [(set_attr "type" "neon_shift_imm_long")]
8991 )
8992
8993 (define_expand "aarch64_<su>xtl<mode>"
8994 [(set (match_operand:VQN 0 "register_operand" "=w")
8995 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
8996 "TARGET_SIMD"
8997 ""
8998 )
8999
9000 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9001 (define_insn "trunc<mode><Vnarrowq>2"
9002 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9003 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9004 "TARGET_SIMD"
9005 "xtn\t%0.<Vntype>, %1.<Vtype>"
9006 [(set_attr "type" "neon_move_narrow_q")]
9007 )
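
;; Illustrative C for the extend/truncate pair above (assuming
;; <arm_neon.h>):
;;
;;   #include <arm_neon.h>
;;   /* sxtl: sign-extend four 16-bit lanes to 32 bits.  */
;;   int32x4_t widen (int16x4_t v) { return vmovl_s16 (v); }
;;   /* xtn: truncate four 32-bit lanes back to 16 bits.  */
;;   int16x4_t narrow (int32x4_t v) { return vmovn_s32 (v); }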
9008
9009 (define_insn "aarch64_bfdot<mode>"
9010 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9011 (plus:VDQSF
9012 (unspec:VDQSF
9013 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9014 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9015 UNSPEC_BFDOT)
9016 (match_operand:VDQSF 1 "register_operand" "0")))]
9017 "TARGET_BF16_SIMD"
9018 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9019 [(set_attr "type" "neon_dot<q>")]
9020 )
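
;; Illustrative C (assuming <arm_neon.h> and +bf16):
;;
;;   #include <arm_neon.h>
;;   /* bfdot: 2-way bf16 dot products accumulated into f32 lanes.  */
;;   float32x2_t dot (float32x2_t r, bfloat16x4_t a, bfloat16x4_t b)
;;   { return vbfdot_f32 (r, a, b); }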
9021
9022 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9023 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9024 (plus:VDQSF
9025 (unspec:VDQSF
9026 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9027 (match_operand:VBF 3 "register_operand" "w")
9028 (match_operand:SI 4 "const_int_operand" "n")]
9029 UNSPEC_BFDOT)
9030 (match_operand:VDQSF 1 "register_operand" "0")))]
9031 "TARGET_BF16_SIMD"
9032 {
9033 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9034 int lane = INTVAL (operands[4]);
9035 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9036 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9037 }
9038 [(set_attr "type" "neon_dot<VDQSF:q>")]
9039 )
9040
9041 ;; vget_low/high_bf16
9042 (define_expand "aarch64_vget_lo_halfv8bf"
9043 [(match_operand:V4BF 0 "register_operand")
9044 (match_operand:V8BF 1 "register_operand")]
9045 "TARGET_BF16_SIMD"
9046 {
9047 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9048 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9049 DONE;
9050 })
9051
9052 (define_expand "aarch64_vget_hi_halfv8bf"
9053 [(match_operand:V4BF 0 "register_operand")
9054 (match_operand:V8BF 1 "register_operand")]
9055 "TARGET_BF16_SIMD"
9056 {
9057 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9058 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9059 DONE;
9060 })
9061
9062 ;; bfmmla
9063 (define_insn "aarch64_bfmmlaqv4sf"
9064 [(set (match_operand:V4SF 0 "register_operand" "=w")
9065 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9066 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9067 (match_operand:V8BF 3 "register_operand" "w")]
9068 UNSPEC_BFMMLA)))]
9069 "TARGET_BF16_SIMD"
9070 "bfmmla\\t%0.4s, %2.8h, %3.8h"
9071 [(set_attr "type" "neon_fp_mla_s_q")]
9072 )
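
;; Illustrative C (assuming <arm_neon.h> and +bf16):
;;
;;   #include <arm_neon.h>
;;   /* bfmmla: 2x4 by 4x2 bf16 matrix multiply, accumulated into the
;;      2x2 f32 result held in r.  */
;;   float32x4_t mmla (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b)
;;   { return vbfmmlaq_f32 (r, a, b); }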
9073
9074 ;; bfmlal<bt>
9075 (define_insn "aarch64_bfmlal<bt>v4sf"
9076 [(set (match_operand:V4SF 0 "register_operand" "=w")
9077 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9078 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9079 (match_operand:V8BF 3 "register_operand" "w")]
9080 BF_MLA)))]
9081 "TARGET_BF16_SIMD"
9082 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9083 [(set_attr "type" "neon_fp_mla_s_q")]
9084 )
9085
9086 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9087 [(set (match_operand:V4SF 0 "register_operand" "=w")
9088 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9089 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9090 (match_operand:VBF 3 "register_operand" "w")
9091 (match_operand:SI 4 "const_int_operand" "n")]
9092 BF_MLA)))]
9093 "TARGET_BF16_SIMD"
9094 {
9095 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9096 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9097 }
9098 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9099 )
9100
9101 ;; 8-bit integer matrix multiply-accumulate
9102 (define_insn "aarch64_simd_<sur>mmlav16qi"
9103 [(set (match_operand:V4SI 0 "register_operand" "=w")
9104 (plus:V4SI
9105 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9106 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9107 (match_operand:V4SI 1 "register_operand" "0")))]
9108 "TARGET_I8MM"
9109 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9110 [(set_attr "type" "neon_mla_s_q")]
9111 )
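
;; Illustrative C (assuming <arm_neon.h> and +i8mm):
;;
;;   #include <arm_neon.h>
;;   /* smmla: signed 2x8 by 8x2 int8 matrix multiply-accumulate.  */
;;   int32x4_t acc (int32x4_t r, int8x16_t a, int8x16_t b)
;;   { return vmmlaq_s32 (r, a, b); }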
9112
9113 ;; bfcvtn
9114 (define_insn "aarch64_bfcvtn<q><mode>"
9115 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9116 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9117 UNSPEC_BFCVTN))]
9118 "TARGET_BF16_SIMD"
9119 "bfcvtn\\t%0.4h, %1.4s"
9120 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9121 )
9122
9123 (define_insn "aarch64_bfcvtn2v8bf"
9124 [(set (match_operand:V8BF 0 "register_operand" "=w")
9125 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9126 (match_operand:V4SF 2 "register_operand" "w")]
9127 UNSPEC_BFCVTN2))]
9128 "TARGET_BF16_SIMD"
9129 "bfcvtn2\\t%0.8h, %2.4s"
9130 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9131 )
9132
9133 (define_insn "aarch64_bfcvtbf"
9134 [(set (match_operand:BF 0 "register_operand" "=w")
9135 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9136 UNSPEC_BFCVT))]
9137 "TARGET_BF16_FP"
9138 "bfcvt\\t%h0, %s1"
9139 [(set_attr "type" "f_cvt")]
9140 )
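
;; Illustrative C for the narrowing conversions (assuming <arm_neon.h>
;; and +bf16):
;;
;;   #include <arm_neon.h>
;;   /* bfcvtn: narrow four f32 lanes to bf16.  */
;;   bfloat16x4_t narrow4 (float32x4_t v) { return vcvt_bf16_f32 (v); }
;;   /* bfcvt: narrow one f32 scalar to bf16.  */
;;   bfloat16_t narrow1 (float32_t s) { return vcvth_bf16_f32 (s); }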
9141
9142 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
9143 (define_insn "aarch64_vbfcvt<mode>"
9144 [(set (match_operand:V4SF 0 "register_operand" "=w")
9145 (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9146 UNSPEC_BFCVTN))]
9147 "TARGET_BF16_SIMD"
9148 "shll\\t%0.4s, %1.4h, #16"
9149 [(set_attr "type" "neon_shift_imm_long")]
9150 )
9151
9152 (define_insn "aarch64_vbfcvt_highv8bf"
9153 [(set (match_operand:V4SF 0 "register_operand" "=w")
9154 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9155 UNSPEC_BFCVTN2))]
9156 "TARGET_BF16_SIMD"
9157 "shll2\\t%0.4s, %1.8h, #16"
9158 [(set_attr "type" "neon_shift_imm_long")]
9159 )
9160
9161 (define_insn "aarch64_bfcvtsf"
9162 [(set (match_operand:SF 0 "register_operand" "=w")
9163 (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9164 UNSPEC_BFCVT))]
9165 "TARGET_BF16_FP"
9166 "shl\\t%d0, %d1, #16"
9167 [(set_attr "type" "neon_shift_imm")]
9168 )
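
;; Since bf16 is the high half of an IEEE f32, widening is just a left
;; shift by 16, as the patterns above show.  Illustrative C (assuming
;; <arm_neon.h> and +bf16):
;;
;;   #include <arm_neon.h>
;;   /* shll #16: widen four bf16 lanes to f32.  */
;;   float32x4_t widen4 (bfloat16x4_t v) { return vcvt_f32_bf16 (v); }
;;   /* shll2 #16: widen the high four lanes of an 8-lane vector.  */
;;   float32x4_t widen_hi (bfloat16x8_t v)
;;   { return vcvtq_high_f32_bf16 (v); }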