;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

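;; The standard vector move expander.  A store of an all-zero vector can
;; be kept as a store of XZR (or an XZR pair) when the addressing mode
;; allows it; any other non-register source is forced into a register.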
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
        (match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
       return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
          "=w, Umn, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

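;; LDP/STP patterns for pairs of adjacent D and Q registers.  The second
;; memory operand must address the location immediately after the first;
;; the insn conditions check for exactly that offset.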
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
                plus_constant (Pmode,
                               XEXP (operands[0], 0),
                               GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


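;; After reload, split a 128-bit vector move between general registers
;; into two DImode moves, and split moves that cross between the FP and
;; general register files.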
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

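;; ctz (x) == clz (bit-reverse (x)).  There is no element-wide RBIT, so
;; reverse the bytes within each element with REV (the bswap pattern) and
;; then the bits within each byte with RBIT before counting leading zeros.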
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

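;; xorsign (x, y) flips the sign of X wherever Y is negative: the AND
;; keeps only the sign bit of operand 2, and the XOR transfers it into
;; operand 1.  E.g. xorsign (2.5, -4.0) == -2.5 and xorsign (2.5, 4.0)
;; == 2.5.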
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

;; The fcadd and fcmla patterns are kept as UNSPECs because their use
;; needs to guarantee that the source vectors are contiguous.  It would
;; be wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that is done
;; the permute would have been created as a LOAD_LANES which means the values
;; in the registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
                   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
                     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                 DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                 DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

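;; copysign (x, y) takes the sign bit from Y and everything else from X,
;; e.g. copysign (1.0, -2.0) == -1.0.  A single BSL with a sign-bit mask
;; implements the select.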
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate, as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) for the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")]
                        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
                        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:<VDBLW> 2 "register_operand" "0")]
                        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    if (TARGET_DOTPROD)
      {
        rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
        rtx abd = gen_reg_rtx (V16QImode);
        emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
        emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
                                          abd, ones));
        DONE;
      }
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                              operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                             operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                             operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
       return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
       return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
       return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
       gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

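;; signbit (x) is a logical shift of the sign bit down to bit 0 of the
;; corresponding integer vector, so each result element is 1 for a
;; negative input (including -0.0) and 0 otherwise.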
(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

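;; Vector shifts by a scalar amount.  An in-range constant amount becomes
;; an immediate-form shift; any other amount is duplicated across a
;; vector register (negated first for the right shifts) and handled by
;; the register-form SSHL/USHL, which shift right when given a negative
;; amount.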
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes would most certainly offset the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

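;; vec_set turns the scalar lane index into the one-hot vec_merge mask
;; that aarch64_simd_vec_set<mode> expects.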
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

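;; There is no integer SMAX/SMIN/UMAX/UMIN instruction for V2DI, so
;; synthesise one from a comparison and a vector conditional select.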
(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
                     (match_operand:V2DI 2 "register_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                                                        operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                                                     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

1593 (define_expand "vec_pack_trunc_<mode>"
1594 [(match_operand:<VNARROWD> 0 "register_operand")
1595 (match_operand:VDN 1 "register_operand")
1596 (match_operand:VDN 2 "register_operand")]
1597 "TARGET_SIMD"
1598 {
1599 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1602
1603 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1606 DONE;
1607 })
1608
1609 ;; For quads.
1610
1611 (define_insn "vec_pack_trunc_<mode>"
1612 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613 (vec_concat:<VNARROWQ2>
1614 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1616 "TARGET_SIMD"
1617 {
1618 if (BYTES_BIG_ENDIAN)
1619 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1620 else
1621 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1622 }
1623 [(set_attr "type" "multiple")
1624 (set_attr "length" "8")]
1625 )
1626
1627 ;; Widening operations.
1628
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632 (match_operand:VQW 1 "register_operand" "w")
1633 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1634 )))]
1635 "TARGET_SIMD"
1636 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637 [(set_attr "type" "neon_shift_imm_long")]
1638 )
1639
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 1 "register_operand" "w")
1644 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1645 )))]
1646 "TARGET_SIMD"
1647 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648 [(set_attr "type" "neon_shift_imm_long")]
1649 )
1650
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652 [(match_operand:<VWIDE> 0 "register_operand")
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1654 "TARGET_SIMD"
1655 {
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1658 operands[1], p));
1659 DONE;
1660 }
1661 )
1662
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1666 "TARGET_SIMD"
1667 {
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1670 operands[1], p));
1671 DONE;
1672 }
1673 )
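
;; As an editorial illustration (a C model of the unpacking done above;
;; not part of the port, and the names are hypothetical): sign-extending
;; the two halves of a V4SI into V2DI results behaves like
;;
;;   #include <stdint.h>
;;
;;   void
;;   unpack_s (int64_t lo[2], int64_t hi[2], const int32_t src[4])
;;   {
;;     for (int i = 0; i < 2; i++)
;;       {
;;         lo[i] = src[i];      /* sxtl  */
;;         hi[i] = src[i + 2];  /* sxtl2 */
;;       }
;;   }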
1674
1675 ;; Widening arithmetic.
1676
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1679 (plus:<VWIDE>
1680 (mult:<VWIDE>
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1686 (match_dup 3))))
1687 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1688 "TARGET_SIMD"
1689 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690 [(set_attr "type" "neon_mla_<Vetype>_long")]
1691 )
1692
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1695 (plus:<VWIDE>
1696 (mult:<VWIDE>
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1702 (match_dup 3))))
1703 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1704 "TARGET_SIMD"
1705 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706 [(set_attr "type" "neon_mla_<Vetype>_long")]
1707 )
1708
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1711 (minus:<VWIDE>
1712 (match_operand:<VWIDE> 1 "register_operand" "0")
1713 (mult:<VWIDE>
1714 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715 (match_operand:VQW 2 "register_operand" "w")
1716 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718 (match_operand:VQW 4 "register_operand" "w")
1719 (match_dup 3))))))]
1720 "TARGET_SIMD"
1721 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722 [(set_attr "type" "neon_mla_<Vetype>_long")]
1723 )
1724
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1727 (minus:<VWIDE>
1728 (match_operand:<VWIDE> 1 "register_operand" "0")
1729 (mult:<VWIDE>
1730 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731 (match_operand:VQW 2 "register_operand" "w")
1732 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734 (match_operand:VQW 4 "register_operand" "w")
1735 (match_dup 3))))))]
1736 "TARGET_SIMD"
1737 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738 [(set_attr "type" "neon_mla_<Vetype>_long")]
1739 )
1740
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1743 (plus:<VWIDE>
1744 (mult:<VWIDE>
1745 (ANY_EXTEND:<VWIDE>
1746 (match_operand:VD_BHSI 1 "register_operand" "w"))
1747 (ANY_EXTEND:<VWIDE>
1748 (match_operand:VD_BHSI 2 "register_operand" "w")))
1749 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1750 "TARGET_SIMD"
1751 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752 [(set_attr "type" "neon_mla_<Vetype>_long")]
1753 )
1754
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1757 (minus:<VWIDE>
1758 (match_operand:<VWIDE> 1 "register_operand" "0")
1759 (mult:<VWIDE>
1760 (ANY_EXTEND:<VWIDE>
1761 (match_operand:VD_BHSI 2 "register_operand" "w"))
1762 (ANY_EXTEND:<VWIDE>
1763 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1764 "TARGET_SIMD"
1765 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766 [(set_attr "type" "neon_mla_<Vetype>_long")]
1767 )
1768
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 1 "register_operand" "w")
1773 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775 (match_operand:VQW 2 "register_operand" "w")
1776 (match_dup 3)))))]
1777 "TARGET_SIMD"
1778 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779 [(set_attr "type" "neon_mul_<Vetype>_long")]
1780 )
1781
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783 [(match_operand:<VWIDE> 0 "register_operand")
1784 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1785 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1786 "TARGET_SIMD"
1787 {
1788 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1790 operands[1],
1791 operands[2], p));
1792 DONE;
1793 }
1794 )
1795
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799 (match_operand:VQW 1 "register_operand" "w")
1800 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1803 (match_dup 3)))))]
1804 "TARGET_SIMD"
1805 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806 [(set_attr "type" "neon_mul_<Vetype>_long")]
1807 )
1808
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810 [(match_operand:<VWIDE> 0 "register_operand")
1811 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1812 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1813 "TARGET_SIMD"
1814 {
1815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1817 operands[1],
1818 operands[2], p));
1819 DONE;
1820
1821 }
1822 )
1823
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1829 ;;
1830 ;; Floating-point operations can raise an exception. Vectorizing such
1831 ;; operations is safe for the reasons explained below.
1832 ;;
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature. In the
1835 ;; event of a floating-point exception being raised by vectorised
1836 ;; code then:
1837 ;; 1. If trapped floating-point exceptions are available, then a trap
1838 ;; will be taken when any lane raises an enabled exception. A trap
1839 ;; handler may determine which lane raised the exception.
1840 ;; 2. Alternatively a sticky exception flag is set in the
1841 ;; floating-point status register (FPSR). Software may explicitly
1842 ;; test the exception flags; such tests either prevent
1843 ;; vectorisation, allowing precise identification of the failing
1844 ;; operation, or occur outside of vectorisable regions, in which
1845 ;; case the specific operation and lane are not of interest.
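;;
;; As an editorial illustration of case 2 (using only the standard C99
;; <fenv.h> interface; this code is not part of the port): a sticky-flag
;; test placed after a loop only asks whether any iteration (lane)
;; raised the exception, which is compatible with vectorisation:
;;
;;   #include <fenv.h>
;;
;;   int
;;   any_div_by_zero (float *c, const float *a, const float *b, int n)
;;   {
;;     feclearexcept (FE_DIVBYZERO);
;;     for (int i = 0; i < n; i++)
;;       c[i] = a[i] / b[i];
;;     /* The FPSR flag is sticky: it is set if any lane raised it.  */
;;     return fetestexcept (FE_DIVBYZERO) != 0;
;;   }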
1846
1847 ;; FP arithmetic operations.
1848
1849 (define_insn "add<mode>3"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")))]
1853 "TARGET_SIMD"
1854 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1856 )
1857
1858 (define_insn "sub<mode>3"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861 (match_operand:VHSDF 2 "register_operand" "w")))]
1862 "TARGET_SIMD"
1863 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1865 )
1866
1867 (define_insn "mul<mode>3"
1868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870 (match_operand:VHSDF 2 "register_operand" "w")))]
1871 "TARGET_SIMD"
1872 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1874 )
1875
1876 (define_expand "div<mode>3"
1877 [(set (match_operand:VHSDF 0 "register_operand")
1878 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1879 (match_operand:VHSDF 2 "register_operand")))]
1880 "TARGET_SIMD"
1881 {
1882 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1883 DONE;
1884
1885 operands[1] = force_reg (<MODE>mode, operands[1]);
1886 })
1887
1888 (define_insn "*div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891 (match_operand:VHSDF 2 "register_operand" "w")))]
1892 "TARGET_SIMD"
1893 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894 [(set_attr "type" "neon_fp_div_<stype><q>")]
1895 )
1896
1897 (define_insn "neg<mode>2"
1898 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1900 "TARGET_SIMD"
1901 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1903 )
1904
1905 (define_insn "abs<mode>2"
1906 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1908 "TARGET_SIMD"
1909 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1911 )
1912
1913 (define_insn "fma<mode>4"
1914 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916 (match_operand:VHSDF 2 "register_operand" "w")
1917 (match_operand:VHSDF 3 "register_operand" "0")))]
1918 "TARGET_SIMD"
1919 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1921 )
1922
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924 [(set (match_operand:VDQF 0 "register_operand" "=w")
1925 (fma:VDQF
1926 (vec_duplicate:VDQF
1927 (vec_select:<VEL>
1928 (match_operand:VDQF 1 "register_operand" "<h_con>")
1929 (parallel [(match_operand:SI 2 "immediate_operand")])))
1930 (match_operand:VDQF 3 "register_operand" "w")
1931 (match_operand:VDQF 4 "register_operand" "0")))]
1932 "TARGET_SIMD"
1933 {
1934 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1936 }
1937 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1938 )
1939
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1942 (fma:VDQSF
1943 (vec_duplicate:VDQSF
1944 (vec_select:<VEL>
1945 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946 (parallel [(match_operand:SI 2 "immediate_operand")])))
1947 (match_operand:VDQSF 3 "register_operand" "w")
1948 (match_operand:VDQSF 4 "register_operand" "0")))]
1949 "TARGET_SIMD"
1950 {
1951 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1953 }
1954 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1955 )
1956
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958 [(set (match_operand:VMUL 0 "register_operand" "=w")
1959 (fma:VMUL
1960 (vec_duplicate:VMUL
1961 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962 (match_operand:VMUL 2 "register_operand" "w")
1963 (match_operand:VMUL 3 "register_operand" "0")))]
1964 "TARGET_SIMD"
1965 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970 [(set (match_operand:DF 0 "register_operand" "=w")
1971 (fma:DF
1972 (vec_select:DF
1973 (match_operand:V2DF 1 "register_operand" "w")
1974 (parallel [(match_operand:SI 2 "immediate_operand")]))
1975 (match_operand:DF 3 "register_operand" "w")
1976 (match_operand:DF 4 "register_operand" "0")))]
1977 "TARGET_SIMD"
1978 {
1979 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1981 }
1982 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1983 )
1984
1985 (define_insn "fnma<mode>4"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987 (fma:VHSDF
1988 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989 (match_operand:VHSDF 2 "register_operand" "w")
1990 (match_operand:VHSDF 3 "register_operand" "0")))]
1991 "TARGET_SIMD"
1992 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1994 )
1995
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997 [(set (match_operand:VDQF 0 "register_operand" "=w")
1998 (fma:VDQF
1999 (neg:VDQF
2000 (match_operand:VDQF 3 "register_operand" "w"))
2001 (vec_duplicate:VDQF
2002 (vec_select:<VEL>
2003 (match_operand:VDQF 1 "register_operand" "<h_con>")
2004 (parallel [(match_operand:SI 2 "immediate_operand")])))
2005 (match_operand:VDQF 4 "register_operand" "0")))]
2006 "TARGET_SIMD"
2007 {
2008 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2010 }
2011 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2012 )
2013
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2016 (fma:VDQSF
2017 (neg:VDQSF
2018 (match_operand:VDQSF 3 "register_operand" "w"))
2019 (vec_duplicate:VDQSF
2020 (vec_select:<VEL>
2021 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022 (parallel [(match_operand:SI 2 "immediate_operand")])))
2023 (match_operand:VDQSF 4 "register_operand" "0")))]
2024 "TARGET_SIMD"
2025 {
2026 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2028 }
2029 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2030 )
2031
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033 [(set (match_operand:VMUL 0 "register_operand" "=w")
2034 (fma:VMUL
2035 (neg:VMUL
2036 (match_operand:VMUL 2 "register_operand" "w"))
2037 (vec_duplicate:VMUL
2038 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039 (match_operand:VMUL 3 "register_operand" "0")))]
2040 "TARGET_SIMD"
2041 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2043 )
2044
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046 [(set (match_operand:DF 0 "register_operand" "=w")
2047 (fma:DF
2048 (vec_select:DF
2049 (match_operand:V2DF 1 "register_operand" "w")
2050 (parallel [(match_operand:SI 2 "immediate_operand")]))
2051 (neg:DF
2052 (match_operand:DF 3 "register_operand" "w"))
2053 (match_operand:DF 4 "register_operand" "0")))]
2054 "TARGET_SIMD"
2055 {
2056 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2058 }
2059 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2060 )
2061
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2064 (define_insn "<frint_pattern><mode>2"
2065 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2067 FRINT))]
2068 "TARGET_SIMD"
2069 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070 [(set_attr "type" "neon_fp_round_<stype><q>")]
2071 )
2072
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor.
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078 [(match_operand:VHSDF 1 "register_operand" "w")]
2079 FCVT)))]
2080 "TARGET_SIMD"
2081 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2083 )
2084
2085 ;; HF Scalar variants of related SIMD instructions.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087 [(set (match_operand:HI 0 "register_operand" "=w")
2088 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2089 FCVT)))]
2090 "TARGET_SIMD_F16INST"
2091 "fcvt<frint_suffix><su>\t%h0, %h1"
2092 [(set_attr "type" "neon_fp_to_int_s")]
2093 )
2094
2095 (define_insn "<optab>_trunchfhi2"
2096 [(set (match_operand:HI 0 "register_operand" "=w")
2097 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098 "TARGET_SIMD_F16INST"
2099 "fcvtz<su>\t%h0, %h1"
2100 [(set_attr "type" "neon_fp_to_int_s")]
2101 )
2102
2103 (define_insn "<optab>hihf2"
2104 [(set (match_operand:HF 0 "register_operand" "=w")
2105 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106 "TARGET_SIMD_F16INST"
2107 "<su_optab>cvtf\t%h0, %h1"
2108 [(set_attr "type" "neon_int_to_fp_s")]
2109 )
2110
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2114 [(mult:VDQF
2115 (match_operand:VDQF 1 "register_operand" "w")
2116 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2117 UNSPEC_FRINTZ)))]
2118 "TARGET_SIMD
2119 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2121 {
2122 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2123 char buf[64];
2124 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125 output_asm_insn (buf, operands);
2126 return "";
2127 }
2128 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2129 )
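
;; Editorial illustration of the pattern above (the assembly shown is
;; indicative): converting (x * 16.0f) from V4SF to V4SI folds the
;; power-of-two multiply into the fixed-point form, since 16.0 == 2^4:
;;
;;   fcvtzs  v0.4s, v1.4s, #4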
2130
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134 [(match_operand:VHSDF 1 "register_operand")]
2135 UNSPEC_FRINTZ)))]
2136 "TARGET_SIMD"
2137 {})
2138
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142 [(match_operand:VHSDF 1 "register_operand")]
2143 UNSPEC_FRINTZ)))]
2144 "TARGET_SIMD"
2145 {})
2146
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148 [(set (match_operand:VHSDF 0 "register_operand")
2149 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2150 UNSPEC_FRINTZ))]
2151 "TARGET_SIMD"
2152 {})
2153
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2156 (FLOATUORS:VHSDF
2157 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2158 "TARGET_SIMD"
2159 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2161 )
2162
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2166
2167 ;; Float widening operations.
2168
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171 (float_extend:<VWIDE> (vec_select:<VHALF>
2172 (match_operand:VQ_HSF 1 "register_operand" "w")
2173 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2174 )))]
2175 "TARGET_SIMD"
2176 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177 [(set_attr "type" "neon_fp_cvt_widen_s")]
2178 )
2179
2180 ;; Convert between fixed-point and floating-point (vector modes)
2181
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184 (unspec:<VHSDF:FCVT_TARGET>
2185 [(match_operand:VHSDF 1 "register_operand" "w")
2186 (match_operand:SI 2 "immediate_operand" "i")]
2187 FCVT_F2FIXED))]
2188 "TARGET_SIMD"
2189 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2191 )
2192
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195 (unspec:<VDQ_HSDI:FCVT_TARGET>
2196 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197 (match_operand:SI 2 "immediate_operand" "i")]
2198 FCVT_FIXED2F))]
2199 "TARGET_SIMD"
2200 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2202 )
2203
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns, their behavior is as required.
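;;
;; For example (editorial note): for V8HI, aarch64_simd_vect_par_cnst_half
;; selects the "hi" half with
;;   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])
;; on little-endian but
;;   (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])
;; on big-endian, so that "hi" always names the higher architectural lanes.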
2212
2213 (define_expand "vec_unpacks_lo_<mode>"
2214 [(match_operand:<VWIDE> 0 "register_operand")
2215 (match_operand:VQ_HSF 1 "register_operand")]
2216 "TARGET_SIMD"
2217 {
2218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2220 operands[1], p));
2221 DONE;
2222 }
2223 )
2224
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227 (float_extend:<VWIDE> (vec_select:<VHALF>
2228 (match_operand:VQ_HSF 1 "register_operand" "w")
2229 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2230 )))]
2231 "TARGET_SIMD"
2232 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233 [(set_attr "type" "neon_fp_cvt_widen_s")]
2234 )
2235
2236 (define_expand "vec_unpacks_hi_<mode>"
2237 [(match_operand:<VWIDE> 0 "register_operand")
2238 (match_operand:VQ_HSF 1 "register_operand")]
2239 "TARGET_SIMD"
2240 {
2241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2243 operands[1], p));
2244 DONE;
2245 }
2246 )
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249 (float_extend:<VWIDE>
2250 (match_operand:VDF 1 "register_operand" "w")))]
2251 "TARGET_SIMD"
2252 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253 [(set_attr "type" "neon_fp_cvt_widen_s")]
2254 )
2255
2256 ;; Float narrowing operations.
2257
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259 [(set (match_operand:VDF 0 "register_operand" "=w")
2260 (float_truncate:VDF
2261 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2262 "TARGET_SIMD"
2263 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2265 )
2266
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2269 (vec_concat:<VDBL>
2270 (match_operand:VDF 1 "register_operand" "0")
2271 (float_truncate:VDF
2272 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2276 )
2277
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2280 (vec_concat:<VDBL>
2281 (float_truncate:VDF
2282 (match_operand:<VWIDE> 2 "register_operand" "w"))
2283 (match_operand:VDF 1 "register_operand" "0")))]
2284 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2287 )
2288
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290 [(match_operand:<VDBL> 0 "register_operand")
2291 (match_operand:VDF 1 "register_operand")
2292 (match_operand:<VWIDE> 2 "register_operand")]
2293 "TARGET_SIMD"
2294 {
2295 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298 emit_insn (gen (operands[0], operands[1], operands[2]));
2299 DONE;
2300 }
2301 )
2302
2303 (define_expand "vec_pack_trunc_v2df"
2304 [(set (match_operand:V4SF 0 "register_operand")
2305 (vec_concat:V4SF
2306 (float_truncate:V2SF
2307 (match_operand:V2DF 1 "register_operand"))
2308 (float_truncate:V2SF
2309 (match_operand:V2DF 2 "register_operand"))
2310 ))]
2311 "TARGET_SIMD"
2312 {
2313 rtx tmp = gen_reg_rtx (V2SFmode);
2314 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2316
2317 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319 tmp, operands[hi]));
2320 DONE;
2321 }
2322 )
2323
2324 (define_expand "vec_pack_trunc_df"
2325 [(set (match_operand:V2SF 0 "register_operand")
2326 (vec_concat:V2SF
2327 (float_truncate:SF
2328 (match_operand:DF 1 "register_operand"))
2329 (float_truncate:SF
2330 (match_operand:DF 2 "register_operand"))
2331 ))]
2332 "TARGET_SIMD"
2333 {
2334 rtx tmp = gen_reg_rtx (V2DFmode);
2335 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2337
2338 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2341 DONE;
2342 }
2343 )
2344
2345 ;; FP Max/Min
2346 ;; Max/Min are introduced by idiom recognition in GCC's mid-end. An
2347 ;; expression like:
2348 ;; a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2351 ;; -ffast-math.
2352 ;;
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2358 ;; NaNs.
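;;
;; As an editorial illustration (the names are hypothetical; the fminnm
;; selection follows from the pattern below): with -ffast-math, the loop in
;;
;;   void
;;   vmin (float *restrict a, const float *restrict b,
;;         const float *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = b[i] < c[i] ? b[i] : c[i];
;;   }
;;
;; is recognised as MIN_EXPR and may be vectorised using fminnm.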
2359
2360 (define_insn "<su><maxmin><mode>3"
2361 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363 (match_operand:VHSDF 2 "register_operand" "w")))]
2364 "TARGET_SIMD"
2365 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2367 )
2368
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")]
2376 FMAXMIN_UNS))]
2377 "TARGET_SIMD"
2378 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2380 )
2381
2382 ;; 'across lanes' add.
2383
2384 (define_expand "reduc_plus_scal_<mode>"
2385 [(match_operand:<VEL> 0 "register_operand")
2386 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2387 UNSPEC_ADDV)]
2388 "TARGET_SIMD"
2389 {
2390 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391 rtx scratch = gen_reg_rtx (<MODE>mode);
2392 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2394 DONE;
2395 }
2396 )
2397
2398 (define_insn "aarch64_faddp<mode>"
2399 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401 (match_operand:VHSDF 2 "register_operand" "w")]
2402 UNSPEC_FADDV))]
2403 "TARGET_SIMD"
2404 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2406 )
2407
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409 [(set (match_operand:VDQV 0 "register_operand" "=w")
2410 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2411 UNSPEC_ADDV))]
2412 "TARGET_SIMD"
2413 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414 [(set_attr "type" "neon_reduc_add<q>")]
2415 )
2416
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418 [(set (match_operand:V2SI 0 "register_operand" "=w")
2419 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2420 UNSPEC_ADDV))]
2421 "TARGET_SIMD"
2422 "addp\\t%0.2s, %1.2s, %1.2s"
2423 [(set_attr "type" "neon_reduc_add")]
2424 )
2425
2426 (define_insn "reduc_plus_scal_<mode>"
2427 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2429 UNSPEC_FADDV))]
2430 "TARGET_SIMD"
2431 "faddp\\t%<Vetype>0, %1.<Vtype>"
2432 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2433 )
2434
2435 (define_expand "reduc_plus_scal_v4sf"
2436 [(set (match_operand:SF 0 "register_operand")
2437 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2438 UNSPEC_FADDV))]
2439 "TARGET_SIMD"
2440 {
2441 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442 rtx scratch = gen_reg_rtx (V4SFmode);
2443 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2446 DONE;
2447 })
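
;; Editorial note on the expansion above: with operand 1 = {a, b, c, d},
;; the first FADDP produces {a+b, c+d, a+b, c+d} and the second
;; {(a+b)+(c+d), ...}, so lane 0 of SCRATCH holds the full sum.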
2448
2449 (define_insn "clrsb<mode>2"
2450 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2452 "TARGET_SIMD"
2453 "cls\\t%0.<Vtype>, %1.<Vtype>"
2454 [(set_attr "type" "neon_cls<q>")]
2455 )
2456
2457 (define_insn "clz<mode>2"
2458 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2460 "TARGET_SIMD"
2461 "clz\\t%0.<Vtype>, %1.<Vtype>"
2462 [(set_attr "type" "neon_cls<q>")]
2463 )
2464
2465 (define_insn "popcount<mode>2"
2466 [(set (match_operand:VB 0 "register_operand" "=w")
2467 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2468 "TARGET_SIMD"
2469 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470 [(set_attr "type" "neon_cnt<q>")]
2471 )
2472
2473 ;; 'across lanes' max and min ops.
2474
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478 [(match_operand:<VEL> 0 "register_operand")
2479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2480 FMAXMINV)]
2481 "TARGET_SIMD"
2482 {
2483 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484 rtx scratch = gen_reg_rtx (<MODE>mode);
2485 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2486 operands[1]));
2487 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2488 DONE;
2489 }
2490 )
2491
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494 [(match_operand:<VEL> 0 "register_operand")
2495 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2496 MAXMINV)]
2497 "TARGET_SIMD"
2498 {
2499 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500 rtx scratch = gen_reg_rtx (<MODE>mode);
2501 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2502 operands[1]));
2503 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2504 DONE;
2505 }
2506 )
2507
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2511 MAXMINV))]
2512 "TARGET_SIMD"
2513 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514 [(set_attr "type" "neon_reduc_minmax<q>")]
2515 )
2516
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518 [(set (match_operand:V2SI 0 "register_operand" "=w")
2519 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2520 MAXMINV))]
2521 "TARGET_SIMD"
2522 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523 [(set_attr "type" "neon_reduc_minmax")]
2524 )
2525
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2529 FMAXMINV))]
2530 "TARGET_SIMD"
2531 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2533 )
2534
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2536 ;; allocation.
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2538 ;; to select.
2539 ;;
2540 ;; Thus our BSL is of the form:
2541 ;; op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2543 ;;
2544 ;; if (op0 = mask)
2545 ;; bsl mask, op2, op3
2546 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2547 ;; bit op0, op2, mask
2548 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2549 ;; bif op0, op3, mask
2550 ;;
2551 ;; This pattern is what the aarch64_simd_bsl<mode> expander expands to.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
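;;
;; As an editorial illustration (a C model of the xor/and selection
;; encoded by the RTL below; not part of the port):
;;
;;   #include <stdint.h>
;;
;;   uint64_t
;;   bsl (uint64_t mask, uint64_t op2, uint64_t op3)
;;   {
;;     /* Where MASK has 1-bits, take the bits of OP2; elsewhere OP3.  */
;;     return ((op2 ^ op3) & mask) ^ op3;
;;   }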
2554
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2557 (xor:VDQ_I
2558 (and:VDQ_I
2559 (xor:VDQ_I
2560 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563 (match_dup:<V_INT_EQUIV> 3)
2564 ))]
2565 "TARGET_SIMD"
2566 "@
2567 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570 [(set_attr "type" "neon_bsl<q>")]
2571 )
2572
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first. The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2578
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2581 (xor:VDQ_I
2582 (and:VDQ_I
2583 (xor:VDQ_I
2584 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587 (match_dup:<V_INT_EQUIV> 2)))]
2588 "TARGET_SIMD"
2589 "@
2590 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593 [(set_attr "type" "neon_bsl<q>")]
2594 )
2595
2596 ;; DImode is special: we want to avoid computing, in the vector
2597 ;; registers, operations which are more naturally computed in general
2598 ;; purpose registers. Doing that would mean moving all three operands
2599 ;; from general purpose registers to vector registers, then back again.
2600 ;; However, we don't want to make this pattern an UNSPEC as we'd lose
2601 ;; scope for optimizations based on the component operations of a BSL.
2602 ;;
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
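;;
;; As an editorial illustration, splitting to the general-purpose side
;; yields the sequence below (three 4-byte instructions, matching the
;; "length" of 12 in the fourth alternative):
;;
;;   eor  scratch, op2, op3
;;   and  scratch, scratch, op1
;;   eor  op0, scratch, op3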
2605
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2608 (xor:DI
2609 (and:DI
2610 (xor:DI
2611 (match_operand:DI 3 "register_operand" "w,0,w,r")
2612 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2614 (match_dup:DI 3)
2615 ))]
2616 "TARGET_SIMD"
2617 "@
2618 bsl\\t%0.8b, %2.8b, %3.8b
2619 bit\\t%0.8b, %2.8b, %1.8b
2620 bif\\t%0.8b, %3.8b, %1.8b
2621 #"
2622 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2624 {
2625 /* Split back to individual operations. If we're before reload, and
2626 able to create a temporary register, do so. If we're after reload,
2627 we've got an early-clobber destination register, so use that.
2628 Otherwise, we can't create pseudos and we can't yet guarantee that
2629 operands[0] is safe to write, so FAIL to split. */
2630
2631 rtx scratch;
2632 if (reload_completed)
2633 scratch = operands[0];
2634 else if (can_create_pseudo_p ())
2635 scratch = gen_reg_rtx (DImode);
2636 else
2637 FAIL;
2638
2639 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2642 DONE;
2643 }
2644 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645 (set_attr "length" "4,4,4,12")]
2646 )
2647
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2650 (xor:DI
2651 (and:DI
2652 (xor:DI
2653 (match_operand:DI 3 "register_operand" "w,w,0,r")
2654 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2656 (match_dup:DI 2)
2657 ))]
2658 "TARGET_SIMD"
2659 "@
2660 bsl\\t%0.8b, %3.8b, %2.8b
2661 bit\\t%0.8b, %3.8b, %1.8b
2662 bif\\t%0.8b, %2.8b, %1.8b
2663 #"
2664 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2666 {
2667 /* Split back to individual operations. If we're before reload, and
2668 able to create a temporary register, do so. If we're after reload,
2669 we've got an early-clobber destination register, so use that.
2670 Otherwise, we can't create pseudos and we can't yet guarantee that
2671 operands[0] is safe to write, so FAIL to split. */
2672
2673 rtx scratch;
2674 if (reload_completed)
2675 scratch = operands[0];
2676 else if (can_create_pseudo_p ())
2677 scratch = gen_reg_rtx (DImode);
2678 else
2679 FAIL;
2680
2681 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2684 DONE;
2685 }
2686 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687 (set_attr "length" "4,4,4,12")]
2688 )
2689
2690 (define_expand "aarch64_simd_bsl<mode>"
2691 [(match_operand:VALLDIF 0 "register_operand")
2692 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2693 (match_operand:VALLDIF 2 "register_operand")
2694 (match_operand:VALLDIF 3 "register_operand")]
2695 "TARGET_SIMD"
2696 {
2697 /* We can't alias operands together if they have different modes. */
2698 rtx tmp = operands[0];
2699 if (FLOAT_MODE_P (<MODE>mode))
2700 {
2701 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2702 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2703 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2704 }
2705 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2706 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2707 operands[1],
2708 operands[2],
2709 operands[3]));
2710 if (tmp != operands[0])
2711 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2712
2713 DONE;
2714 })
2715
2716 (define_expand "vcond_mask_<mode><v_int_equiv>"
2717 [(match_operand:VALLDI 0 "register_operand")
2718 (match_operand:VALLDI 1 "nonmemory_operand")
2719 (match_operand:VALLDI 2 "nonmemory_operand")
2720 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2721 "TARGET_SIMD"
2722 {
2723 /* If we have (a = (P) ? -1 : 0), then we can simply move the
2724 generated mask (the result must be an integer vector). */
2725 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2726 && operands[2] == CONST0_RTX (<MODE>mode))
2727 emit_move_insn (operands[0], operands[3]);
2728 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2729 else if (operands[1] == CONST0_RTX (<MODE>mode)
2730 && operands[2] == CONSTM1_RTX (<MODE>mode))
2731 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2732 else
2733 {
2734 if (!REG_P (operands[1]))
2735 operands[1] = force_reg (<MODE>mode, operands[1]);
2736 if (!REG_P (operands[2]))
2737 operands[2] = force_reg (<MODE>mode, operands[2]);
2738 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2739 operands[1], operands[2]));
2740 }
2741
2742 DONE;
2743 })
2744
2745 ;; Patterns comparing two vectors to produce a mask.
2746
2747 (define_expand "vec_cmp<mode><mode>"
2748 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2749 (match_operator 1 "comparison_operator"
2750 [(match_operand:VSDQ_I_DI 2 "register_operand")
2751 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2752 "TARGET_SIMD"
2753 {
2754 rtx mask = operands[0];
2755 enum rtx_code code = GET_CODE (operands[1]);
2756
2757 switch (code)
2758 {
2759 case NE:
2760 case LE:
2761 case LT:
2762 case GE:
2763 case GT:
2764 case EQ:
2765 if (operands[3] == CONST0_RTX (<MODE>mode))
2766 break;
2767
2768 /* Fall through. */
2769 default:
2770 if (!REG_P (operands[3]))
2771 operands[3] = force_reg (<MODE>mode, operands[3]);
2772
2773 break;
2774 }
2775
2776 switch (code)
2777 {
2778 case LT:
2779 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2780 break;
2781
2782 case GE:
2783 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2784 break;
2785
2786 case LE:
2787 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2788 break;
2789
2790 case GT:
2791 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2792 break;
2793
2794 case LTU:
2795 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2796 break;
2797
2798 case GEU:
2799 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2800 break;
2801
2802 case LEU:
2803 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2804 break;
2805
2806 case GTU:
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2808 break;
2809
2810 case NE:
2811 /* Handle NE as !EQ. */
2812 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2813 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2814 break;
2815
2816 case EQ:
2817 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2818 break;
2819
2820 default:
2821 gcc_unreachable ();
2822 }
2823
2824 DONE;
2825 })
2826
2827 (define_expand "vec_cmp<mode><v_int_equiv>"
2828 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2829 (match_operator 1 "comparison_operator"
2830 [(match_operand:VDQF 2 "register_operand")
2831 (match_operand:VDQF 3 "nonmemory_operand")]))]
2832 "TARGET_SIMD"
2833 {
2834 int use_zero_form = 0;
2835 enum rtx_code code = GET_CODE (operands[1]);
2836 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2837
2838 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2839
2840 switch (code)
2841 {
2842 case LE:
2843 case LT:
2844 case GE:
2845 case GT:
2846 case EQ:
2847 if (operands[3] == CONST0_RTX (<MODE>mode))
2848 {
2849 use_zero_form = 1;
2850 break;
2851 }
2852 /* Fall through. */
2853 default:
2854 if (!REG_P (operands[3]))
2855 operands[3] = force_reg (<MODE>mode, operands[3]);
2856
2857 break;
2858 }
2859
2860 switch (code)
2861 {
2862 case LT:
2863 if (use_zero_form)
2864 {
2865 comparison = gen_aarch64_cmlt<mode>;
2866 break;
2867 }
2868 /* Fall through. */
2869 case UNLT:
2870 std::swap (operands[2], operands[3]);
2871 /* Fall through. */
2872 case UNGT:
2873 case GT:
2874 comparison = gen_aarch64_cmgt<mode>;
2875 break;
2876 case LE:
2877 if (use_zero_form)
2878 {
2879 comparison = gen_aarch64_cmle<mode>;
2880 break;
2881 }
2882 /* Fall through. */
2883 case UNLE:
2884 std::swap (operands[2], operands[3]);
2885 /* Fall through. */
2886 case UNGE:
2887 case GE:
2888 comparison = gen_aarch64_cmge<mode>;
2889 break;
2890 case NE:
2891 case EQ:
2892 comparison = gen_aarch64_cmeq<mode>;
2893 break;
2894 case UNEQ:
2895 case ORDERED:
2896 case UNORDERED:
2897 case LTGT:
2898 break;
2899 default:
2900 gcc_unreachable ();
2901 }
2902
2903 switch (code)
2904 {
2905 case UNGE:
2906 case UNGT:
2907 case UNLE:
2908 case UNLT:
2909 {
2910 /* All of the above must not raise any FP exceptions. Thus we first
2911 check each operand for NaNs and force any elements containing NaN to
2912 zero before using them in the compare.
2913 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2914 (cm<cc> (isnan (a) ? 0.0 : a,
2915 isnan (b) ? 0.0 : b))
2916 We use the following transformations for doing the comparisons:
2917 a UNGE b -> a GE b
2918 a UNGT b -> a GT b
2919 a UNLE b -> b GE a
2920 a UNLT b -> b GT a. */
2921
2922 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2923 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2924 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2925 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2926 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2927 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2928 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2929 lowpart_subreg (<V_INT_EQUIV>mode,
2930 operands[2],
2931 <MODE>mode)));
2932 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2933 lowpart_subreg (<V_INT_EQUIV>mode,
2934 operands[3],
2935 <MODE>mode)));
2936 gcc_assert (comparison != NULL);
2937 emit_insn (comparison (operands[0],
2938 lowpart_subreg (<MODE>mode,
2939 tmp0, <V_INT_EQUIV>mode),
2940 lowpart_subreg (<MODE>mode,
2941 tmp1, <V_INT_EQUIV>mode)));
2942 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2943 }
2944 break;
2945
2946 case LT:
2947 case LE:
2948 case GT:
2949 case GE:
2950 case EQ:
2951 case NE:
2952 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2953 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
2954 a GE b -> a GE b
2955 a GT b -> a GT b
2956 a LE b -> b GE a
2957 a LT b -> b GT a
2958 a EQ b -> a EQ b
2959 a NE b -> ~(a EQ b) */
2960 gcc_assert (comparison != NULL);
2961 emit_insn (comparison (operands[0], operands[2], operands[3]));
2962 if (code == NE)
2963 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2964 break;
2965
2966 case LTGT:
2967 /* LTGT is not guaranteed not to generate an FP exception. So let's
2968 go the faster way: ((a > b) || (b > a)). */
2969 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2970 operands[2], operands[3]));
2971 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2972 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2973 break;
2974
2975 case ORDERED:
2976 case UNORDERED:
2977 case UNEQ:
2978 /* cmeq (a, a) & cmeq (b, b). */
2979 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2980 operands[2], operands[2]));
2981 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2982 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2983
2984 if (code == UNORDERED)
2985 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2986 else if (code == UNEQ)
2987 {
2988 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2989 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2990 }
2991 break;
2992
2993 default:
2994 gcc_unreachable ();
2995 }
2996
2997 DONE;
2998 })
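
;; As an editorial illustration of the UN<cc> handling above, a one-lane
;; C model of UNGE (the names are hypothetical; not part of the port):
;;
;;   #include <math.h>
;;   #include <stdint.h>
;;
;;   uint32_t
;;   unge_lane (float a, float b)
;;   {
;;     uint32_t ordered = (a == a && b == b) ? ~0u : 0u;  /* cmeq x2, and */
;;     float a0 = isnan (a) ? 0.0f : a;  /* force NaN lanes to zero */
;;     float b0 = isnan (b) ? 0.0f : b;
;;     uint32_t ge = (a0 >= b0) ? ~0u : 0u;               /* fcmge */
;;     return ge | ~ordered;                              /* orn */
;;   }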
2999
3000 (define_expand "vec_cmpu<mode><mode>"
3001 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3002 (match_operator 1 "comparison_operator"
3003 [(match_operand:VSDQ_I_DI 2 "register_operand")
3004 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3005 "TARGET_SIMD"
3006 {
3007 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3008 operands[2], operands[3]));
3009 DONE;
3010 })
3011
3012 (define_expand "vcond<mode><mode>"
3013 [(set (match_operand:VALLDI 0 "register_operand")
3014 (if_then_else:VALLDI
3015 (match_operator 3 "comparison_operator"
3016 [(match_operand:VALLDI 4 "register_operand")
3017 (match_operand:VALLDI 5 "nonmemory_operand")])
3018 (match_operand:VALLDI 1 "nonmemory_operand")
3019 (match_operand:VALLDI 2 "nonmemory_operand")))]
3020 "TARGET_SIMD"
3021 {
3022 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3023 enum rtx_code code = GET_CODE (operands[3]);
3024
3025 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
3026 invert it and also switch operands 1/2 in order to avoid the
3027 additional NOT instruction. */
3028 if (code == NE)
3029 {
3030 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3031 operands[4], operands[5]);
3032 std::swap (operands[1], operands[2]);
3033 }
3034 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3035 operands[4], operands[5]));
3036 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3037 operands[2], mask));
3038
3039 DONE;
3040 })
3041
3042 (define_expand "vcond<v_cmp_mixed><mode>"
3043 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3044 (if_then_else:<V_cmp_mixed>
3045 (match_operator 3 "comparison_operator"
3046 [(match_operand:VDQF_COND 4 "register_operand")
3047 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3048 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3049 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3050 "TARGET_SIMD"
3051 {
3052 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3053 enum rtx_code code = GET_CODE (operands[3]);
3054
3055 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
3056 invert it and also switch operands 1/2 in order to avoid the
3057 additional NOT instruction. */
3058 if (code == NE)
3059 {
3060 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3061 operands[4], operands[5]);
3062 std::swap (operands[1], operands[2]);
3063 }
3064 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3065 operands[4], operands[5]));
3066 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3067 operands[0], operands[1],
3068 operands[2], mask));
3069
3070 DONE;
3071 })
3072
3073 (define_expand "vcondu<mode><mode>"
3074 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3075 (if_then_else:VSDQ_I_DI
3076 (match_operator 3 "comparison_operator"
3077 [(match_operand:VSDQ_I_DI 4 "register_operand")
3078 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3079 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3080 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3081 "TARGET_SIMD"
3082 {
3083 rtx mask = gen_reg_rtx (<MODE>mode);
3084 enum rtx_code code = GET_CODE (operands[3]);
3085
3086 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
3087 invert it and also switch operands 1/2 in order to avoid the
3088 additional NOT instruction. */
3089 if (code == NE)
3090 {
3091 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3092 operands[4], operands[5]);
3093 std::swap (operands[1], operands[2]);
3094 }
3095 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3096 operands[4], operands[5]));
3097 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3098 operands[2], mask));
3099 DONE;
3100 })
3101
3102 (define_expand "vcondu<mode><v_cmp_mixed>"
3103 [(set (match_operand:VDQF 0 "register_operand")
3104 (if_then_else:VDQF
3105 (match_operator 3 "comparison_operator"
3106 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3107 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3108 (match_operand:VDQF 1 "nonmemory_operand")
3109 (match_operand:VDQF 2 "nonmemory_operand")))]
3110 "TARGET_SIMD"
3111 {
3112 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3113 enum rtx_code code = GET_CODE (operands[3]);
3114
3115 /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly
3116 invert it and also switch operands 1/2 in order to avoid the
3117 additional NOT instruction. */
3118 if (code == NE)
3119 {
3120 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3121 operands[4], operands[5]);
3122 std::swap (operands[1], operands[2]);
3123 }
3124 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3125 mask, operands[3],
3126 operands[4], operands[5]));
3127 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3128 operands[2], mask));
3129 DONE;
3130 })
3131
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3133
3134 ;; Lane extraction with sign extension to general purpose register.
3135 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3136 [(set (match_operand:GPI 0 "register_operand" "=r")
3137 (sign_extend:GPI
3138 (vec_select:<VDQQH:VEL>
3139 (match_operand:VDQQH 1 "register_operand" "w")
3140 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3141 "TARGET_SIMD"
3142 {
3143 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3144 INTVAL (operands[2]));
3145 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3146 }
3147 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3148 )
3149
3150 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3151 [(set (match_operand:GPI 0 "register_operand" "=r")
3152 (zero_extend:GPI
3153 (vec_select:<VDQQH:VEL>
3154 (match_operand:VDQQH 1 "register_operand" "w")
3155 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3156 "TARGET_SIMD"
3157 {
3158 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3159 INTVAL (operands[2]));
3160 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3161 }
3162 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3163 )
3164
3165 ;; Lane extraction of a value: neither sign nor zero extension
3166 ;; is guaranteed, so the upper bits should be considered undefined.
3167 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
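;; For example (editorial note): lane 1 of a V4SI is printed as lane 1
;; on little-endian but as lane 2 on big-endian, since the flip maps
;; lane N to NUNITS - 1 - N.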
3168 (define_insn "aarch64_get_lane<mode>"
3169 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3170 (vec_select:<VEL>
3171 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3172 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3173 "TARGET_SIMD"
3174 {
3175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3176 switch (which_alternative)
3177 {
3178 case 0:
3179 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3180 case 1:
3181 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3182 case 2:
3183 return "st1\\t{%1.<Vetype>}[%2], %0";
3184 default:
3185 gcc_unreachable ();
3186 }
3187 }
3188 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3189 )
3190
3191 (define_insn "load_pair_lanes<mode>"
3192 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3193 (vec_concat:<VDBL>
3194 (match_operand:VDC 1 "memory_operand" "Utq")
3195 (match_operand:VDC 2 "memory_operand" "m")))]
3196 "TARGET_SIMD && !STRICT_ALIGNMENT
3197 && rtx_equal_p (XEXP (operands[2], 0),
3198 plus_constant (Pmode,
3199 XEXP (operands[1], 0),
3200 GET_MODE_SIZE (<MODE>mode)))"
3201 "ldr\\t%q0, %1"
3202 [(set_attr "type" "neon_load1_1reg_q")]
3203 )
3204
3205 (define_insn "store_pair_lanes<mode>"
3206 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3207 (vec_concat:<VDBL>
3208 (match_operand:VDC 1 "register_operand" "w, r")
3209 (match_operand:VDC 2 "register_operand" "w, r")))]
3210 "TARGET_SIMD"
3211 "@
3212 stp\\t%d1, %d2, %y0
3213 stp\\t%x1, %x2, %y0"
3214 [(set_attr "type" "neon_stp, store_16")]
3215 )
3216
3217 ;; In this insn, operand 1 should be the low part, and operand 2 the
3218 ;; high part, of the destination vector.
3219
3220 (define_insn "@aarch64_combinez<mode>"
3221 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3222 (vec_concat:<VDBL>
3223 (match_operand:VDC 1 "general_operand" "w,?r,m")
3224 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3225 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3226 "@
3227 mov\\t%0.8b, %1.8b
3228 fmov\t%d0, %1
3229 ldr\\t%d0, %1"
3230 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3231 (set_attr "arch" "simd,fp,simd")]
3232 )
3233
3234 (define_insn "@aarch64_combinez_be<mode>"
3235 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3236 (vec_concat:<VDBL>
3237 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3238 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3239 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3240 "@
3241 mov\\t%0.8b, %1.8b
3242 fmov\t%d0, %1
3243 ldr\\t%d0, %1"
3244 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3245 (set_attr "arch" "simd,fp,simd")]
3246 )
3247
3248 (define_expand "aarch64_combine<mode>"
3249 [(match_operand:<VDBL> 0 "register_operand")
3250 (match_operand:VDC 1 "register_operand")
3251 (match_operand:VDC 2 "register_operand")]
3252 "TARGET_SIMD"
3253 {
3254 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3255
3256 DONE;
3257 }
3258 )
3259
3260 (define_expand "@aarch64_simd_combine<mode>"
3261 [(match_operand:<VDBL> 0 "register_operand")
3262 (match_operand:VDC 1 "register_operand")
3263 (match_operand:VDC 2 "register_operand")]
3264 "TARGET_SIMD"
3265 {
3266 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3267 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3268 DONE;
3269 }
3270 [(set_attr "type" "multiple")]
3271 )
3272
3273 ;; <su><addsub>l<q>.
3274
3275 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3276 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3277 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3278 (match_operand:VQW 1 "register_operand" "w")
3279 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3280 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3281 (match_operand:VQW 2 "register_operand" "w")
3282 (match_dup 3)))))]
3283 "TARGET_SIMD"
3284 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3285 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3286 )
3287
3288 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3289 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3290 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3291 (match_operand:VQW 1 "register_operand" "w")
3292 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3293 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3294 (match_operand:VQW 2 "register_operand" "w")
3295 (match_dup 3)))))]
3296 "TARGET_SIMD"
3297 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3298 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3299 )
3300
3301
3302 (define_expand "aarch64_saddl2<mode>"
3303 [(match_operand:<VWIDE> 0 "register_operand")
3304 (match_operand:VQW 1 "register_operand")
3305 (match_operand:VQW 2 "register_operand")]
3306 "TARGET_SIMD"
3307 {
3308 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3309 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3310 operands[2], p));
3311 DONE;
3312 })
3313
3314 (define_expand "aarch64_uaddl2<mode>"
3315 [(match_operand:<VWIDE> 0 "register_operand")
3316 (match_operand:VQW 1 "register_operand")
3317 (match_operand:VQW 2 "register_operand")]
3318 "TARGET_SIMD"
3319 {
3320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3322 operands[2], p));
3323 DONE;
3324 })
3325
3326 (define_expand "aarch64_ssubl2<mode>"
3327 [(match_operand:<VWIDE> 0 "register_operand")
3328 (match_operand:VQW 1 "register_operand")
3329 (match_operand:VQW 2 "register_operand")]
3330 "TARGET_SIMD"
3331 {
3332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3334 operands[2], p));
3335 DONE;
3336 })
3337
3338 (define_expand "aarch64_usubl2<mode>"
3339 [(match_operand:<VWIDE> 0 "register_operand")
3340 (match_operand:VQW 1 "register_operand")
3341 (match_operand:VQW 2 "register_operand")]
3342 "TARGET_SIMD"
3343 {
3344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3346 operands[2], p));
3347 DONE;
3348 })
3349
3350 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3352 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3353 (match_operand:VD_BHSI 1 "register_operand" "w"))
3354 (ANY_EXTEND:<VWIDE>
3355 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3356 "TARGET_SIMD"
3357 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3358 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3359 )
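
;; Illustrative mapping of the widening forms above for QImode elements
;; with sign extension:
;;   saddl  v0.8h, v1.8b,  v2.8b    ; widen-and-add the 64-bit vectors
;;   saddl2 v0.8h, v1.16b, v2.16b   ; same on the high halves (_hi_internal)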
3360
3361 ;; <su><addsub>w<q>.
3362
3363 (define_expand "widen_ssum<mode>3"
3364 [(set (match_operand:<VDBLW> 0 "register_operand")
3365 (plus:<VDBLW> (sign_extend:<VDBLW>
3366 (match_operand:VQW 1 "register_operand"))
3367 (match_operand:<VDBLW> 2 "register_operand")))]
3368 "TARGET_SIMD"
3369 {
3370 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3371 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3372
3373 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3374 operands[1], p));
3375 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3376 DONE;
3377 }
3378 )
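
;; A sketch of the sequence emitted by the expander above for
;; <mode> == V8HImode (register names illustrative):
;;   saddw  tmp.4s, acc.4s, x.4h   ; acc + sign_extend (low half of x)
;;   saddw2 res.4s, tmp.4s, x.8h   ; ... + sign_extend (high half of x)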
3379
3380 (define_expand "widen_ssum<mode>3"
3381 [(set (match_operand:<VWIDE> 0 "register_operand")
3382 (plus:<VWIDE> (sign_extend:<VWIDE>
3383 (match_operand:VD_BHSI 1 "register_operand"))
3384 (match_operand:<VWIDE> 2 "register_operand")))]
3385 "TARGET_SIMD"
3386 {
3387 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3388 DONE;
3389 })
3390
3391 (define_expand "widen_usum<mode>3"
3392 [(set (match_operand:<VDBLW> 0 "register_operand")
3393 (plus:<VDBLW> (zero_extend:<VDBLW>
3394 (match_operand:VQW 1 "register_operand"))
3395 (match_operand:<VDBLW> 2 "register_operand")))]
3396 "TARGET_SIMD"
3397 {
3398 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3399 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3400
3401 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3402 operands[1], p));
3403 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3404 DONE;
3405 }
3406 )
3407
3408 (define_expand "widen_usum<mode>3"
3409 [(set (match_operand:<VWIDE> 0 "register_operand")
3410 (plus:<VWIDE> (zero_extend:<VWIDE>
3411 (match_operand:VD_BHSI 1 "register_operand"))
3412 (match_operand:<VWIDE> 2 "register_operand")))]
3413 "TARGET_SIMD"
3414 {
3415 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3416 DONE;
3417 })
3418
3419 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3420 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3421 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3422 (ANY_EXTEND:<VWIDE>
3423 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3424 "TARGET_SIMD"
3425 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426 [(set_attr "type" "neon_sub_widen")]
3427 )
3428
3429 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3430 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3432 (ANY_EXTEND:<VWIDE>
3433 (vec_select:<VHALF>
3434 (match_operand:VQW 2 "register_operand" "w")
3435 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3436 "TARGET_SIMD"
3437 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3438 [(set_attr "type" "neon_sub_widen")]
3439 )
3440
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3442 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3444 (ANY_EXTEND:<VWIDE>
3445 (vec_select:<VHALF>
3446 (match_operand:VQW 2 "register_operand" "w")
3447 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3448 "TARGET_SIMD"
3449 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3450 [(set_attr "type" "neon_sub_widen")]
3451 )
3452
3453 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (plus:<VWIDE>
3456 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3457 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3458 "TARGET_SIMD"
3459 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3460 [(set_attr "type" "neon_add_widen")]
3461 )
3462
3463 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3464 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3465 (plus:<VWIDE>
3466 (ANY_EXTEND:<VWIDE>
3467 (vec_select:<VHALF>
3468 (match_operand:VQW 2 "register_operand" "w")
3469 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3470 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3471 "TARGET_SIMD"
3472 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3473 [(set_attr "type" "neon_add_widen")]
3474 )
3475
3476 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3477 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3478 (plus:<VWIDE>
3479 (ANY_EXTEND:<VWIDE>
3480 (vec_select:<VHALF>
3481 (match_operand:VQW 2 "register_operand" "w")
3482 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3483 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3484 "TARGET_SIMD"
3485 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3486 [(set_attr "type" "neon_add_widen")]
3487 )
3488
3489 (define_expand "aarch64_saddw2<mode>"
3490 [(match_operand:<VWIDE> 0 "register_operand")
3491 (match_operand:<VWIDE> 1 "register_operand")
3492 (match_operand:VQW 2 "register_operand")]
3493 "TARGET_SIMD"
3494 {
3495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3496 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3497 operands[2], p));
3498 DONE;
3499 })
3500
3501 (define_expand "aarch64_uaddw2<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (match_operand:<VWIDE> 1 "register_operand")
3504 (match_operand:VQW 2 "register_operand")]
3505 "TARGET_SIMD"
3506 {
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3509 operands[2], p));
3510 DONE;
3511 })
3512
3513
3514 (define_expand "aarch64_ssubw2<mode>"
3515 [(match_operand:<VWIDE> 0 "register_operand")
3516 (match_operand:<VWIDE> 1 "register_operand")
3517 (match_operand:VQW 2 "register_operand")]
3518 "TARGET_SIMD"
3519 {
3520 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3521 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3522 operands[2], p));
3523 DONE;
3524 })
3525
3526 (define_expand "aarch64_usubw2<mode>"
3527 [(match_operand:<VWIDE> 0 "register_operand")
3528 (match_operand:<VWIDE> 1 "register_operand")
3529 (match_operand:VQW 2 "register_operand")]
3530 "TARGET_SIMD"
3531 {
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3534 operands[2], p));
3535 DONE;
3536 })
3537
3538 ;; <su><r>h<addsub>.
3539
3540 (define_expand "<u>avg<mode>3_floor"
3541 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3542 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3543 (match_operand:VDQ_BHSI 2 "register_operand")]
3544 HADD))]
3545 "TARGET_SIMD"
3546 )
3547
3548 (define_expand "<u>avg<mode>3_ceil"
3549 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3550 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3551 (match_operand:VDQ_BHSI 2 "register_operand")]
3552 RHADD))]
3553 "TARGET_SIMD"
3554 )
3555
3556 (define_insn "aarch64_<sur>h<addsub><mode>"
3557 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3558 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3559 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3560 HADDSUB))]
3561 "TARGET_SIMD"
3562 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3563 [(set_attr "type" "neon_<addsub>_halve<q>")]
3564 )
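
;; Lane-wise reference semantics: shadd/uhadd compute (a + b) >> 1 and
;; srhadd/urhadd compute (a + b + 1) >> 1, using a double-width
;; intermediate so the addition cannot overflow; this is why the
;; expanders above can implement avg<mode>3_floor and _ceil directly.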
3565
3566 ;; <r><addsub>hn<q>.
3567
3568 (define_insn "aarch64_<sur><addsub>hn<mode>"
3569 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3570 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3571 (match_operand:VQN 2 "register_operand" "w")]
3572 ADDSUBHN))]
3573 "TARGET_SIMD"
3574 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3575 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3576 )
3577
3578 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3579 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3580 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3581 (match_operand:VQN 2 "register_operand" "w")
3582 (match_operand:VQN 3 "register_operand" "w")]
3583 ADDSUBHN2))]
3584 "TARGET_SIMD"
3585 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3586 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3587 )
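
;; Lane-wise reference semantics for V4SI inputs: addhn/subhn return the
;; high half of each double-width sum or difference, e.g.
;;   res.4h[i] = (a.4s[i] + b.4s[i]) >> 16
;; while raddhn/rsubhn first add the rounding constant 1 << 15.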
3588
3589 ;; pmul.
3590
3591 (define_insn "aarch64_pmul<mode>"
3592 [(set (match_operand:VB 0 "register_operand" "=w")
3593 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3594 (match_operand:VB 2 "register_operand" "w")]
3595 UNSPEC_PMUL))]
3596 "TARGET_SIMD"
3597 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3598 [(set_attr "type" "neon_mul_<Vetype><q>")]
3599 )
3600
3601 ;; fmulx.
3602
3603 (define_insn "aarch64_fmulx<mode>"
3604 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3605 (unspec:VHSDF_HSDF
3606 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3607 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3608 UNSPEC_FMULX))]
3609 "TARGET_SIMD"
3610 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3611 [(set_attr "type" "neon_fp_mul_<stype>")]
3612 )
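
;; FMULX behaves like FMUL except that 0.0 * +/-Inf returns +/-2.0
;; (with the sign given by the XOR of the operand signs) instead of the
;; default NaN, which is what reciprocal-estimate sequences rely on.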
3613
3614 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3615
3616 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3617 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3618 (unspec:VDQSF
3619 [(match_operand:VDQSF 1 "register_operand" "w")
3620 (vec_duplicate:VDQSF
3621 (vec_select:<VEL>
3622 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3623 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3624 UNSPEC_FMULX))]
3625 "TARGET_SIMD"
3626 {
3627 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3628 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3629 }
3630 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3631 )
3632
3633 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3634
3635 (define_insn "*aarch64_mulx_elt<mode>"
3636 [(set (match_operand:VDQF 0 "register_operand" "=w")
3637 (unspec:VDQF
3638 [(match_operand:VDQF 1 "register_operand" "w")
3639 (vec_duplicate:VDQF
3640 (vec_select:<VEL>
3641 (match_operand:VDQF 2 "register_operand" "w")
3642 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3643 UNSPEC_FMULX))]
3644 "TARGET_SIMD"
3645 {
3646 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3647 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3648 }
3649 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3650 )
3651
3652 ;; vmulxq_lane
3653
3654 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3655 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3656 (unspec:VHSDF
3657 [(match_operand:VHSDF 1 "register_operand" "w")
3658 (vec_duplicate:VHSDF
3659 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3660 UNSPEC_FMULX))]
3661 "TARGET_SIMD"
3662 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3663 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3664 )
3665
3666 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3667 ;; vmulxd_lane_f64 == vmulx_lane_f64
3668 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3669
3670 (define_insn "*aarch64_vgetfmulx<mode>"
3671 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3672 (unspec:<VEL>
3673 [(match_operand:<VEL> 1 "register_operand" "w")
3674 (vec_select:<VEL>
3675 (match_operand:VDQF 2 "register_operand" "w")
3676 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3677 UNSPEC_FMULX))]
3678 "TARGET_SIMD"
3679 {
3680 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3681 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3682 }
3683 [(set_attr "type" "fmul<Vetype>")]
3684 )
3685 ;; <su>q<addsub>
3686
3687 (define_insn "aarch64_<su_optab><optab><mode>"
3688 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3689 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3690 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3691 "TARGET_SIMD"
3692 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3693 [(set_attr "type" "neon_<optab><q>")]
3694 )
3695
3696 ;; suqadd and usqadd
3697
3698 (define_insn "aarch64_<sur>qadd<mode>"
3699 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3700 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3701 (match_operand:VSDQ_I 2 "register_operand" "w")]
3702 USSUQADD))]
3703 "TARGET_SIMD"
3704 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3705 [(set_attr "type" "neon_qadd<q>")]
3706 )
3707
3708 ;; sqmovun
3709
3710 (define_insn "aarch64_sqmovun<mode>"
3711 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3712 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3713 UNSPEC_SQXTUN))]
3714 "TARGET_SIMD"
3715 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3716 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3717 )
3718
3719 ;; sqmovn and uqmovn
3720
3721 (define_insn "aarch64_<sur>qmovn<mode>"
3722 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3723 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3724 SUQMOVN))]
3725 "TARGET_SIMD"
3726 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3727 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3728 )
3729
3730 ;; <su>q<absneg>
3731
3732 (define_insn "aarch64_s<optab><mode>"
3733 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3734 (UNQOPS:VSDQ_I
3735 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3736 "TARGET_SIMD"
3737 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3738 [(set_attr "type" "neon_<optab><q>")]
3739 )
3740
3741 ;; sq<r>dmulh.
3742
3743 (define_insn "aarch64_sq<r>dmulh<mode>"
3744 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3745 (unspec:VSDQ_HSI
3746 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3747 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3748 VQDMULH))]
3749 "TARGET_SIMD"
3750 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3751 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3752 )
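
;; Lane-wise reference semantics for 16-bit elements:
;;   sqdmulh  a b = sat ((2 * a * b) >> 16)
;;   sqrdmulh a b = sat ((2 * a * b + (1 << 15)) >> 16)
;; The doubling can only overflow for a == b == INT16_MIN, in which case
;; the result saturates to INT16_MAX.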
3753
3754 ;; sq<r>dmulh_lane
3755
3756 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3757 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3758 (unspec:VDQHS
3759 [(match_operand:VDQHS 1 "register_operand" "w")
3760 (vec_select:<VEL>
3761 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3762 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3763 VQDMULH))]
3764 "TARGET_SIMD"
3765 "*
3766 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3767 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3768 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3769 )
3770
3771 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3772 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3773 (unspec:VDQHS
3774 [(match_operand:VDQHS 1 "register_operand" "w")
3775 (vec_select:<VEL>
3776 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3777 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3778 VQDMULH))]
3779 "TARGET_SIMD"
3780 "*
3781 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3782 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3783 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3784 )
3785
3786 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3787 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3788 (unspec:SD_HSI
3789 [(match_operand:SD_HSI 1 "register_operand" "w")
3790 (vec_select:<VEL>
3791 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3792 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3793 VQDMULH))]
3794 "TARGET_SIMD"
3795 "*
3796 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3797 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3798 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3799 )
3800
3801 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3802 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3803 (unspec:SD_HSI
3804 [(match_operand:SD_HSI 1 "register_operand" "w")
3805 (vec_select:<VEL>
3806 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3807 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3808 VQDMULH))]
3809 "TARGET_SIMD"
3810 "*
3811 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3812 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3813 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3814 )
3815
3816 ;; sqrdml[as]h.
3817
3818 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3819 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3820 (unspec:VSDQ_HSI
3821 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3822 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3823 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3824 SQRDMLH_AS))]
3825 "TARGET_SIMD_RDMA"
3826 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3827 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3828 )
3829
3830 ;; sqrdml[as]h_lane.
3831
3832 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3833 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3834 (unspec:VDQHS
3835 [(match_operand:VDQHS 1 "register_operand" "0")
3836 (match_operand:VDQHS 2 "register_operand" "w")
3837 (vec_select:<VEL>
3838 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3839 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3840 SQRDMLH_AS))]
3841 "TARGET_SIMD_RDMA"
3842 {
3843 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3844 return
3845 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3846 }
3847 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3848 )
3849
3850 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3851 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3852 (unspec:SD_HSI
3853 [(match_operand:SD_HSI 1 "register_operand" "0")
3854 (match_operand:SD_HSI 2 "register_operand" "w")
3855 (vec_select:<VEL>
3856 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3857 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3858 SQRDMLH_AS))]
3859 "TARGET_SIMD_RDMA"
3860 {
3861 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3862 return
3863 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3864 }
3865 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3866 )
3867
3868 ;; sqrdml[as]h_laneq.
3869
3870 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3871 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3872 (unspec:VDQHS
3873 [(match_operand:VDQHS 1 "register_operand" "0")
3874 (match_operand:VDQHS 2 "register_operand" "w")
3875 (vec_select:<VEL>
3876 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3877 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3878 SQRDMLH_AS))]
3879 "TARGET_SIMD_RDMA"
3880 {
3881 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3882 return
3883 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3884 }
3885 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3886 )
3887
3888 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3889 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3890 (unspec:SD_HSI
3891 [(match_operand:SD_HSI 1 "register_operand" "0")
3892 (match_operand:SD_HSI 2 "register_operand" "w")
3893 (vec_select:<VEL>
3894 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3895 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3896 SQRDMLH_AS))]
3897 "TARGET_SIMD_RDMA"
3898 {
3899 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3900 return
3901 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3902 }
3903 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3904 )
3905
3906 ;; vqdml[sa]l
3907
3908 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3909 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3910 (SBINQOPS:<VWIDE>
3911 (match_operand:<VWIDE> 1 "register_operand" "0")
3912 (ss_ashift:<VWIDE>
3913 (mult:<VWIDE>
3914 (sign_extend:<VWIDE>
3915 (match_operand:VSD_HSI 2 "register_operand" "w"))
3916 (sign_extend:<VWIDE>
3917 (match_operand:VSD_HSI 3 "register_operand" "w")))
3918 (const_int 1))))]
3919 "TARGET_SIMD"
3920 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3921 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3922 )
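
;; Lane-wise reference semantics for 16-bit inputs and a 32-bit
;; accumulator:
;;   sqdmlal: acc = sat32 (acc + sat32 (2 * (int32) a * (int32) b))
;; i.e. the doubled widening product saturates first (only possible for
;; a == b == INT16_MIN), then the saturating accumulation is applied.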
3923
3924 ;; vqdml[sa]l_lane
3925
3926 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3927 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3928 (SBINQOPS:<VWIDE>
3929 (match_operand:<VWIDE> 1 "register_operand" "0")
3930 (ss_ashift:<VWIDE>
3931 (mult:<VWIDE>
3932 (sign_extend:<VWIDE>
3933 (match_operand:VD_HSI 2 "register_operand" "w"))
3934 (sign_extend:<VWIDE>
3935 (vec_duplicate:VD_HSI
3936 (vec_select:<VEL>
3937 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3938 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3939 ))
3940 (const_int 1))))]
3941 "TARGET_SIMD"
3942 {
3943 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3944 return
3945 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3946 }
3947 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3948 )
3949
3950 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3951 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3952 (SBINQOPS:<VWIDE>
3953 (match_operand:<VWIDE> 1 "register_operand" "0")
3954 (ss_ashift:<VWIDE>
3955 (mult:<VWIDE>
3956 (sign_extend:<VWIDE>
3957 (match_operand:VD_HSI 2 "register_operand" "w"))
3958 (sign_extend:<VWIDE>
3959 (vec_duplicate:VD_HSI
3960 (vec_select:<VEL>
3961 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3962 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3963 ))
3964 (const_int 1))))]
3965 "TARGET_SIMD"
3966 {
3967 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3968 return
3969 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3970 }
3971 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3972 )
3973
3974 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3975 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3976 (SBINQOPS:<VWIDE>
3977 (match_operand:<VWIDE> 1 "register_operand" "0")
3978 (ss_ashift:<VWIDE>
3979 (mult:<VWIDE>
3980 (sign_extend:<VWIDE>
3981 (match_operand:SD_HSI 2 "register_operand" "w"))
3982 (sign_extend:<VWIDE>
3983 (vec_select:<VEL>
3984 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3986 )
3987 (const_int 1))))]
3988 "TARGET_SIMD"
3989 {
3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991 return
3992 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993 }
3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3995 )
3996
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3998 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999 (SBINQOPS:<VWIDE>
4000 (match_operand:<VWIDE> 1 "register_operand" "0")
4001 (ss_ashift:<VWIDE>
4002 (mult:<VWIDE>
4003 (sign_extend:<VWIDE>
4004 (match_operand:SD_HSI 2 "register_operand" "w"))
4005 (sign_extend:<VWIDE>
4006 (vec_select:<VEL>
4007 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4008 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4009 )
4010 (const_int 1))))]
4011 "TARGET_SIMD"
4012 {
4013 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4014 return
4015 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4016 }
4017 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4018 )
4019
4020 ;; vqdml[sa]l_n
4021
4022 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4023 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (SBINQOPS:<VWIDE>
4025 (match_operand:<VWIDE> 1 "register_operand" "0")
4026 (ss_ashift:<VWIDE>
4027 (mult:<VWIDE>
4028 (sign_extend:<VWIDE>
4029 (match_operand:VD_HSI 2 "register_operand" "w"))
4030 (sign_extend:<VWIDE>
4031 (vec_duplicate:VD_HSI
4032 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4033 (const_int 1))))]
4034 "TARGET_SIMD"
4035 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4036 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4037 )
4038
4039 ;; sqdml[as]l2
4040
4041 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4042 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4043 (SBINQOPS:<VWIDE>
4044 (match_operand:<VWIDE> 1 "register_operand" "0")
4045 (ss_ashift:<VWIDE>
4046 (mult:<VWIDE>
4047 (sign_extend:<VWIDE>
4048 (vec_select:<VHALF>
4049 (match_operand:VQ_HSI 2 "register_operand" "w")
4050 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4051 (sign_extend:<VWIDE>
4052 (vec_select:<VHALF>
4053 (match_operand:VQ_HSI 3 "register_operand" "w")
4054 (match_dup 4))))
4055 (const_int 1))))]
4056 "TARGET_SIMD"
4057 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4058 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4059 )
4060
4061 (define_expand "aarch64_sqdmlal2<mode>"
4062 [(match_operand:<VWIDE> 0 "register_operand")
4063 (match_operand:<VWIDE> 1 "register_operand")
4064 (match_operand:VQ_HSI 2 "register_operand")
4065 (match_operand:VQ_HSI 3 "register_operand")]
4066 "TARGET_SIMD"
4067 {
4068 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4069 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4070 operands[2], operands[3], p));
4071 DONE;
4072 })
4073
4074 (define_expand "aarch64_sqdmlsl2<mode>"
4075 [(match_operand:<VWIDE> 0 "register_operand")
4076 (match_operand:<VWIDE> 1 "register_operand")
4077 (match_operand:VQ_HSI 2 "register_operand")
4078 (match_operand:VQ_HSI 3 "register_operand")]
4079 "TARGET_SIMD"
4080 {
4081 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4082 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4083 operands[2], operands[3], p));
4084 DONE;
4085 })
4086
4087 ;; vqdml[sa]l2_lane
4088
4089 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4090 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4091 (SBINQOPS:<VWIDE>
4092 (match_operand:<VWIDE> 1 "register_operand" "0")
4093 (ss_ashift:<VWIDE>
4094 (mult:<VWIDE>
4095 (sign_extend:<VWIDE>
4096 (vec_select:<VHALF>
4097 (match_operand:VQ_HSI 2 "register_operand" "w")
4098 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:<VHALF>
4101 (vec_select:<VEL>
4102 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4103 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4104 ))))
4105 (const_int 1))))]
4106 "TARGET_SIMD"
4107 {
4108 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4109 return
4110 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4111 }
4112 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4113 )
4114
4115 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4117 (SBINQOPS:<VWIDE>
4118 (match_operand:<VWIDE> 1 "register_operand" "0")
4119 (ss_ashift:<VWIDE>
4120 (mult:<VWIDE>
4121 (sign_extend:<VWIDE>
4122 (vec_select:<VHALF>
4123 (match_operand:VQ_HSI 2 "register_operand" "w")
4124 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4125 (sign_extend:<VWIDE>
4126 (vec_duplicate:<VHALF>
4127 (vec_select:<VEL>
4128 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4129 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4130 ))))
4131 (const_int 1))))]
4132 "TARGET_SIMD"
4133 {
4134 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4135 return
4136 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4137 }
4138 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4139 )
4140
4141 (define_expand "aarch64_sqdmlal2_lane<mode>"
4142 [(match_operand:<VWIDE> 0 "register_operand")
4143 (match_operand:<VWIDE> 1 "register_operand")
4144 (match_operand:VQ_HSI 2 "register_operand")
4145 (match_operand:<VCOND> 3 "register_operand")
4146 (match_operand:SI 4 "immediate_operand")]
4147 "TARGET_SIMD"
4148 {
4149 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4150 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4151 operands[2], operands[3],
4152 operands[4], p));
4153 DONE;
4154 })
4155
4156 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4157 [(match_operand:<VWIDE> 0 "register_operand")
4158 (match_operand:<VWIDE> 1 "register_operand")
4159 (match_operand:VQ_HSI 2 "register_operand")
4160 (match_operand:<VCONQ> 3 "register_operand")
4161 (match_operand:SI 4 "immediate_operand")]
4162 "TARGET_SIMD"
4163 {
4164 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4165 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4166 operands[2], operands[3],
4167 operands[4], p));
4168 DONE;
4169 })
4170
4171 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4172 [(match_operand:<VWIDE> 0 "register_operand")
4173 (match_operand:<VWIDE> 1 "register_operand")
4174 (match_operand:VQ_HSI 2 "register_operand")
4175 (match_operand:<VCOND> 3 "register_operand")
4176 (match_operand:SI 4 "immediate_operand")]
4177 "TARGET_SIMD"
4178 {
4179 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4180 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4181 operands[2], operands[3],
4182 operands[4], p));
4183 DONE;
4184 })
4185
4186 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4187 [(match_operand:<VWIDE> 0 "register_operand")
4188 (match_operand:<VWIDE> 1 "register_operand")
4189 (match_operand:VQ_HSI 2 "register_operand")
4190 (match_operand:<VCONQ> 3 "register_operand")
4191 (match_operand:SI 4 "immediate_operand")]
4192 "TARGET_SIMD"
4193 {
4194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4195 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4196 operands[2], operands[3],
4197 operands[4], p));
4198 DONE;
4199 })
4200
4201 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (SBINQOPS:<VWIDE>
4204 (match_operand:<VWIDE> 1 "register_operand" "0")
4205 (ss_ashift:<VWIDE>
4206 (mult:<VWIDE>
4207 (sign_extend:<VWIDE>
4208 (vec_select:<VHALF>
4209 (match_operand:VQ_HSI 2 "register_operand" "w")
4210 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4211 (sign_extend:<VWIDE>
4212 (vec_duplicate:<VHALF>
4213 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4214 (const_int 1))))]
4215 "TARGET_SIMD"
4216 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4217 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4218 )
4219
4220 (define_expand "aarch64_sqdmlal2_n<mode>"
4221 [(match_operand:<VWIDE> 0 "register_operand")
4222 (match_operand:<VWIDE> 1 "register_operand")
4223 (match_operand:VQ_HSI 2 "register_operand")
4224 (match_operand:<VEL> 3 "register_operand")]
4225 "TARGET_SIMD"
4226 {
4227 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4228 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4229 operands[2], operands[3],
4230 p));
4231 DONE;
4232 })
4233
4234 (define_expand "aarch64_sqdmlsl2_n<mode>"
4235 [(match_operand:<VWIDE> 0 "register_operand")
4236 (match_operand:<VWIDE> 1 "register_operand")
4237 (match_operand:VQ_HSI 2 "register_operand")
4238 (match_operand:<VEL> 3 "register_operand")]
4239 "TARGET_SIMD"
4240 {
4241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4242 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4243 operands[2], operands[3],
4244 p));
4245 DONE;
4246 })
4247
4248 ;; vqdmull
4249
4250 (define_insn "aarch64_sqdmull<mode>"
4251 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4252 (ss_ashift:<VWIDE>
4253 (mult:<VWIDE>
4254 (sign_extend:<VWIDE>
4255 (match_operand:VSD_HSI 1 "register_operand" "w"))
4256 (sign_extend:<VWIDE>
4257 (match_operand:VSD_HSI 2 "register_operand" "w")))
4258 (const_int 1)))]
4259 "TARGET_SIMD"
4260 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4261 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4262 )
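
;; Reference corner case: sqdmull of 0x8000 * 0x8000 doubles to 2^31,
;; which does not fit in 32 bits, so the result saturates to 0x7fffffff;
;; every other input pair is exact.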
4263
4264 ;; vqdmull_lane
4265
4266 (define_insn "aarch64_sqdmull_lane<mode>"
4267 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4268 (ss_ashift:<VWIDE>
4269 (mult:<VWIDE>
4270 (sign_extend:<VWIDE>
4271 (match_operand:VD_HSI 1 "register_operand" "w"))
4272 (sign_extend:<VWIDE>
4273 (vec_duplicate:VD_HSI
4274 (vec_select:<VEL>
4275 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4276 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4277 ))
4278 (const_int 1)))]
4279 "TARGET_SIMD"
4280 {
4281 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4282 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4283 }
4284 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4285 )
4286
4287 (define_insn "aarch64_sqdmull_laneq<mode>"
4288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4289 (ss_ashift:<VWIDE>
4290 (mult:<VWIDE>
4291 (sign_extend:<VWIDE>
4292 (match_operand:VD_HSI 1 "register_operand" "w"))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:VD_HSI
4295 (vec_select:<VEL>
4296 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298 ))
4299 (const_int 1)))]
4300 "TARGET_SIMD"
4301 {
4302 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4303 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304 }
4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4306 )
4307
4308 (define_insn "aarch64_sqdmull_lane<mode>"
4309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310 (ss_ashift:<VWIDE>
4311 (mult:<VWIDE>
4312 (sign_extend:<VWIDE>
4313 (match_operand:SD_HSI 1 "register_operand" "w"))
4314 (sign_extend:<VWIDE>
4315 (vec_select:<VEL>
4316 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4317 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4318 ))
4319 (const_int 1)))]
4320 "TARGET_SIMD"
4321 {
4322 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4323 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4324 }
4325 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4326 )
4327
4328 (define_insn "aarch64_sqdmull_laneq<mode>"
4329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4330 (ss_ashift:<VWIDE>
4331 (mult:<VWIDE>
4332 (sign_extend:<VWIDE>
4333 (match_operand:SD_HSI 1 "register_operand" "w"))
4334 (sign_extend:<VWIDE>
4335 (vec_select:<VEL>
4336 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4337 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4338 ))
4339 (const_int 1)))]
4340 "TARGET_SIMD"
4341 {
4342 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4343 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4344 }
4345 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4346 )
4347
4348 ;; vqdmull_n
4349
4350 (define_insn "aarch64_sqdmull_n<mode>"
4351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4352 (ss_ashift:<VWIDE>
4353 (mult:<VWIDE>
4354 (sign_extend:<VWIDE>
4355 (match_operand:VD_HSI 1 "register_operand" "w"))
4356 (sign_extend:<VWIDE>
4357 (vec_duplicate:VD_HSI
4358 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4359 )
4360 (const_int 1)))]
4361 "TARGET_SIMD"
4362 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4363 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4364 )
4365
4366 ;; vqdmull2
4367
4370 (define_insn "aarch64_sqdmull2<mode>_internal"
4371 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4372 (ss_ashift:<VWIDE>
4373 (mult:<VWIDE>
4374 (sign_extend:<VWIDE>
4375 (vec_select:<VHALF>
4376 (match_operand:VQ_HSI 1 "register_operand" "w")
4377 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4378 (sign_extend:<VWIDE>
4379 (vec_select:<VHALF>
4380 (match_operand:VQ_HSI 2 "register_operand" "w")
4381 (match_dup 3)))
4382 )
4383 (const_int 1)))]
4384 "TARGET_SIMD"
4385 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4386 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4387 )
4388
4389 (define_expand "aarch64_sqdmull2<mode>"
4390 [(match_operand:<VWIDE> 0 "register_operand")
4391 (match_operand:VQ_HSI 1 "register_operand")
4392 (match_operand:VQ_HSI 2 "register_operand")]
4393 "TARGET_SIMD"
4394 {
4395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4396 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4397 operands[2], p));
4398 DONE;
4399 })
4400
4401 ;; vqdmull2_lane
4402
4403 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4404 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4405 (ss_ashift:<VWIDE>
4406 (mult:<VWIDE>
4407 (sign_extend:<VWIDE>
4408 (vec_select:<VHALF>
4409 (match_operand:VQ_HSI 1 "register_operand" "w")
4410 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4411 (sign_extend:<VWIDE>
4412 (vec_duplicate:<VHALF>
4413 (vec_select:<VEL>
4414 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4415 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4416 ))
4417 (const_int 1)))]
4418 "TARGET_SIMD"
4419 {
4420 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4421 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4422 }
4423 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4424 )
4425
4426 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4427 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4428 (ss_ashift:<VWIDE>
4429 (mult:<VWIDE>
4430 (sign_extend:<VWIDE>
4431 (vec_select:<VHALF>
4432 (match_operand:VQ_HSI 1 "register_operand" "w")
4433 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4434 (sign_extend:<VWIDE>
4435 (vec_duplicate:<VHALF>
4436 (vec_select:<VEL>
4437 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4438 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4439 ))
4440 (const_int 1)))]
4441 "TARGET_SIMD"
4442 {
4443 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4444 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4445 }
4446 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4447 )
4448
4449 (define_expand "aarch64_sqdmull2_lane<mode>"
4450 [(match_operand:<VWIDE> 0 "register_operand")
4451 (match_operand:VQ_HSI 1 "register_operand")
4452 (match_operand:<VCOND> 2 "register_operand")
4453 (match_operand:SI 3 "immediate_operand")]
4454 "TARGET_SIMD"
4455 {
4456 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4457 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4458 operands[2], operands[3],
4459 p));
4460 DONE;
4461 })
4462
4463 (define_expand "aarch64_sqdmull2_laneq<mode>"
4464 [(match_operand:<VWIDE> 0 "register_operand")
4465 (match_operand:VQ_HSI 1 "register_operand")
4466 (match_operand:<VCONQ> 2 "register_operand")
4467 (match_operand:SI 3 "immediate_operand")]
4468 "TARGET_SIMD"
4469 {
4470 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4471 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4472 operands[2], operands[3],
4473 p));
4474 DONE;
4475 })
4476
4477 ;; vqdmull2_n
4478
4479 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4480 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4481 (ss_ashift:<VWIDE>
4482 (mult:<VWIDE>
4483 (sign_extend:<VWIDE>
4484 (vec_select:<VHALF>
4485 (match_operand:VQ_HSI 1 "register_operand" "w")
4486 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4487 (sign_extend:<VWIDE>
4488 (vec_duplicate:<VHALF>
4489 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4490 )
4491 (const_int 1)))]
4492 "TARGET_SIMD"
4493 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4494 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4495 )
4496
4497 (define_expand "aarch64_sqdmull2_n<mode>"
4498 [(match_operand:<VWIDE> 0 "register_operand")
4499 (match_operand:VQ_HSI 1 "register_operand")
4500 (match_operand:<VEL> 2 "register_operand")]
4501 "TARGET_SIMD"
4502 {
4503 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4504 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4505 operands[2], p));
4506 DONE;
4507 })
4508
4509 ;; vshl
4510
4511 (define_insn "aarch64_<sur>shl<mode>"
4512 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4513 (unspec:VSDQ_I_DI
4514 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4515 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4516 VSHL))]
4517 "TARGET_SIMD"
4518 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4519 [(set_attr "type" "neon_shift_reg<q>")]
4520 )
4521
4522
4523 ;; vqshl
4524
4525 (define_insn "aarch64_<sur>q<r>shl<mode>"
4526 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4527 (unspec:VSDQ_I
4528 [(match_operand:VSDQ_I 1 "register_operand" "w")
4529 (match_operand:VSDQ_I 2 "register_operand" "w")]
4530 VQSHL))]
4531 "TARGET_SIMD"
4532 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4533 [(set_attr "type" "neon_sat_shift_reg<q>")]
4534 )
4535
4536 ;; vshll_n
4537
4538 (define_insn "aarch64_<sur>shll_n<mode>"
4539 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4540 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4541 (match_operand:SI 2
4542 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4543 VSHLL))]
4544 "TARGET_SIMD"
4545 {
4546 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4547 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4548 else
4549 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4550 }
4551 [(set_attr "type" "neon_shift_imm_long")]
4552 )
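
;; The special case above exists because a shift equal to the element
;; width has no sshll/ushll encoding and must use SHLL instead, e.g.
;; (illustrative):
;;   sshll v0.8h, v1.8b, #3   ; immediate shifts 0-7 use the s/u form
;;   shll  v0.8h, v1.8b, #8   ; shift == 8 only exists as SHLL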
4553
4554 ;; vshll_high_n
4555
4556 (define_insn "aarch64_<sur>shll2_n<mode>"
4557 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4558 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4559 (match_operand:SI 2 "immediate_operand" "i")]
4560 VSHLL))]
4561 "TARGET_SIMD"
4562 {
4563 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4564 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4565 else
4566 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4567 }
4568 [(set_attr "type" "neon_shift_imm_long")]
4569 )
4570
4571 ;; vrshr_n
4572
4573 (define_insn "aarch64_<sur>shr_n<mode>"
4574 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4575 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4576 (match_operand:SI 2
4577 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4578 VRSHR_N))]
4579 "TARGET_SIMD"
4580 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4581 [(set_attr "type" "neon_sat_shift_imm<q>")]
4582 )
4583
4584 ;; v(r)sra_n
4585
4586 (define_insn "aarch64_<sur>sra_n<mode>"
4587 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4588 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4589 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4590 (match_operand:SI 3
4591 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4592 VSRA))]
4593 "TARGET_SIMD"
4594 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4595 [(set_attr "type" "neon_shift_acc<q>")]
4596 )
4597
4598 ;; vs<lr>i_n
4599
4600 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4601 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4602 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4603 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4604 (match_operand:SI 3
4605 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4606 VSLRI))]
4607 "TARGET_SIMD"
4608 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4609 [(set_attr "type" "neon_shift_imm<q>")]
4610 )
4611
4612 ;; vqshl(u)
4613
4614 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4615 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4616 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4617 (match_operand:SI 2
4618 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4619 VQSHL_N))]
4620 "TARGET_SIMD"
4621 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4622 [(set_attr "type" "neon_sat_shift_imm<q>")]
4623 )
4624
4625
4626 ;; vq(r)shr(u)n_n
4627
4628 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4629 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4630 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4631 (match_operand:SI 2
4632 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4633 VQSHRN_N))]
4634 "TARGET_SIMD"
4635 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4636 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4637 )
4638
4639
4640 ;; cm(eq|ge|gt|lt|le)
4641 ;; Note: we have constraints for both Dz and Z because different expanders
4642 ;; have different ideas of what should be passed to this pattern.
4643
4644 (define_insn "aarch64_cm<optab><mode>"
4645 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4646 (neg:<V_INT_EQUIV>
4647 (COMPARISONS:<V_INT_EQUIV>
4648 (match_operand:VDQ_I 1 "register_operand" "w,w")
4649 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4650 )))]
4651 "TARGET_SIMD"
4652 "@
4653 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4654 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4655 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4656 )
4657
4658 (define_insn_and_split "aarch64_cm<optab>di"
4659 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4660 (neg:DI
4661 (COMPARISONS:DI
4662 (match_operand:DI 1 "register_operand" "w,w,r")
4663 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4664 )))
4665 (clobber (reg:CC CC_REGNUM))]
4666 "TARGET_SIMD"
4667 "#"
4668 "&& reload_completed"
4669 [(set (match_operand:DI 0 "register_operand")
4670 (neg:DI
4671 (COMPARISONS:DI
4672 (match_operand:DI 1 "register_operand")
4673 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4674 )))]
4675 {
4676 /* If the operands are in the general purpose register file,
4677 we split to a sequence of comparison and store. */
4678 if (GP_REGNUM_P (REGNO (operands[0]))
4679 && GP_REGNUM_P (REGNO (operands[1])))
4680 {
4681 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4682 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4683 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4684 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4685 DONE;
4686 }
4687 /* Otherwise, we expand to a similar pattern which does not
4688 clobber CC_REGNUM. */
4689 }
4690 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4691 )
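
;; A sketch of the two shapes this can take after the split (register
;; names illustrative):
;;   cmge  d0, d1, d2          ; SIMD side: all-ones/all-zeros mask in d0
;; versus, when everything lives in general-purpose registers:
;;   cmp   x1, x2
;;   csetm x0, ge              ; cstoredi_neg: -1 if true, 0 otherwise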
4692
4693 (define_insn "*aarch64_cm<optab>di"
4694 [(set (match_operand:DI 0 "register_operand" "=w,w")
4695 (neg:DI
4696 (COMPARISONS:DI
4697 (match_operand:DI 1 "register_operand" "w,w")
4698 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4699 )))]
4700 "TARGET_SIMD && reload_completed"
4701 "@
4702 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4703 cm<optab>\t%d0, %d1, #0"
4704 [(set_attr "type" "neon_compare, neon_compare_zero")]
4705 )
4706
4707 ;; cm(hs|hi)
4708
4709 (define_insn "aarch64_cm<optab><mode>"
4710 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4711 (neg:<V_INT_EQUIV>
4712 (UCOMPARISONS:<V_INT_EQUIV>
4713 (match_operand:VDQ_I 1 "register_operand" "w")
4714 (match_operand:VDQ_I 2 "register_operand" "w")
4715 )))]
4716 "TARGET_SIMD"
4717 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4718 [(set_attr "type" "neon_compare<q>")]
4719 )
4720
4721 (define_insn_and_split "aarch64_cm<optab>di"
4722 [(set (match_operand:DI 0 "register_operand" "=w,r")
4723 (neg:DI
4724 (UCOMPARISONS:DI
4725 (match_operand:DI 1 "register_operand" "w,r")
4726 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4727 )))
4728 (clobber (reg:CC CC_REGNUM))]
4729 "TARGET_SIMD"
4730 "#"
4731 "&& reload_completed"
4732 [(set (match_operand:DI 0 "register_operand")
4733 (neg:DI
4734 (UCOMPARISONS:DI
4735 (match_operand:DI 1 "register_operand")
4736 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4737 )))]
4738 {
4739 /* If the operands are in the general purpose register file,
4740 we split to a sequence of comparison and store. */
4741 if (GP_REGNUM_P (REGNO (operands[0]))
4742 && GP_REGNUM_P (REGNO (operands[1])))
4743 {
4744 machine_mode mode = CCmode;
4745 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4746 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4747 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4748 DONE;
4749 }
4750 /* Otherwise, we expand to a similar pattern which does not
4751 clobber CC_REGNUM. */
4752 }
4753 [(set_attr "type" "neon_compare,multiple")]
4754 )
4755
4756 (define_insn "*aarch64_cm<optab>di"
4757 [(set (match_operand:DI 0 "register_operand" "=w")
4758 (neg:DI
4759 (UCOMPARISONS:DI
4760 (match_operand:DI 1 "register_operand" "w")
4761 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4762 )))]
4763 "TARGET_SIMD && reload_completed"
4764 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4765 [(set_attr "type" "neon_compare")]
4766 )
4767
4768 ;; cmtst
4769
4770 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4771 ;; we don't have any insns using ne, and aarch64_vcond outputs
4772 ;; not (neg (eq (and x y) 0))
4773 ;; which is rewritten by simplify_rtx as
4774 ;; plus (eq (and x y) 0) -1.
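;; The last step is just the two's-complement identity not (-a) == a - 1,
;; applied lane-wise with a = (eq (and x y) 0).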
4775
4776 (define_insn "aarch64_cmtst<mode>"
4777 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4778 (plus:<V_INT_EQUIV>
4779 (eq:<V_INT_EQUIV>
4780 (and:VDQ_I
4781 (match_operand:VDQ_I 1 "register_operand" "w")
4782 (match_operand:VDQ_I 2 "register_operand" "w"))
4783 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4784 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4785 ]
4786 "TARGET_SIMD"
4787 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4788 [(set_attr "type" "neon_tst<q>")]
4789 )
4790
4791 (define_insn_and_split "aarch64_cmtstdi"
4792 [(set (match_operand:DI 0 "register_operand" "=w,r")
4793 (neg:DI
4794 (ne:DI
4795 (and:DI
4796 (match_operand:DI 1 "register_operand" "w,r")
4797 (match_operand:DI 2 "register_operand" "w,r"))
4798 (const_int 0))))
4799 (clobber (reg:CC CC_REGNUM))]
4800 "TARGET_SIMD"
4801 "#"
4802 "&& reload_completed"
4803 [(set (match_operand:DI 0 "register_operand")
4804 (neg:DI
4805 (ne:DI
4806 (and:DI
4807 (match_operand:DI 1 "register_operand")
4808 (match_operand:DI 2 "register_operand"))
4809 (const_int 0))))]
4810 {
4811 /* If the operands are in the general purpose register file,
4812 we split to a sequence of comparison and store. */
4813 if (GP_REGNUM_P (REGNO (operands[0]))
4814 && GP_REGNUM_P (REGNO (operands[1])))
4815 {
4816 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4817 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4818 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4819 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4820 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4821 DONE;
4822 }
4823 /* Otherwise, we expand to a similar pattern which does not
4824 clobber CC_REGNUM. */
4825 }
4826 [(set_attr "type" "neon_tst,multiple")]
4827 )
4828
4829 (define_insn "*aarch64_cmtstdi"
4830 [(set (match_operand:DI 0 "register_operand" "=w")
4831 (neg:DI
4832 (ne:DI
4833 (and:DI
4834 (match_operand:DI 1 "register_operand" "w")
4835 (match_operand:DI 2 "register_operand" "w"))
4836 (const_int 0))))]
4837 "TARGET_SIMD"
4838 "cmtst\t%d0, %d1, %d2"
4839 [(set_attr "type" "neon_tst")]
4840 )
4841
4842 ;; fcm(eq|ge|gt|le|lt)
4843
4844 (define_insn "aarch64_cm<optab><mode>"
4845 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4846 (neg:<V_INT_EQUIV>
4847 (COMPARISONS:<V_INT_EQUIV>
4848 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4849 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4850 )))]
4851 "TARGET_SIMD"
4852 "@
4853 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4854 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4855 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4856 )
4857
4858 ;; fac(ge|gt)
4859 ;; Note we can also handle what would be fac(le|lt) by
4860 ;; generating fac(ge|gt).
4861
4862 (define_insn "aarch64_fac<optab><mode>"
4863 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4864 (neg:<V_INT_EQUIV>
4865 (FAC_COMPARISONS:<V_INT_EQUIV>
4866 (abs:VHSDF_HSDF
4867 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4868 (abs:VHSDF_HSDF
4869 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4870 )))]
4871 "TARGET_SIMD"
4872 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4873 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4874 )
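
;; E.g. what would be facle (a, b) is emitted as facge (b, a); the
;; <cmp_1>/<cmp_2> attributes in the template above perform that
;; operand swap.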
4875
4876 ;; addp
4877
4878 (define_insn "aarch64_addp<mode>"
4879 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4880 (unspec:VD_BHSI
4881 [(match_operand:VD_BHSI 1 "register_operand" "w")
4882 (match_operand:VD_BHSI 2 "register_operand" "w")]
4883 UNSPEC_ADDP))]
4884 "TARGET_SIMD"
4885 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4886 [(set_attr "type" "neon_reduc_add<q>")]
4887 )
4888
4889 (define_insn "aarch64_addpdi"
4890 [(set (match_operand:DI 0 "register_operand" "=w")
4891 (unspec:DI
4892 [(match_operand:V2DI 1 "register_operand" "w")]
4893 UNSPEC_ADDP))]
4894 "TARGET_SIMD"
4895 "addp\t%d0, %1.2d"
4896 [(set_attr "type" "neon_reduc_add")]
4897 )
4898
4899 ;; sqrt
4900
4901 (define_expand "sqrt<mode>2"
4902 [(set (match_operand:VHSDF 0 "register_operand")
4903 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4904 "TARGET_SIMD"
4905 {
4906 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4907 DONE;
4908 })
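;; aarch64_emit_approx_sqrt emits an frsqrte/frsqrts estimate-and-refine
;; sequence when approximate square root is enabled for the current
;; tuning and flags; if it returns false, the expander falls through to
;; the fsqrt insn below.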
4909
4910 (define_insn "*sqrt<mode>2"
4911 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4912 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4913 "TARGET_SIMD"
4914 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4915 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4916 )
4917
4918 ;; Patterns for vector struct loads and stores.
4919
4920 (define_insn "aarch64_simd_ld2<mode>"
4921 [(set (match_operand:OI 0 "register_operand" "=w")
4922 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4923 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4924 UNSPEC_LD2))]
4925 "TARGET_SIMD"
4926 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4927 [(set_attr "type" "neon_load2_2reg<q>")]
4928 )
4929
4930 (define_insn "aarch64_simd_ld2r<mode>"
4931 [(set (match_operand:OI 0 "register_operand" "=w")
4932 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4933 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4934 UNSPEC_LD2_DUP))]
4935 "TARGET_SIMD"
4936 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4937 [(set_attr "type" "neon_load2_all_lanes<q>")]
4938 )
4939
4940 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4941 [(set (match_operand:OI 0 "register_operand" "=w")
4942 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4943 (match_operand:OI 2 "register_operand" "0")
4944 (match_operand:SI 3 "immediate_operand" "i")
4945 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4946 UNSPEC_LD2_LANE))]
4947 "TARGET_SIMD"
4948 {
4949 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4950 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4951 }
4952 [(set_attr "type" "neon_load2_one_lane")]
4953 )
4954
4955 (define_expand "vec_load_lanesoi<mode>"
4956 [(set (match_operand:OI 0 "register_operand")
4957 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4958 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4959 UNSPEC_LD2))]
4960 "TARGET_SIMD"
4961 {
4962 if (BYTES_BIG_ENDIAN)
4963 {
4964 rtx tmp = gen_reg_rtx (OImode);
4965 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4966 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4967 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4968 }
4969 else
4970 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4971 DONE;
4972 })
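;; On big-endian targets ld2 delivers the lanes in architectural order,
;; so the expander above passes the result through aarch64_rev_reglist
;; (with a mask from aarch64_reverse_mask) to recover GCC's
;; vector-extension lane numbering.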
4973
4974 (define_insn "aarch64_simd_st2<mode>"
4975 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4976 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4977 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4978 UNSPEC_ST2))]
4979 "TARGET_SIMD"
4980 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4981 [(set_attr "type" "neon_store2_2reg<q>")]
4982 )
4983
4984 ;; RTL uses GCC vector extension indices, so flip only for assembly.
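;; For example, on big-endian aarch64_endian_lane_rtx maps lane n of an
;; N-lane vector to N - 1 - n, so lane 0 of a four-lane vector is
;; printed as lane 3.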
4985 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4986 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4987 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4988 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4989 (match_operand:SI 2 "immediate_operand" "i")]
4990 UNSPEC_ST2_LANE))]
4991 "TARGET_SIMD"
4992 {
4993 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4994 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4995 }
4996 [(set_attr "type" "neon_store2_one_lane<q>")]
4997 )
4998
4999 (define_expand "vec_store_lanesoi<mode>"
5000 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5001 (unspec:OI [(match_operand:OI 1 "register_operand")
5002 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5003 UNSPEC_ST2))]
5004 "TARGET_SIMD"
5005 {
5006 if (BYTES_BIG_ENDIAN)
5007 {
5008 rtx tmp = gen_reg_rtx (OImode);
5009 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5010 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5011 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5012 }
5013 else
5014 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5015 DONE;
5016 })
5017
5018 (define_insn "aarch64_simd_ld3<mode>"
5019 [(set (match_operand:CI 0 "register_operand" "=w")
5020 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5021 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022 UNSPEC_LD3))]
5023 "TARGET_SIMD"
5024 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5025 [(set_attr "type" "neon_load3_3reg<q>")]
5026 )
5027
5028 (define_insn "aarch64_simd_ld3r<mode>"
5029 [(set (match_operand:CI 0 "register_operand" "=w")
5030 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5031 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5032 UNSPEC_LD3_DUP))]
5033 "TARGET_SIMD"
5034 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5035 [(set_attr "type" "neon_load3_all_lanes<q>")]
5036 )
5037
5038 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5039 [(set (match_operand:CI 0 "register_operand" "=w")
5040 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5041 (match_operand:CI 2 "register_operand" "0")
5042 (match_operand:SI 3 "immediate_operand" "i")
5043 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5044 UNSPEC_LD3_LANE))]
5045 "TARGET_SIMD"
5046 {
5047 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5048 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5049 }
5050 [(set_attr "type" "neon_load3_one_lane")]
5051 )
5052
5053 (define_expand "vec_load_lanesci<mode>"
5054 [(set (match_operand:CI 0 "register_operand")
5055 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5056 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5057 UNSPEC_LD3))]
5058 "TARGET_SIMD"
5059 {
5060 if (BYTES_BIG_ENDIAN)
5061 {
5062 rtx tmp = gen_reg_rtx (CImode);
5063 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5064 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5065 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5066 }
5067 else
5068 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5069 DONE;
5070 })
5071
5072 (define_insn "aarch64_simd_st3<mode>"
5073 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5074 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5075 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5076 UNSPEC_ST3))]
5077 "TARGET_SIMD"
5078 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5079 [(set_attr "type" "neon_store3_3reg<q>")]
5080 )
5081
5082 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5083 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5084 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5085 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5086 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5087 (match_operand:SI 2 "immediate_operand" "i")]
5088 UNSPEC_ST3_LANE))]
5089 "TARGET_SIMD"
5090 {
5091 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5092 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5093 }
5094 [(set_attr "type" "neon_store3_one_lane<q>")]
5095 )
5096
5097 (define_expand "vec_store_lanesci<mode>"
5098 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5099 (unspec:CI [(match_operand:CI 1 "register_operand")
5100 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5101 UNSPEC_ST3))]
5102 "TARGET_SIMD"
5103 {
5104 if (BYTES_BIG_ENDIAN)
5105 {
5106 rtx tmp = gen_reg_rtx (CImode);
5107 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5108 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5109 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5110 }
5111 else
5112 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5113 DONE;
5114 })
5115
5116 (define_insn "aarch64_simd_ld4<mode>"
5117 [(set (match_operand:XI 0 "register_operand" "=w")
5118 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5119 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5120 UNSPEC_LD4))]
5121 "TARGET_SIMD"
5122 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5123 [(set_attr "type" "neon_load4_4reg<q>")]
5124 )
5125
5126 (define_insn "aarch64_simd_ld4r<mode>"
5127 [(set (match_operand:XI 0 "register_operand" "=w")
5128 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5129 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5130 UNSPEC_LD4_DUP))]
5131 "TARGET_SIMD"
5132 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5133 [(set_attr "type" "neon_load4_all_lanes<q>")]
5134 )
5135
5136 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5137 [(set (match_operand:XI 0 "register_operand" "=w")
5138 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5139 (match_operand:XI 2 "register_operand" "0")
5140 (match_operand:SI 3 "immediate_operand" "i")
5141 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5142 UNSPEC_LD4_LANE))]
5143 "TARGET_SIMD"
5144 {
5145 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5146 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5147 }
5148 [(set_attr "type" "neon_load4_one_lane")]
5149 )
5150
5151 (define_expand "vec_load_lanesxi<mode>"
5152 [(set (match_operand:XI 0 "register_operand")
5153 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5154 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5155 UNSPEC_LD4))]
5156 "TARGET_SIMD"
5157 {
5158 if (BYTES_BIG_ENDIAN)
5159 {
5160 rtx tmp = gen_reg_rtx (XImode);
5161 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5162 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5163 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5164 }
5165 else
5166 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5167 DONE;
5168 })
5169
5170 (define_insn "aarch64_simd_st4<mode>"
5171 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5172 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5173 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5174 UNSPEC_ST4))]
5175 "TARGET_SIMD"
5176 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5177 [(set_attr "type" "neon_store4_4reg<q>")]
5178 )
5179
5180 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5181 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5182 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5183 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5184 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5185 (match_operand:SI 2 "immediate_operand" "i")]
5186 UNSPEC_ST4_LANE))]
5187 "TARGET_SIMD"
5188 {
5189 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5190 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5191 }
5192 [(set_attr "type" "neon_store4_one_lane<q>")]
5193 )
5194
5195 (define_expand "vec_store_lanesxi<mode>"
5196 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5197 (unspec:XI [(match_operand:XI 1 "register_operand")
5198 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5199 UNSPEC_ST4))]
5200 "TARGET_SIMD"
5201 {
5202 if (BYTES_BIG_ENDIAN)
5203 {
5204 rtx tmp = gen_reg_rtx (XImode);
5205 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5206 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5207 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5208 }
5209 else
5210 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5211 DONE;
5212 })
5213
5214 (define_insn_and_split "aarch64_rev_reglist<mode>"
5215 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5216 (unspec:VSTRUCT
5217 [(match_operand:VSTRUCT 1 "register_operand" "w")
5218 (match_operand:V16QI 2 "register_operand" "w")]
5219 UNSPEC_REV_REGLIST))]
5220 "TARGET_SIMD"
5221 "#"
5222 "&& reload_completed"
5223 [(const_int 0)]
5224 {
5225 int i;
5226 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5227 for (i = 0; i < nregs; i++)
5228 {
5229 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5230 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5231 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5232 }
5233 DONE;
5234 }
5235 [(set_attr "type" "neon_tbl1_q")
5236 (set_attr "length" "<insn_count>")]
5237 )
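;; For an OI (two-register) list the split above emits two tbl
;; instructions, one per 16-byte register; CI and XI lists emit three
;; and four, which is what the <insn_count> length attribute reflects.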
5238
5239 ;; Reload patterns for AdvSIMD register list operands.
5240
5241 (define_expand "mov<mode>"
5242 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5243 (match_operand:VSTRUCT 1 "general_operand"))]
5244 "TARGET_SIMD"
5245 {
5246 if (can_create_pseudo_p ())
5247 {
5248 if (GET_CODE (operands[0]) != REG)
5249 operands[1] = force_reg (<MODE>mode, operands[1]);
5250 }
5251 })
5252
5253
5254 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5255 [(match_operand:CI 0 "register_operand")
5256 (match_operand:DI 1 "register_operand")
5257 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5258 "TARGET_SIMD"
5259 {
5260 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5261 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5262 DONE;
5263 })
5264
5265 (define_insn "aarch64_ld1_x3_<mode>"
5266 [(set (match_operand:CI 0 "register_operand" "=w")
5267 (unspec:CI
5268 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5269 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5270 "TARGET_SIMD"
5271 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5272 [(set_attr "type" "neon_load1_3reg<q>")]
5273 )
5274
5275 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5276 [(match_operand:DI 0 "register_operand")
5277 (match_operand:OI 1 "register_operand")
5278 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5279 "TARGET_SIMD"
5280 {
5281 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5282 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5283 DONE;
5284 })
5285
5286 (define_insn "aarch64_st1_x2_<mode>"
5287 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5288 (unspec:OI
5289 [(match_operand:OI 1 "register_operand" "w")
5290 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5291 "TARGET_SIMD"
5292 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5293 [(set_attr "type" "neon_store1_2reg<q>")]
5294 )
5295
5296 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5297 [(match_operand:DI 0 "register_operand")
5298 (match_operand:CI 1 "register_operand")
5299 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5300 "TARGET_SIMD"
5301 {
5302 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5303 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5304 DONE;
5305 })
5306
5307 (define_insn "aarch64_st1_x3_<mode>"
5308 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5309 (unspec:CI
5310 [(match_operand:CI 1 "register_operand" "w")
5311 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5312 "TARGET_SIMD"
5313 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5314 [(set_attr "type" "neon_store1_3reg<q>")]
5315 )
5316
5317 (define_insn "*aarch64_mov<mode>"
5318 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5319 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5320 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5321 && (register_operand (operands[0], <MODE>mode)
5322 || register_operand (operands[1], <MODE>mode))"
5323 "@
5324 #
5325 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5326 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5327 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5328 neon_load<nregs>_<nregs>reg_q")
5329 (set_attr "length" "<insn_count>,4,4")]
5330 )
5331
5332 (define_insn "aarch64_be_ld1<mode>"
5333 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5334 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5335 "aarch64_simd_struct_operand" "Utv")]
5336 UNSPEC_LD1))]
5337 "TARGET_SIMD"
5338 "ld1\\t{%0<Vmtype>}, %1"
5339 [(set_attr "type" "neon_load1_1reg<q>")]
5340 )
5341
5342 (define_insn "aarch64_be_st1<mode>"
5343 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5344 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5345 UNSPEC_ST1))]
5346 "TARGET_SIMD"
5347 "st1\\t{%1<Vmtype>}, %0"
5348 [(set_attr "type" "neon_store1_1reg<q>")]
5349 )
5350
5351 (define_insn "*aarch64_be_movoi"
5352 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5353 (match_operand:OI 1 "general_operand" " w,w,m"))]
5354 "TARGET_SIMD && BYTES_BIG_ENDIAN
5355 && (register_operand (operands[0], OImode)
5356 || register_operand (operands[1], OImode))"
5357 "@
5358 #
5359 stp\\t%q1, %R1, %0
5360 ldp\\t%q0, %R0, %1"
5361 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5362 (set_attr "length" "8,4,4")]
5363 )
5364
5365 (define_insn "*aarch64_be_movci"
5366 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5367 (match_operand:CI 1 "general_operand" " w,w,o"))]
5368 "TARGET_SIMD && BYTES_BIG_ENDIAN
5369 && (register_operand (operands[0], CImode)
5370 || register_operand (operands[1], CImode))"
5371 "#"
5372 [(set_attr "type" "multiple")
5373 (set_attr "length" "12,4,4")]
5374 )
5375
5376 (define_insn "*aarch64_be_movxi"
5377 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5378 (match_operand:XI 1 "general_operand" " w,w,o"))]
5379 "TARGET_SIMD && BYTES_BIG_ENDIAN
5380 && (register_operand (operands[0], XImode)
5381 || register_operand (operands[1], XImode))"
5382 "#"
5383 [(set_attr "type" "multiple")
5384 (set_attr "length" "16,4,4")]
5385 )
5386
5387 (define_split
5388 [(set (match_operand:OI 0 "register_operand")
5389 (match_operand:OI 1 "register_operand"))]
5390 "TARGET_SIMD && reload_completed"
5391 [(const_int 0)]
5392 {
5393 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5394 DONE;
5395 })
5396
5397 (define_split
5398 [(set (match_operand:CI 0 "nonimmediate_operand")
5399 (match_operand:CI 1 "general_operand"))]
5400 "TARGET_SIMD && reload_completed"
5401 [(const_int 0)]
5402 {
5403 if (register_operand (operands[0], CImode)
5404 && register_operand (operands[1], CImode))
5405 {
5406 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5407 DONE;
5408 }
5409 else if (BYTES_BIG_ENDIAN)
5410 {
5411 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5412 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5413 emit_move_insn (gen_lowpart (V16QImode,
5414 simplify_gen_subreg (TImode, operands[0],
5415 CImode, 32)),
5416 gen_lowpart (V16QImode,
5417 simplify_gen_subreg (TImode, operands[1],
5418 CImode, 32)));
5419 DONE;
5420 }
5421 else
5422 FAIL;
5423 })
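;; In the big-endian CI case above, the first two registers move as a
;; single OImode subreg and the third moves as the TImode subreg at byte
;; offset 32, wrapped as V16QI so the move goes through the vector
;; registers.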
5424
5425 (define_split
5426 [(set (match_operand:XI 0 "nonimmediate_operand")
5427 (match_operand:XI 1 "general_operand"))]
5428 "TARGET_SIMD && reload_completed"
5429 [(const_int 0)]
5430 {
5431 if (register_operand (operands[0], XImode)
5432 && register_operand (operands[1], XImode))
5433 {
5434 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5435 DONE;
5436 }
5437 else if (BYTES_BIG_ENDIAN)
5438 {
5439 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5440 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5441 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5442 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5443 DONE;
5444 }
5445 else
5446 FAIL;
5447 })
5448
5449 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5450 [(match_operand:VSTRUCT 0 "register_operand")
5451 (match_operand:DI 1 "register_operand")
5452 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5453 "TARGET_SIMD"
5454 {
5455 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5456 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5457 * <VSTRUCT:nregs>);
5458
5459 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5460 mem));
5461 DONE;
5462 })
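;; The BLK MEM gets an explicit size because ld<n>r reads only one
;; element per register: for example, ld4r on V4SI reads
;; GET_MODE_SIZE (SImode) * 4 = 16 bytes.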
5463
5464 (define_insn "aarch64_ld2<mode>_dreg"
5465 [(set (match_operand:OI 0 "register_operand" "=w")
5466 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5467 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5468 UNSPEC_LD2_DREG))]
5469 "TARGET_SIMD"
5470 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5471 [(set_attr "type" "neon_load2_2reg<q>")]
5472 )
5473
5474 (define_insn "aarch64_ld2<mode>_dreg"
5475 [(set (match_operand:OI 0 "register_operand" "=w")
5476 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5477 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5478 UNSPEC_LD2_DREG))]
5479 "TARGET_SIMD"
5480 "ld1\\t{%S0.1d - %T0.1d}, %1"
5481 [(set_attr "type" "neon_load1_2reg<q>")]
5482 )
5483
5484 (define_insn "aarch64_ld3<mode>_dreg"
5485 [(set (match_operand:CI 0 "register_operand" "=w")
5486 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5487 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 UNSPEC_LD3_DREG))]
5489 "TARGET_SIMD"
5490 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5491 [(set_attr "type" "neon_load3_3reg<q>")]
5492 )
5493
5494 (define_insn "aarch64_ld3<mode>_dreg"
5495 [(set (match_operand:CI 0 "register_operand" "=w")
5496 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5497 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5498 UNSPEC_LD3_DREG))]
5499 "TARGET_SIMD"
5500 "ld1\\t{%S0.1d - %U0.1d}, %1"
5501 [(set_attr "type" "neon_load1_3reg<q>")]
5502 )
5503
5504 (define_insn "aarch64_ld4<mode>_dreg"
5505 [(set (match_operand:XI 0 "register_operand" "=w")
5506 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5507 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5508 UNSPEC_LD4_DREG))]
5509 "TARGET_SIMD"
5510 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5511 [(set_attr "type" "neon_load4_4reg<q>")]
5512 )
5513
5514 (define_insn "aarch64_ld4<mode>_dreg"
5515 [(set (match_operand:XI 0 "register_operand" "=w")
5516 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5517 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5518 UNSPEC_LD4_DREG))]
5519 "TARGET_SIMD"
5520 "ld1\\t{%S0.1d - %V0.1d}, %1"
5521 [(set_attr "type" "neon_load1_4reg<q>")]
5522 )
5523
5524 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5525 [(match_operand:VSTRUCT 0 "register_operand")
5526 (match_operand:DI 1 "register_operand")
5527 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5528 "TARGET_SIMD"
5529 {
5530 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5531 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5532
5533 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5534 DONE;
5535 })
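;; Each structure register here is a 64-bit D register, so the memory
;; block is <VSTRUCT:nregs> * 8 bytes: 24 bytes for ld3, for example.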
5536
5537 (define_expand "aarch64_ld1<VALL_F16:mode>"
5538 [(match_operand:VALL_F16 0 "register_operand")
5539 (match_operand:DI 1 "register_operand")]
5540 "TARGET_SIMD"
5541 {
5542 machine_mode mode = <VALL_F16:MODE>mode;
5543 rtx mem = gen_rtx_MEM (mode, operands[1]);
5544
5545 if (BYTES_BIG_ENDIAN)
5546 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5547 else
5548 emit_move_insn (operands[0], mem);
5549 DONE;
5550 })
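;; Big-endian needs the element-ordered ld1 above because a plain
;; register load would bring the quadword in the wrong lane order for
;; GCC's in-register layout; little-endian can use an ordinary move.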
5551
5552 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5553 [(match_operand:VSTRUCT 0 "register_operand")
5554 (match_operand:DI 1 "register_operand")
5555 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5556 "TARGET_SIMD"
5557 {
5558 machine_mode mode = <VSTRUCT:MODE>mode;
5559 rtx mem = gen_rtx_MEM (mode, operands[1]);
5560
5561 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5562 DONE;
5563 })
5564
5565 (define_expand "aarch64_ld1x2<VQ:mode>"
5566 [(match_operand:OI 0 "register_operand")
5567 (match_operand:DI 1 "register_operand")
5568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569 "TARGET_SIMD"
5570 {
5571 machine_mode mode = OImode;
5572 rtx mem = gen_rtx_MEM (mode, operands[1]);
5573
5574 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5575 DONE;
5576 })
5577
5578 (define_expand "aarch64_ld1x2<VDC:mode>"
5579 [(match_operand:OI 0 "register_operand")
5580 (match_operand:DI 1 "register_operand")
5581 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5582 "TARGET_SIMD"
5583 {
5584 machine_mode mode = OImode;
5585 rtx mem = gen_rtx_MEM (mode, operands[1]);
5586
5587 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5588 DONE;
5589 })
5590
5591
5592 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5593 [(match_operand:VSTRUCT 0 "register_operand")
5594 (match_operand:DI 1 "register_operand")
5595 (match_operand:VSTRUCT 2 "register_operand")
5596 (match_operand:SI 3 "immediate_operand")
5597 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5598 "TARGET_SIMD"
5599 {
5600 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5601 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5602 * <VSTRUCT:nregs>);
5603
5604 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5605 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5606 operands[0], mem, operands[2], operands[3]));
5607 DONE;
5608 })
5609
5610 ;; Expanders for builtins to extract vector registers from large
5611 ;; opaque integer modes.
5612
5613 ;; D-register list.
5614
5615 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5616 [(match_operand:VDC 0 "register_operand")
5617 (match_operand:VSTRUCT 1 "register_operand")
5618 (match_operand:SI 2 "immediate_operand")]
5619 "TARGET_SIMD"
5620 {
5621 int part = INTVAL (operands[2]);
5622 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5623 int offset = part * 16;
5624
5625 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5626 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5627 DONE;
5628 })
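;; For example, extracting D register 1 of a list copies the 128-bit
;; subreg at byte offset 1 * 16 into a temporary and takes its low
;; 64 bits via gen_lowpart.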
5629
5630 ;; Q-register list.
5631
5632 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5633 [(match_operand:VQ 0 "register_operand")
5634 (match_operand:VSTRUCT 1 "register_operand")
5635 (match_operand:SI 2 "immediate_operand")]
5636 "TARGET_SIMD"
5637 {
5638 int part = INTVAL (operands[2]);
5639 int offset = part * 16;
5640
5641 emit_move_insn (operands[0],
5642 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5643 DONE;
5644 })
5645
5646 ;; Permuted-store expanders for neon intrinsics.
5647
5648 ;; Permute instructions
5649
5650 ;; vec_perm support
5651
5652 (define_expand "vec_perm<mode>"
5653 [(match_operand:VB 0 "register_operand")
5654 (match_operand:VB 1 "register_operand")
5655 (match_operand:VB 2 "register_operand")
5656 (match_operand:VB 3 "register_operand")]
5657 "TARGET_SIMD"
5658 {
5659 aarch64_expand_vec_perm (operands[0], operands[1],
5660 operands[2], operands[3], <nunits>);
5661 DONE;
5662 })
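;; aarch64_expand_vec_perm lowers this to a tbl lookup: for the
;; quad-word modes the two source vectors are concatenated into a
;; register pair (aarch64_combinev16qi below) indexed by the selector,
;; as in aarch64_tbl2v16qi.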
5663
5664 (define_insn "aarch64_tbl1<mode>"
5665 [(set (match_operand:VB 0 "register_operand" "=w")
5666 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5667 (match_operand:VB 2 "register_operand" "w")]
5668 UNSPEC_TBL))]
5669 "TARGET_SIMD"
5670 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5671 [(set_attr "type" "neon_tbl1<q>")]
5672 )
5673
5674 ;; Two source registers.
5675
5676 (define_insn "aarch64_tbl2v16qi"
5677 [(set (match_operand:V16QI 0 "register_operand" "=w")
5678 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5679 (match_operand:V16QI 2 "register_operand" "w")]
5680 UNSPEC_TBL))]
5681 "TARGET_SIMD"
5682 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5683 [(set_attr "type" "neon_tbl2_q")]
5684 )
5685
5686 (define_insn "aarch64_tbl3<mode>"
5687 [(set (match_operand:VB 0 "register_operand" "=w")
5688 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5689 (match_operand:VB 2 "register_operand" "w")]
5690 UNSPEC_TBL))]
5691 "TARGET_SIMD"
5692 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5693 [(set_attr "type" "neon_tbl3")]
5694 )
5695
5696 (define_insn "aarch64_tbx4<mode>"
5697 [(set (match_operand:VB 0 "register_operand" "=w")
5698 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5699 (match_operand:OI 2 "register_operand" "w")
5700 (match_operand:VB 3 "register_operand" "w")]
5701 UNSPEC_TBX))]
5702 "TARGET_SIMD"
5703 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5704 [(set_attr "type" "neon_tbl4")]
5705 )
5706
5707 ;; Three source registers.
5708
5709 (define_insn "aarch64_qtbl3<mode>"
5710 [(set (match_operand:VB 0 "register_operand" "=w")
5711 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5712 (match_operand:VB 2 "register_operand" "w")]
5713 UNSPEC_TBL))]
5714 "TARGET_SIMD"
5715 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5716 [(set_attr "type" "neon_tbl3")]
5717 )
5718
5719 (define_insn "aarch64_qtbx3<mode>"
5720 [(set (match_operand:VB 0 "register_operand" "=w")
5721 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5722 (match_operand:CI 2 "register_operand" "w")
5723 (match_operand:VB 3 "register_operand" "w")]
5724 UNSPEC_TBX))]
5725 "TARGET_SIMD"
5726 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5727 [(set_attr "type" "neon_tbl3")]
5728 )
5729
5730 ;; Four source registers.
5731
5732 (define_insn "aarch64_qtbl4<mode>"
5733 [(set (match_operand:VB 0 "register_operand" "=w")
5734 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5735 (match_operand:VB 2 "register_operand" "w")]
5736 UNSPEC_TBL))]
5737 "TARGET_SIMD"
5738 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5739 [(set_attr "type" "neon_tbl4")]
5740 )
5741
5742 (define_insn "aarch64_qtbx4<mode>"
5743 [(set (match_operand:VB 0 "register_operand" "=w")
5744 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5745 (match_operand:XI 2 "register_operand" "w")
5746 (match_operand:VB 3 "register_operand" "w")]
5747 UNSPEC_TBX))]
5748 "TARGET_SIMD"
5749 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5750 [(set_attr "type" "neon_tbl4")]
5751 )
5752
5753 (define_insn_and_split "aarch64_combinev16qi"
5754 [(set (match_operand:OI 0 "register_operand" "=w")
5755 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5756 (match_operand:V16QI 2 "register_operand" "w")]
5757 UNSPEC_CONCAT))]
5758 "TARGET_SIMD"
5759 "#"
5760 "&& reload_completed"
5761 [(const_int 0)]
5762 {
5763 aarch64_split_combinev16qi (operands);
5764 DONE;
5765 }
5766 [(set_attr "type" "multiple")]
5767 )
5768
5769 ;; This instruction's pattern is generated directly by
5770 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5771 ;; need corresponding changes there.
5772 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5773 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5774 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5775 (match_operand:VALL_F16 2 "register_operand" "w")]
5776 PERMUTE))]
5777 "TARGET_SIMD"
5778 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5779 [(set_attr "type" "neon_permute<q>")]
5780 )
5781
5782 ;; This instruction's pattern is generated directly by
5783 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5784 ;; need corresponding changes there. Note that the immediate (third)
5785 ;; operand is a lane index not a byte index.
5786 (define_insn "aarch64_ext<mode>"
5787 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5788 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5789 (match_operand:VALL_F16 2 "register_operand" "w")
5790 (match_operand:SI 3 "immediate_operand" "i")]
5791 UNSPEC_EXT))]
5792 "TARGET_SIMD"
5793 {
5794 operands[3] = GEN_INT (INTVAL (operands[3])
5795 * GET_MODE_UNIT_SIZE (<MODE>mode));
5796 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5797 }
5798 [(set_attr "type" "neon_ext<q>")]
5799 )
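;; For example, with <MODE> being V4SImode (4-byte units) a lane index
;; of 2 becomes "ext\t%0.16b, %1.16b, %2.16b, #8".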
5800
5801 ;; This instruction's pattern is generated directly by
5802 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5803 ;; need corresponding changes there.
5804 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5805 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5806 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5807 REVERSE))]
5808 "TARGET_SIMD"
5809 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5810 [(set_attr "type" "neon_rev<q>")]
5811 )
5812
5813 (define_insn "aarch64_st2<mode>_dreg"
5814 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5815 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5816 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5817 UNSPEC_ST2))]
5818 "TARGET_SIMD"
5819 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5820 [(set_attr "type" "neon_store2_2reg")]
5821 )
5822
5823 (define_insn "aarch64_st2<mode>_dreg"
5824 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5825 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5826 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827 UNSPEC_ST2))]
5828 "TARGET_SIMD"
5829 "st1\\t{%S1.1d - %T1.1d}, %0"
5830 [(set_attr "type" "neon_store1_2reg")]
5831 )
5832
5833 (define_insn "aarch64_st3<mode>_dreg"
5834 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5835 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5836 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5837 UNSPEC_ST3))]
5838 "TARGET_SIMD"
5839 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5840 [(set_attr "type" "neon_store3_3reg")]
5841 )
5842
5843 (define_insn "aarch64_st3<mode>_dreg"
5844 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5845 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5846 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5847 UNSPEC_ST3))]
5848 "TARGET_SIMD"
5849 "st1\\t{%S1.1d - %U1.1d}, %0"
5850 [(set_attr "type" "neon_store1_3reg")]
5851 )
5852
5853 (define_insn "aarch64_st4<mode>_dreg"
5854 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5855 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5856 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5857 UNSPEC_ST4))]
5858 "TARGET_SIMD"
5859 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5860 [(set_attr "type" "neon_store4_4reg")]
5861 )
5862
5863 (define_insn "aarch64_st4<mode>_dreg"
5864 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5865 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5866 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5867 UNSPEC_ST4))]
5868 "TARGET_SIMD"
5869 "st1\\t{%S1.1d - %V1.1d}, %0"
5870 [(set_attr "type" "neon_store1_4reg")]
5871 )
5872
5873 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5874 [(match_operand:DI 0 "register_operand")
5875 (match_operand:VSTRUCT 1 "register_operand")
5876 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5877 "TARGET_SIMD"
5878 {
5879 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5880 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5881
5882 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5883 DONE;
5884 })
5885
5886 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5887 [(match_operand:DI 0 "register_operand")
5888 (match_operand:VSTRUCT 1 "register_operand")
5889 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5890 "TARGET_SIMD"
5891 {
5892 machine_mode mode = <VSTRUCT:MODE>mode;
5893 rtx mem = gen_rtx_MEM (mode, operands[0]);
5894
5895 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5896 DONE;
5897 })
5898
5899 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5900 [(match_operand:DI 0 "register_operand")
5901 (match_operand:VSTRUCT 1 "register_operand")
5902 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5903 (match_operand:SI 2 "immediate_operand")]
5904 "TARGET_SIMD"
5905 {
5906 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5907 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5908 * <VSTRUCT:nregs>);
5909
5910 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5911 mem, operands[1], operands[2]));
5912 DONE;
5913 })
5914
5915 (define_expand "aarch64_st1<VALL_F16:mode>"
5916 [(match_operand:DI 0 "register_operand")
5917 (match_operand:VALL_F16 1 "register_operand")]
5918 "TARGET_SIMD"
5919 {
5920 machine_mode mode = <VALL_F16:MODE>mode;
5921 rtx mem = gen_rtx_MEM (mode, operands[0]);
5922
5923 if (BYTES_BIG_ENDIAN)
5924 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5925 else
5926 emit_move_insn (mem, operands[1]);
5927 DONE;
5928 })
5929
5930 ;; Expander for builtins to insert vector registers into large
5931 ;; opaque integer modes.
5932
5933 ;; Q-register list. We don't need a D-reg inserter as we zero-extend
5934 ;; them in arm_neon.h and insert the resulting Q-regs.
5935
5936 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5937 [(match_operand:VSTRUCT 0 "register_operand")
5938 (match_operand:VSTRUCT 1 "register_operand")
5939 (match_operand:VQ 2 "register_operand")
5940 (match_operand:SI 3 "immediate_operand")]
5941 "TARGET_SIMD"
5942 {
5943 int part = INTVAL (operands[3]);
5944 int offset = part * 16;
5945
5946 emit_move_insn (operands[0], operands[1]);
5947 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5948 operands[2]);
5949 DONE;
5950 })
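;; For example, setting Q register 2 of an XImode (four-register) list
;; copies the whole list and then overwrites the 16-byte subreg at byte
;; offset 2 * 16 = 32.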
5951
5952 ;; Standard pattern name vec_init<mode><Vel>.
5953
5954 (define_expand "vec_init<mode><Vel>"
5955 [(match_operand:VALL_F16 0 "register_operand")
5956 (match_operand 1 "" "")]
5957 "TARGET_SIMD"
5958 {
5959 aarch64_expand_vector_init (operands[0], operands[1]);
5960 DONE;
5961 })
5962
5963 (define_expand "vec_init<mode><Vhalf>"
5964 [(match_operand:VQ_NO2E 0 "register_operand")
5965 (match_operand 1 "" "")]
5966 "TARGET_SIMD"
5967 {
5968 aarch64_expand_vector_init (operands[0], operands[1]);
5969 DONE;
5970 })
5971
5972 (define_insn "*aarch64_simd_ld1r<mode>"
5973 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5974 (vec_duplicate:VALL_F16
5975 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5976 "TARGET_SIMD"
5977 "ld1r\\t{%0.<Vtype>}, %1"
5978 [(set_attr "type" "neon_load1_all_lanes")]
5979 )
5980
5981 (define_insn "aarch64_simd_ld1<mode>_x2"
5982 [(set (match_operand:OI 0 "register_operand" "=w")
5983 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5984 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5985 UNSPEC_LD1))]
5986 "TARGET_SIMD"
5987 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5988 [(set_attr "type" "neon_load1_2reg<q>")]
5989 )
5990
5991 (define_insn "aarch64_simd_ld1<mode>_x2"
5992 [(set (match_operand:OI 0 "register_operand" "=w")
5993 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5994 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5995 UNSPEC_LD1))]
5996 "TARGET_SIMD"
5997 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5998 [(set_attr "type" "neon_load1_2reg<q>")]
5999 )
6000
6001
6002 (define_insn "@aarch64_frecpe<mode>"
6003 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6004 (unspec:VHSDF_HSDF
6005 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6006 UNSPEC_FRECPE))]
6007 "TARGET_SIMD"
6008 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6009 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6010 )
6011
6012 (define_insn "aarch64_frecpx<mode>"
6013 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6014 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6015 UNSPEC_FRECPX))]
6016 "TARGET_SIMD"
6017 "frecpx\t%<s>0, %<s>1"
6018 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6019 )
6020
6021 (define_insn "@aarch64_frecps<mode>"
6022 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6023 (unspec:VHSDF_HSDF
6024 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6025 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6026 UNSPEC_FRECPS))]
6027 "TARGET_SIMD"
6028 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6029 [(set_attr "type" "neon_fp_recps_<stype><q>")]
6030 )
6031
6032 (define_insn "aarch64_urecpe<mode>"
6033 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6034 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6035 UNSPEC_URECPE))]
6036 "TARGET_SIMD"
6037 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6038 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6039
6040 ;; Standard pattern name vec_extract<mode><Vel>.
6041
6042 (define_expand "vec_extract<mode><Vel>"
6043 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6044 (match_operand:VALL_F16 1 "register_operand")
6045 (match_operand:SI 2 "immediate_operand")]
6046 "TARGET_SIMD"
6047 {
6048 emit_insn
6049 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6050 DONE;
6051 })
6052
6053 ;; aes
6054
6055 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6056 [(set (match_operand:V16QI 0 "register_operand" "=w")
6057 (unspec:V16QI
6058 [(xor:V16QI
6059 (match_operand:V16QI 1 "register_operand" "%0")
6060 (match_operand:V16QI 2 "register_operand" "w"))]
6061 CRYPTO_AES))]
6062 "TARGET_SIMD && TARGET_AES"
6063 "aes<aes_op>\\t%0.16b, %2.16b"
6064 [(set_attr "type" "crypto_aese")]
6065 )
6066
6067 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6068 [(set (match_operand:V16QI 0 "register_operand" "=w")
6069 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6070 CRYPTO_AESMC))]
6071 "TARGET_SIMD && TARGET_AES"
6072 "aes<aesmc_op>\\t%0.16b, %1.16b"
6073 [(set_attr "type" "crypto_aesmc")]
6074 )
6075
6076 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6077 ;; and enforce the register dependency without scheduling or register
6078 ;; allocation messing up the order or introducing moves in between.
6079 ;; Mash the two together during combine.
6080
6081 (define_insn "*aarch64_crypto_aese_fused"
6082 [(set (match_operand:V16QI 0 "register_operand" "=w")
6083 (unspec:V16QI
6084 [(unspec:V16QI
6085 [(xor:V16QI
6086 (match_operand:V16QI 1 "register_operand" "%0")
6087 (match_operand:V16QI 2 "register_operand" "w"))]
6088 UNSPEC_AESE)]
6089 UNSPEC_AESMC))]
6090 "TARGET_SIMD && TARGET_AES
6091 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6092 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6093 [(set_attr "type" "crypto_aese")
6094 (set_attr "length" "8")]
6095 )
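;; For example, the arm_neon.h sequence vaeseq_u8 followed by
;; vaesmcq_u8 on its result combines into this single pattern, keeping
;; the pair adjacent for cores that fuse AESE/AESMC.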
6096
6097 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6098 ;; and enforce the register dependency without scheduling or register
6099 ;; allocation messing up the order or introducing moves in between.
6100 ;; Mash the two together during combine.
6101
6102 (define_insn "*aarch64_crypto_aesd_fused"
6103 [(set (match_operand:V16QI 0 "register_operand" "=w")
6104 (unspec:V16QI
6105 [(unspec:V16QI
6106 [(xor:V16QI
6107 (match_operand:V16QI 1 "register_operand" "%0")
6108 (match_operand:V16QI 2 "register_operand" "w"))]
6109 UNSPEC_AESD)]
6110 UNSPEC_AESIMC))]
6111 "TARGET_SIMD && TARGET_AES
6112 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6113 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6114 [(set_attr "type" "crypto_aese")
6115 (set_attr "length" "8")]
6116 )
6117
6118 ;; sha1
6119
6120 (define_insn "aarch64_crypto_sha1hsi"
6121 [(set (match_operand:SI 0 "register_operand" "=w")
6122 (unspec:SI [(match_operand:SI 1
6123 "register_operand" "w")]
6124 UNSPEC_SHA1H))]
6125 "TARGET_SIMD && TARGET_SHA2"
6126 "sha1h\\t%s0, %s1"
6127 [(set_attr "type" "crypto_sha1_fast")]
6128 )
6129
6130 (define_insn "aarch64_crypto_sha1hv4si"
6131 [(set (match_operand:SI 0 "register_operand" "=w")
6132 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6133 (parallel [(const_int 0)]))]
6134 UNSPEC_SHA1H))]
6135 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6136 "sha1h\\t%s0, %s1"
6137 [(set_attr "type" "crypto_sha1_fast")]
6138 )
6139
6140 (define_insn "aarch64_be_crypto_sha1hv4si"
6141 [(set (match_operand:SI 0 "register_operand" "=w")
6142 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6143 (parallel [(const_int 3)]))]
6144 UNSPEC_SHA1H))]
6145 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6146 "sha1h\\t%s0, %s1"
6147 [(set_attr "type" "crypto_sha1_fast")]
6148 )
6149
6150 (define_insn "aarch64_crypto_sha1su1v4si"
6151 [(set (match_operand:V4SI 0 "register_operand" "=w")
6152 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6153 (match_operand:V4SI 2 "register_operand" "w")]
6154 UNSPEC_SHA1SU1))]
6155 "TARGET_SIMD && TARGET_SHA2"
6156 "sha1su1\\t%0.4s, %2.4s"
6157 [(set_attr "type" "crypto_sha1_fast")]
6158 )
6159
6160 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6161 [(set (match_operand:V4SI 0 "register_operand" "=w")
6162 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6163 (match_operand:SI 2 "register_operand" "w")
6164 (match_operand:V4SI 3 "register_operand" "w")]
6165 CRYPTO_SHA1))]
6166 "TARGET_SIMD && TARGET_SHA2"
6167 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6168 [(set_attr "type" "crypto_sha1_slow")]
6169 )
6170
6171 (define_insn "aarch64_crypto_sha1su0v4si"
6172 [(set (match_operand:V4SI 0 "register_operand" "=w")
6173 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6174 (match_operand:V4SI 2 "register_operand" "w")
6175 (match_operand:V4SI 3 "register_operand" "w")]
6176 UNSPEC_SHA1SU0))]
6177 "TARGET_SIMD && TARGET_SHA2"
6178 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6179 [(set_attr "type" "crypto_sha1_xor")]
6180 )
6181
6182 ;; sha256
6183
6184 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6185 [(set (match_operand:V4SI 0 "register_operand" "=w")
6186 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187 (match_operand:V4SI 2 "register_operand" "w")
6188 (match_operand:V4SI 3 "register_operand" "w")]
6189 CRYPTO_SHA256))]
6190 "TARGET_SIMD && TARGET_SHA2"
6191 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6192 [(set_attr "type" "crypto_sha256_slow")]
6193 )
6194
6195 (define_insn "aarch64_crypto_sha256su0v4si"
6196 [(set (match_operand:V4SI 0 "register_operand" "=w")
6197 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6198 (match_operand:V4SI 2 "register_operand" "w")]
6199 UNSPEC_SHA256SU0))]
6200 "TARGET_SIMD && TARGET_SHA2"
6201 "sha256su0\\t%0.4s, %2.4s"
6202 [(set_attr "type" "crypto_sha256_fast")]
6203 )
6204
6205 (define_insn "aarch64_crypto_sha256su1v4si"
6206 [(set (match_operand:V4SI 0 "register_operand" "=w")
6207 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6208 (match_operand:V4SI 2 "register_operand" "w")
6209 (match_operand:V4SI 3 "register_operand" "w")]
6210 UNSPEC_SHA256SU1))]
6211 "TARGET_SIMD && TARGET_SHA2"
6212 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6213 [(set_attr "type" "crypto_sha256_slow")]
6214 )
6215
6216 ;; sha512
6217
6218 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6219 [(set (match_operand:V2DI 0 "register_operand" "=w")
6220 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6221 (match_operand:V2DI 2 "register_operand" "w")
6222 (match_operand:V2DI 3 "register_operand" "w")]
6223 CRYPTO_SHA512))]
6224 "TARGET_SIMD && TARGET_SHA3"
6225 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6226 [(set_attr "type" "crypto_sha512")]
6227 )
6228
6229 (define_insn "aarch64_crypto_sha512su0qv2di"
6230 [(set (match_operand:V2DI 0 "register_operand" "=w")
6231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6232 (match_operand:V2DI 2 "register_operand" "w")]
6233 UNSPEC_SHA512SU0))]
6234 "TARGET_SIMD && TARGET_SHA3"
6235 "sha512su0\\t%0.2d, %2.2d"
6236 [(set_attr "type" "crypto_sha512")]
6237 )
6238
6239 (define_insn "aarch64_crypto_sha512su1qv2di"
6240 [(set (match_operand:V2DI 0 "register_operand" "=w")
6241 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6242 (match_operand:V2DI 2 "register_operand" "w")
6243 (match_operand:V2DI 3 "register_operand" "w")]
6244 UNSPEC_SHA512SU1))]
6245 "TARGET_SIMD && TARGET_SHA3"
6246 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6247 [(set_attr "type" "crypto_sha512")]
6248 )
6249
6250 ;; sha3
6251
6252 (define_insn "eor3q<mode>4"
6253 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6254 (xor:VQ_I
6255 (xor:VQ_I
6256 (match_operand:VQ_I 2 "register_operand" "w")
6257 (match_operand:VQ_I 3 "register_operand" "w"))
6258 (match_operand:VQ_I 1 "register_operand" "w")))]
6259 "TARGET_SIMD && TARGET_SHA3"
6260 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6261 [(set_attr "type" "crypto_sha3")]
6262 )
6263
6264 (define_insn "aarch64_rax1qv2di"
6265 [(set (match_operand:V2DI 0 "register_operand" "=w")
6266 (xor:V2DI
6267 (rotate:V2DI
6268 (match_operand:V2DI 2 "register_operand" "w")
6269 (const_int 1))
6270 (match_operand:V2DI 1 "register_operand" "w")))]
6271 "TARGET_SIMD && TARGET_SHA3"
6272 "rax1\\t%0.2d, %1.2d, %2.2d"
6273 [(set_attr "type" "crypto_sha3")]
6274 )
6275
6276 (define_insn "aarch64_xarqv2di"
6277 [(set (match_operand:V2DI 0 "register_operand" "=w")
6278 (rotatert:V2DI
6279 (xor:V2DI
6280 (match_operand:V2DI 1 "register_operand" "%w")
6281 (match_operand:V2DI 2 "register_operand" "w"))
6282 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6283 "TARGET_SIMD && TARGET_SHA3"
6284 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6285 [(set_attr "type" "crypto_sha3")]
6286 )
6287
6288 (define_insn "bcaxq<mode>4"
6289 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6290 (xor:VQ_I
6291 (and:VQ_I
6292 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6293 (match_operand:VQ_I 2 "register_operand" "w"))
6294 (match_operand:VQ_I 1 "register_operand" "w")))]
6295 "TARGET_SIMD && TARGET_SHA3"
6296 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6297 [(set_attr "type" "crypto_sha3")]
6298 )
6299
6300 ;; SM3
6301
6302 (define_insn "aarch64_sm3ss1qv4si"
6303 [(set (match_operand:V4SI 0 "register_operand" "=w")
6304 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6305 (match_operand:V4SI 2 "register_operand" "w")
6306 (match_operand:V4SI 3 "register_operand" "w")]
6307 UNSPEC_SM3SS1))]
6308 "TARGET_SIMD && TARGET_SM4"
6309 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6310 [(set_attr "type" "crypto_sm3")]
6311 )
6312
6313
6314 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6315 [(set (match_operand:V4SI 0 "register_operand" "=w")
6316 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6317 (match_operand:V4SI 2 "register_operand" "w")
6318 (match_operand:V4SI 3 "register_operand" "w")
6319 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6320 CRYPTO_SM3TT))]
6321 "TARGET_SIMD && TARGET_SM4"
6322 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6323 [(set_attr "type" "crypto_sm3")]
6324 )
6325
6326 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6327 [(set (match_operand:V4SI 0 "register_operand" "=w")
6328 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6329 (match_operand:V4SI 2 "register_operand" "w")
6330 (match_operand:V4SI 3 "register_operand" "w")]
6331 CRYPTO_SM3PART))]
6332 "TARGET_SIMD && TARGET_SM4"
6333 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6334 [(set_attr "type" "crypto_sm3")]
6335 )
6336
6337 ;; SM4
6338
6339 (define_insn "aarch64_sm4eqv4si"
6340 [(set (match_operand:V4SI 0 "register_operand" "=w")
6341 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6342 (match_operand:V4SI 2 "register_operand" "w")]
6343 UNSPEC_SM4E))]
6344 "TARGET_SIMD && TARGET_SM4"
6345 "sm4e\\t%0.4s, %2.4s"
6346 [(set_attr "type" "crypto_sm4")]
6347 )
6348
6349 (define_insn "aarch64_sm4ekeyqv4si"
6350 [(set (match_operand:V4SI 0 "register_operand" "=w")
6351 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6352 (match_operand:V4SI 2 "register_operand" "w")]
6353 UNSPEC_SM4EKEY))]
6354 "TARGET_SIMD && TARGET_SM4"
6355 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6356 [(set_attr "type" "crypto_sm4")]
6357 )
6358
6359 ;; fp16fml
6360
6361 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6362 [(set (match_operand:VDQSF 0 "register_operand")
6363 (unspec:VDQSF
6364 [(match_operand:VDQSF 1 "register_operand")
6365 (match_operand:<VFMLA_W> 2 "register_operand")
6366 (match_operand:<VFMLA_W> 3 "register_operand")]
6367 VFMLA16_LOW))]
6368 "TARGET_F16FML"
6369 {
6370 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6371 <nunits> * 2, false);
6372 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6373 <nunits> * 2, false);
6374
6375 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6376 operands[1],
6377 operands[2],
6378 operands[3],
6379 p1, p2));
6380 DONE;
6382 })
6383
6384 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6385 [(set (match_operand:VDQSF 0 "register_operand")
6386 (unspec:VDQSF
6387 [(match_operand:VDQSF 1 "register_operand")
6388 (match_operand:<VFMLA_W> 2 "register_operand")
6389 (match_operand:<VFMLA_W> 3 "register_operand")]
6390 VFMLA16_HIGH))]
6391 "TARGET_F16FML"
6392 {
6393 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6394 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6395
6396 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6397 operands[1],
6398 operands[2],
6399 operands[3],
6400 p1, p2));
6401 DONE;
6402 })
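;; The vect_par_cnst parallels built above select the low or high half
;; of the f16 operands; the insns below widen the selected half to f32
;; and fuse the multiply into operand 1 (fmlal/fmlsl for the low half,
;; fmlal2/fmlsl2 for the high half).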
6403
6404 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6405 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6406 (fma:VDQSF
6407 (float_extend:VDQSF
6408 (vec_select:<VFMLA_SEL_W>
6409 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6410 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6411 (float_extend:VDQSF
6412 (vec_select:<VFMLA_SEL_W>
6413 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6414 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6415 (match_operand:VDQSF 1 "register_operand" "0")))]
6416 "TARGET_F16FML"
6417 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6418 [(set_attr "type" "neon_fp_mul_s")]
6419 )
6420
6421 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6422 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6423 (fma:VDQSF
6424 (float_extend:VDQSF
6425 (neg:<VFMLA_SEL_W>
6426 (vec_select:<VFMLA_SEL_W>
6427 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6428 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6429 (float_extend:VDQSF
6430 (vec_select:<VFMLA_SEL_W>
6431 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6432 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6433 (match_operand:VDQSF 1 "register_operand" "0")))]
6434 "TARGET_F16FML"
6435 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6436 [(set_attr "type" "neon_fp_mul_s")]
6437 )
6438
6439 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6440 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6441 (fma:VDQSF
6442 (float_extend:VDQSF
6443 (vec_select:<VFMLA_SEL_W>
6444 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6445 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6446 (float_extend:VDQSF
6447 (vec_select:<VFMLA_SEL_W>
6448 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6449 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6450 (match_operand:VDQSF 1 "register_operand" "0")))]
6451 "TARGET_F16FML"
6452 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6453 [(set_attr "type" "neon_fp_mul_s")]
6454 )
6455
6456 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6457 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6458 (fma:VDQSF
6459 (float_extend:VDQSF
6460 (neg:<VFMLA_SEL_W>
6461 (vec_select:<VFMLA_SEL_W>
6462 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6463 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6464 (float_extend:VDQSF
6465 (vec_select:<VFMLA_SEL_W>
6466 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6467 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6468 (match_operand:VDQSF 1 "register_operand" "0")))]
6469 "TARGET_F16FML"
6470 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6471 [(set_attr "type" "neon_fp_mul_s")]
6472 )
6473
6474 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6475 [(set (match_operand:V2SF 0 "register_operand")
6476 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6477 (match_operand:V4HF 2 "register_operand")
6478 (match_operand:V4HF 3 "register_operand")
6479 (match_operand:SI 4 "aarch64_imm2")]
6480 VFMLA16_LOW))]
6481 "TARGET_F16FML"
6482 {
6483 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6484 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6485
6486 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6487 operands[1],
6488 operands[2],
6489 operands[3],
6490 p1, lane));
6491 DONE;
6492 }
6493 )
6494
6495 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6496 [(set (match_operand:V2SF 0 "register_operand")
6497 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6498 (match_operand:V4HF 2 "register_operand")
6499 (match_operand:V4HF 3 "register_operand")
6500 (match_operand:SI 4 "aarch64_imm2")]
6501 VFMLA16_HIGH))]
6502 "TARGET_F16FML"
6503 {
6504 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6505 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6506
6507 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6508 operands[1],
6509 operands[2],
6510 operands[3],
6511 p1, lane));
6512 DONE;
6513 })
6514
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

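;; Note the "x" constraint on the lane operand above: by-element
;; half-precision multiplies encode the index register in a 4-bit field,
;; so it must be one of V0-V15 (FP_LO_REGS), unlike the "w" multiplicand.
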
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

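;; A sketch of how these lane patterns surface in C, again assuming the
;; arm_neon.h spellings (the lane index must be a compile-time constant,
;; here 0-3 to satisfy aarch64_imm2):
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   mla_lane (float32x2_t r, float16x4_t a, float16x4_t b)
;;   {
;;     return vfmlal_lane_low_f16 (r, a, b, 2);  /* fmlal v0.2s, v1.2h, v2.h[2] */
;;   }
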
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})

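;; The q/laneq variants index a V8HF vector, so the lane operand uses
;; aarch64_lane_imm3 (a 3-bit index, 0-7) and the Ui7 constraint, where
;; the V4HF forms above make do with aarch64_imm2 (0-3) and Ui2.
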
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

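;; As above, a hedged C sketch of the fully 128-bit laneq form, assuming
;; the arm_neon.h names:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   mla_laneq (float32x4_t r, float16x8_t a, float16x8_t b)
;;   {
;;     return vfmlalq_laneq_low_f16 (r, a, b, 7);  /* fmlal v0.4s, v1.4h, v2.h[7] */
;;   }
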
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

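;; These mixed-width forms split their modes: the multiplicand halves come
;; from the 64-bit V4HF operand 2, while the lane is picked from the
;; 128-bit V8HF operand 3, so the half selector is built in V4HFmode but
;; the lane index is endian-corrected in V8HFmode.
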
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

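;; Taken together, the patterns above cover all four source-width
;; combinations (d-form and q-form accumulator, V4HF "lane" and V8HF
;; "laneq" index), each in fmlal/fmlsl and low/high variants.
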
;; pmull: 64x64-bit -> 128-bit carry-less (polynomial) multiply.

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)