;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
        (match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
       return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
          "=w, Umn, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
                               plus_constant (Pmode,
                                              XEXP (operands[0], 0),
                                              GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GP_REGNUM_P (REGNO (src)))
    {
      rtx src_low_part = gen_lowpart (<VHALF>mode, src);
      rtx src_high_part = gen_highpart (<VHALF>mode, src);

      emit_insn (gen_move_lo_quad_<mode> (dst, src_low_part));
      emit_insn (gen_move_hi_quad_<mode> (dst, src_high_part));
    }
  else
    {
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
      rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
      rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
      rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

      emit_insn (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
      emit_insn (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
    }
  DONE;
}
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                           <MODE>mode, 0);
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
  DONE;
}
)
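
;; A scalar sketch of the identity used above (illustration only): the
;; REV + RBIT pair reverses all the bits of each 32-bit element, after
;; which counting leading zeros of the reversed value counts the
;; trailing zeros of the original.
;;
;;   #include <stdint.h>
;;   static uint32_t bit_reverse (uint32_t x)  /* bswap + rbit, scalar.  */
;;   {
;;     uint32_t r = 0;
;;     for (int i = 0; i < 32; i++)
;;       r |= ((x >> i) & 1u) << (31 - i);
;;     return r;
;;   }
;;   /* For all x: __builtin_ctz (x) == __builtin_clz (bit_reverse (x)),
;;      modulo the usual undefinedness of both builtins at x == 0.  */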

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
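
;; A scalar sketch of the expansion above (illustration only, SFmode
;; lane): AND isolates the sign bit of operand 2 and XOR merges it into
;; operand 1, so the result is operand 1 with its sign flipped whenever
;; operand 2 is negative, with no multiply needed.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static float xorsignf (float a, float b)
;;   {
;;     uint32_t ua, ub;
;;     memcpy (&ua, &a, 4);
;;     memcpy (&ub, &b, 4);
;;     ua ^= ub & 0x80000000u;   /* v_bitmask = sign bit only.  */
;;     memcpy (&a, &ua, 4);
;;     return a;
;;   }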

;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; use needs to guarantee that the source vectors are contiguous.  It
;; would be wrong to describe the operation without being able to
;; describe the permute that is also required, but even if that is done
;; the permute would have been created as a LOAD_LANES which means the
;; values in the registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)
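
;; For reference, the per-pair arithmetic FCADD performs, with each
;; even/odd lane pair holding one complex number as { real, imag }
;; (our reading of the Armv8.3-A specification, shown as C-style
;; pseudocode rather than anything taken from this file):
;;
;;   /* fcadd #90 :  res = op1 + op2 * i  */
;;   res_re[i] = op1_re[i] - op2_im[i];
;;   res_im[i] = op1_im[i] + op2_re[i];
;;   /* fcadd #270:  res = op1 - op2 * i  */
;;   res_re[i] = op1_re[i] + op2_im[i];
;;   res_im[i] = op1_im[i] - op2_re[i];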

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
                   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
                     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                    FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling
;; the loop:
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                            DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3],
                                           operands[1], operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
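
;; A self-contained form of the loop sketched above, with the element
;; widths that actually matter for this optab made explicit (narrow
;; char inputs, int accumulator); assuming -O3 and a dot-product-capable
;; target, this is the shape expected to expand through <sur>dot_prod:
;;
;;   int sdot (signed char *a, signed char *b, int len)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < len; i++)
;;       r += a[i] * b[i];
;;     return r;
;;   }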

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
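
;; A scalar sketch of the BSL-based expansion above (illustration only,
;; SFmode lane): with a mask of just the sign bit, bit-select takes the
;; sign from operand 2 and everything else from operand 1.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static float copysignf_sketch (float x, float y)
;;   {
;;     const uint32_t m = 0x80000000u;   /* v_bitmask.  */
;;     uint32_t ux, uy;
;;     memcpy (&ux, &x, 4);
;;     memcpy (&uy, &y, 4);
;;     ux = (uy & m) | (ux & ~m);        /* BSL: mask bits pick from y.  */
;;     memcpy (&x, &ux, 4);
;;     return x;
;;   }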

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
  return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate, as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
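
;; A C sketch contrasting the two formulations for the example above
;; (illustration only; QImode arithmetic emulated with casts):
;;
;;   #include <stdlib.h>
;;   static signed char via_abs (signed char a, signed char b)
;;   { return abs ((signed char) (a - b)); }          /* wraps, then ABS */
;;   static signed char via_minmax (signed char a, signed char b)
;;   { return (a > b ? a : b) - (a < b ? a : b); }    /* MAX - MIN form  */
;;
;; With a = 64, b = -128: via_abs yields 64, while via_minmax yields the
;; SABD result of 192 (-64 as a signed byte).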

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")]
                        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
                        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:<VDBLW> 2 "register_operand" "0")]
                        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2 tmp.8h, op1.16b, op2.16b
;; UABAL  tmp.8h, op1.16b, op2.16b
;; UADALP op3.4s, tmp.8h
;; MOV    op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD tmp2.16b, op1.16b, op2.16b
;; UDOT op3.4s, tmp2.16b, tmp1.16b
;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
{
  if (TARGET_DOTPROD)
    {
      rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
      rtx abd = gen_reg_rtx (V16QImode);
      emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
      emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
                                        abd, ones));
      DONE;
    }
  rtx reduc = gen_reg_rtx (V8HImode);
  emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                            operands[2]));
  emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                           operands[2], reduc));
  emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                           operands[3]));
  emit_move_insn (operands[0], operands[3]);
  DONE;
}
)
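
;; A self-contained example of a loop the vectorizer reduces through
;; this expansion (assuming -O3; unsigned inputs select the u-variant):
;;
;;   int sad (unsigned char *a, unsigned char *b, int n)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < n; i++)
;;       r += __builtin_abs (a[i] - b[i]);
;;     return r;
;;   }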

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
{
  switch (which_alternative)
    {
    case 0:
      return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
    case 1:
      return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                AARCH64_CHECK_BIC);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
{
  switch (which_alternative)
    {
    case 0:
      return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
    case 1:
      return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                AARCH64_CHECK_ORR);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
    {
    case 0:
      return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
    case 1:
      return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
    case 2:
      return "ld1\\t{%0.<Vetype>}[%p2], %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                         INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
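
;; A scalar sketch of the expansion above (illustration only): viewing
;; the float lane as an integer, a logical shift right by BITS - 1
;; leaves only the sign bit, so each result lane is 1 for negative
;; inputs (including -0.0) and 0 otherwise.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static uint32_t signbitf_sketch (float x)
;;   {
;;     uint32_t u;
;;     memcpy (&u, &x, 4);
;;     return u >> 31;          /* USHR #31 on each lane.  */
;;   }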

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "*aarch64_simd_sra<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
          (match_operand:VDQ_I 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})
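
;; SSHL and USHL shift left for positive per-lane shift counts and
;; right for negative ones, which is why the variable right-shift
;; expansions (vashr above, vlshr below) are emitted as a register
;; shift by the negated amount.  Per-lane sketch (illustration only):
;;
;;   x >> s  ==  SSHL (x, -s)   for 0 <= s < element width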

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
{
  /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like asr by 63; however, the standard pattern does not handle
     a shift by 64.  */
  if (INTVAL (operands[2]) == 64)
    operands[2] = GEN_INT (63);
  emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
  DONE;
}
)
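
;; Sketch of why the remap above is safe: an arithmetic right shift
;; replicates the sign bit, so for any int64_t x, (x >> 63) is already
;; 0 or -1, exactly what a (non-representable) shift by 64 would give.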
1223
1224 (define_expand "vlshr<mode>3"
1225 [(match_operand:VDQ_BHSI 0 "register_operand")
1226 (match_operand:VDQ_BHSI 1 "register_operand")
1227 (match_operand:VDQ_BHSI 2 "register_operand")]
1228 "TARGET_SIMD"
1229 {
1230 rtx neg = gen_reg_rtx (<MODE>mode);
1231 emit (gen_neg<mode>2 (neg, operands[2]));
1232 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1233 neg));
1234 DONE;
1235 })
1236
1237 (define_expand "aarch64_lshr_simddi"
1238 [(match_operand:DI 0 "register_operand")
1239 (match_operand:DI 1 "register_operand")
1240 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1241 "TARGET_SIMD"
1242 {
1243 if (INTVAL (operands[2]) == 64)
1244 emit_move_insn (operands[0], const0_rtx);
1245 else
1246 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1247 DONE;
1248 }
1249 )
1250
1251 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1252 (define_insn "vec_shr_<mode>"
1253 [(set (match_operand:VD 0 "register_operand" "=w")
1254 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1255 (match_operand:SI 2 "immediate_operand" "i")]
1256 UNSPEC_VEC_SHR))]
1257 "TARGET_SIMD"
1258 {
1259 if (BYTES_BIG_ENDIAN)
1260 return "shl %d0, %d1, %2";
1261 else
1262 return "ushr %d0, %d1, %2";
1263 }
1264 [(set_attr "type" "neon_shift_imm")]
1265 )
1266
1267 (define_expand "vec_set<mode>"
1268 [(match_operand:VALL_F16 0 "register_operand")
1269 (match_operand:<VEL> 1 "register_operand")
1270 (match_operand:SI 2 "immediate_operand")]
1271 "TARGET_SIMD"
1272 {
1273 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1274 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1275 GEN_INT (elem), operands[0]));
1276 DONE;
1277 }
1278 )
1279
1280
1281 (define_insn "aarch64_mla<mode>"
1282 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1283 (plus:VDQ_BHSI (mult:VDQ_BHSI
1284 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1285 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1286 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1287 "TARGET_SIMD"
1288 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1289 [(set_attr "type" "neon_mla_<Vetype><q>")]
1290 )
1291
1292 (define_insn "*aarch64_mla_elt<mode>"
1293 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1294 (plus:VDQHS
1295 (mult:VDQHS
1296 (vec_duplicate:VDQHS
1297 (vec_select:<VEL>
1298 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1299 (parallel [(match_operand:SI 2 "immediate_operand")])))
1300 (match_operand:VDQHS 3 "register_operand" "w"))
1301 (match_operand:VDQHS 4 "register_operand" "0")))]
1302 "TARGET_SIMD"
1303 {
1304 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1305 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1306 }
1307 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1308 )
1309
1310 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1311 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1312 (plus:VDQHS
1313 (mult:VDQHS
1314 (vec_duplicate:VDQHS
1315 (vec_select:<VEL>
1316 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1317 (parallel [(match_operand:SI 2 "immediate_operand")])))
1318 (match_operand:VDQHS 3 "register_operand" "w"))
1319 (match_operand:VDQHS 4 "register_operand" "0")))]
1320 "TARGET_SIMD"
1321 {
1322 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1323 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1324 }
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
1328 (define_insn "*aarch64_mla_elt_merge<mode>"
1329 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1330 (plus:VDQHS
1331 (mult:VDQHS (vec_duplicate:VDQHS
1332 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1333 (match_operand:VDQHS 2 "register_operand" "w"))
1334 (match_operand:VDQHS 3 "register_operand" "0")))]
1335 "TARGET_SIMD"
1336 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1337 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1338 )
1339
1340 (define_insn "aarch64_mls<mode>"
1341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1343 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1345 "TARGET_SIMD"
1346 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1347 [(set_attr "type" "neon_mla_<Vetype><q>")]
1348 )
1349
1350 (define_insn "*aarch64_mls_elt<mode>"
1351 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1352 (minus:VDQHS
1353 (match_operand:VDQHS 4 "register_operand" "0")
1354 (mult:VDQHS
1355 (vec_duplicate:VDQHS
1356 (vec_select:<VEL>
1357 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358 (parallel [(match_operand:SI 2 "immediate_operand")])))
1359 (match_operand:VDQHS 3 "register_operand" "w"))))]
1360 "TARGET_SIMD"
1361 {
1362 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1363 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1364 }
1365 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1366 )
1367
1368 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1369 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1370 (minus:VDQHS
1371 (match_operand:VDQHS 4 "register_operand" "0")
1372 (mult:VDQHS
1373 (vec_duplicate:VDQHS
1374 (vec_select:<VEL>
1375 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376 (parallel [(match_operand:SI 2 "immediate_operand")])))
1377 (match_operand:VDQHS 3 "register_operand" "w"))))]
1378 "TARGET_SIMD"
1379 {
1380 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1381 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1382 }
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 (define_insn "*aarch64_mls_elt_merge<mode>"
1387 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1388 (minus:VDQHS
1389 (match_operand:VDQHS 1 "register_operand" "0")
1390 (mult:VDQHS (vec_duplicate:VDQHS
1391 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1392 (match_operand:VDQHS 3 "register_operand" "w"))))]
1393 "TARGET_SIMD"
1394 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1395 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1396 )
1397
1398 ;; Max/Min operations.
1399 (define_insn "<su><maxmin><mode>3"
1400 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1402 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1403 "TARGET_SIMD"
1404 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1405 [(set_attr "type" "neon_minmax<q>")]
1406 )
1407
1408 (define_expand "<su><maxmin>v2di3"
1409 [(set (match_operand:V2DI 0 "register_operand")
1410 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1411 (match_operand:V2DI 2 "register_operand")))]
1412 "TARGET_SIMD"
1413 {
1414 enum rtx_code cmp_operator;
1415 rtx cmp_fmt;
1416
1417 switch (<CODE>)
1418 {
1419 case UMIN:
1420 cmp_operator = LTU;
1421 break;
1422 case SMIN:
1423 cmp_operator = LT;
1424 break;
1425 case UMAX:
1426 cmp_operator = GTU;
1427 break;
1428 case SMAX:
1429 cmp_operator = GT;
1430 break;
1431 default:
1432 gcc_unreachable ();
1433 }
1434
1435 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1436 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1437 operands[2], cmp_fmt, operands[1], operands[2]));
1438 DONE;
1439 })
1440
1441 ;; Pairwise Integer Max/Min operations.
1442 (define_insn "aarch64_<maxmin_uns>p<mode>"
1443 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1444 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1445 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1446 MAXMINV))]
1447 "TARGET_SIMD"
1448 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1449 [(set_attr "type" "neon_minmax<q>")]
1450 )
1451
1452 ;; Pairwise FP Max/Min operations.
1453 (define_insn "aarch64_<maxmin_uns>p<mode>"
1454 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1455 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1456 (match_operand:VHSDF 2 "register_operand" "w")]
1457 FMAXMINV))]
1458 "TARGET_SIMD"
1459 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1460 [(set_attr "type" "neon_minmax<q>")]
1461 )
1462
1463 ;; vec_concat gives a new vector with the low elements from operand 1, and
1464 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1465 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1466 ;; What that means, is that the RTL descriptions of the below patterns
1467 ;; need to change depending on endianness.
1468
1469 ;; Move to the low architectural bits of the register.
1470 ;; On little-endian this is { operand, zeroes }
1471 ;; On big-endian this is { zeroes, operand }
1472
1473 (define_insn "move_lo_quad_internal_<mode>"
1474 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1475 (vec_concat:VQ_NO2E
1476 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1477 (vec_duplicate:<VHALF> (const_int 0))))]
1478 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1479 "@
1480 dup\\t%d0, %1.d[0]
1481 fmov\\t%d0, %1
1482 dup\\t%d0, %1"
1483 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484 (set_attr "length" "4")
1485 (set_attr "arch" "simd,fp,simd")]
1486 )
1487
1488 (define_insn "move_lo_quad_internal_<mode>"
1489 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1490 (vec_concat:VQ_2E
1491 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1492 (const_int 0)))]
1493 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1494 "@
1495 dup\\t%d0, %1.d[0]
1496 fmov\\t%d0, %1
1497 dup\\t%d0, %1"
1498 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1499 (set_attr "length" "4")
1500 (set_attr "arch" "simd,fp,simd")]
1501 )
1502
1503 (define_insn "move_lo_quad_internal_be_<mode>"
1504 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1505 (vec_concat:VQ_NO2E
1506 (vec_duplicate:<VHALF> (const_int 0))
1507 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1508 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1509 "@
1510 dup\\t%d0, %1.d[0]
1511 fmov\\t%d0, %1
1512 dup\\t%d0, %1"
1513 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1514 (set_attr "length" "4")
1515 (set_attr "arch" "simd,fp,simd")]
1516 )
1517
1518 (define_insn "move_lo_quad_internal_be_<mode>"
1519 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1520 (vec_concat:VQ_2E
1521 (const_int 0)
1522 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1523 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1524 "@
1525 dup\\t%d0, %1.d[0]
1526 fmov\\t%d0, %1
1527 dup\\t%d0, %1"
1528 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1529 (set_attr "length" "4")
1530 (set_attr "arch" "simd,fp,simd")]
1531 )
1532
1533 (define_expand "move_lo_quad_<mode>"
1534 [(match_operand:VQ 0 "register_operand")
1535 (match_operand:VQ 1 "register_operand")]
1536 "TARGET_SIMD"
1537 {
1538 if (BYTES_BIG_ENDIAN)
1539 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1540 else
1541 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1542 DONE;
1543 }
1544 )
1545
1546 ;; Move operand1 to the high architectural bits of the register, keeping
1547 ;; the low architectural bits of operand2.
1548 ;; For little-endian this is { operand2, operand1 }
1549 ;; For big-endian this is { operand1, operand2 }
1550
1551 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1552 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1553 (vec_concat:VQ
1554 (vec_select:<VHALF>
1555 (match_dup 0)
1556 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1557 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1558 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1559 "@
1560 ins\\t%0.d[1], %1.d[0]
1561 ins\\t%0.d[1], %1"
1562 [(set_attr "type" "neon_ins")]
1563 )
1564
1565 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1566 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1567 (vec_concat:VQ
1568 (match_operand:<VHALF> 1 "register_operand" "w,r")
1569 (vec_select:<VHALF>
1570 (match_dup 0)
1571 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1572 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1573 "@
1574 ins\\t%0.d[1], %1.d[0]
1575 ins\\t%0.d[1], %1"
1576 [(set_attr "type" "neon_ins")]
1577 )
1578
1579 (define_expand "move_hi_quad_<mode>"
1580 [(match_operand:VQ 0 "register_operand")
1581 (match_operand:<VHALF> 1 "register_operand")]
1582 "TARGET_SIMD"
1583 {
1584 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1585 if (BYTES_BIG_ENDIAN)
1586 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1587 operands[1], p));
1588 else
1589 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1590 operands[1], p));
1591 DONE;
1592 })
1593
1594 ;; Narrowing operations.
1595
1596 ;; For doubles.
1597 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1598 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1599 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1600 "TARGET_SIMD"
1601 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1602 [(set_attr "type" "neon_shift_imm_narrow_q")]
1603 )
1604
1605 (define_expand "vec_pack_trunc_<mode>"
1606 [(match_operand:<VNARROWD> 0 "register_operand")
1607 (match_operand:VDN 1 "register_operand")
1608 (match_operand:VDN 2 "register_operand")]
1609 "TARGET_SIMD"
1610 {
1611 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1612 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1613 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1614
1615 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1616 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1617 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1618 DONE;
1619 })
1620
1621 ;; For quads.
1622
1623 (define_insn "vec_pack_trunc_<mode>"
1624 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1625 (vec_concat:<VNARROWQ2>
1626 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1627 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1628 "TARGET_SIMD"
1629 {
1630 if (BYTES_BIG_ENDIAN)
1631 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1632 else
1633 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1634 }
1635 [(set_attr "type" "multiple")
1636 (set_attr "length" "8")]
1637 )
1638
1639 ;; Widening operations.
1640
1641 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1642 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1643 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1644 (match_operand:VQW 1 "register_operand" "w")
1645 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1646 )))]
1647 "TARGET_SIMD"
1648 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1649 [(set_attr "type" "neon_shift_imm_long")]
1650 )
1651
1652 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1657 )))]
1658 "TARGET_SIMD"
1659 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1660 [(set_attr "type" "neon_shift_imm_long")]
1661 )
1662
1663 (define_expand "vec_unpack<su>_hi_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1666 "TARGET_SIMD"
1667 {
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1670 operands[1], p));
1671 DONE;
1672 }
1673 )
1674
1675 (define_expand "vec_unpack<su>_lo_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1678 "TARGET_SIMD"
1679 {
1680 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1681 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1682 operands[1], p));
1683 DONE;
1684 }
1685 )
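
;; For reference, a sketch in C of the unpack operations (assuming
;; arm_neon.h): vmovl_s16 and vmovl_high_s16 are expected to map to
;; the SXTL/SXTL2 forms above:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t widen_lo (int16x8_t x)
;;   { return vmovl_s16 (vget_low_s16 (x)); }
;;   int32x4_t widen_hi (int16x8_t x)
;;   { return vmovl_high_s16 (x); }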
1686
1687 ;; Widening arithmetic.
1688
1689 (define_insn "*aarch64_<su>mlal_lo<mode>"
1690 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691 (plus:<VWIDE>
1692 (mult:<VWIDE>
1693 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1694 (match_operand:VQW 2 "register_operand" "w")
1695 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1696 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1697 (match_operand:VQW 4 "register_operand" "w")
1698 (match_dup 3))))
1699 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1700 "TARGET_SIMD"
1701 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1702 [(set_attr "type" "neon_mla_<Vetype>_long")]
1703 )
1704
1705 (define_insn "*aarch64_<su>mlal_hi<mode>"
1706 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1707 (plus:<VWIDE>
1708 (mult:<VWIDE>
1709 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1710 (match_operand:VQW 2 "register_operand" "w")
1711 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1712 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1713 (match_operand:VQW 4 "register_operand" "w")
1714 (match_dup 3))))
1715 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1716 "TARGET_SIMD"
1717 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1718 [(set_attr "type" "neon_mla_<Vetype>_long")]
1719 )
1720
1721 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1723 (minus:<VWIDE>
1724 (match_operand:<VWIDE> 1 "register_operand" "0")
1725 (mult:<VWIDE>
1726 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1727 (match_operand:VQW 2 "register_operand" "w")
1728 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1729 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1730 (match_operand:VQW 4 "register_operand" "w")
1731 (match_dup 3))))))]
1732 "TARGET_SIMD"
1733 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1734 [(set_attr "type" "neon_mla_<Vetype>_long")]
1735 )
1736
1737 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1739 (minus:<VWIDE>
1740 (match_operand:<VWIDE> 1 "register_operand" "0")
1741 (mult:<VWIDE>
1742 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1743 (match_operand:VQW 2 "register_operand" "w")
1744 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1745 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1746 (match_operand:VQW 4 "register_operand" "w")
1747 (match_dup 3))))))]
1748 "TARGET_SIMD"
1749 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1750 [(set_attr "type" "neon_mla_<Vetype>_long")]
1751 )
1752
1753 (define_insn "*aarch64_<su>mlal<mode>"
1754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1755 (plus:<VWIDE>
1756 (mult:<VWIDE>
1757 (ANY_EXTEND:<VWIDE>
1758 (match_operand:VD_BHSI 1 "register_operand" "w"))
1759 (ANY_EXTEND:<VWIDE>
1760 (match_operand:VD_BHSI 2 "register_operand" "w")))
1761 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1762 "TARGET_SIMD"
1763 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1764 [(set_attr "type" "neon_mla_<Vetype>_long")]
1765 )
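
;; Illustrative C (a sketch assuming arm_neon.h): vmlal_s16 should
;; match the SMLAL form above:
;;
;;   #include <arm_neon.h>
;;
;;   /* Lane-wise acc += (int32) a * (int32) b.  */
;;   int32x4_t
;;   mla_widen (int32x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vmlal_s16 (acc, a, b);   /* smlal v0.4s, v1.4h, v2.4h  */
;;   }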
1766
1767 (define_insn "*aarch64_<su>mlsl<mode>"
1768 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1769 (minus:<VWIDE>
1770 (match_operand:<VWIDE> 1 "register_operand" "0")
1771 (mult:<VWIDE>
1772 (ANY_EXTEND:<VWIDE>
1773 (match_operand:VD_BHSI 2 "register_operand" "w"))
1774 (ANY_EXTEND:<VWIDE>
1775 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1776 "TARGET_SIMD"
1777 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1778 [(set_attr "type" "neon_mla_<Vetype>_long")]
1779 )
1780
1781 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1782 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1783 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1784 (match_operand:VQW 1 "register_operand" "w")
1785 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1786 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1787 (match_operand:VQW 2 "register_operand" "w")
1788 (match_dup 3)))))]
1789 "TARGET_SIMD"
1790 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1791 [(set_attr "type" "neon_mul_<Vetype>_long")]
1792 )
1793
1794 (define_expand "vec_widen_<su>mult_lo_<mode>"
1795 [(match_operand:<VWIDE> 0 "register_operand")
1796 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1797 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1798 "TARGET_SIMD"
1799 {
1800 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1801 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1802 operands[1],
1803 operands[2], p));
1804 DONE;
1805 }
1806 )
1807
1808 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1810 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1811 (match_operand:VQW 1 "register_operand" "w")
1812 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1813 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1814 (match_operand:VQW 2 "register_operand" "w")
1815 (match_dup 3)))))]
1816 "TARGET_SIMD"
1817 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1818 [(set_attr "type" "neon_mul_<Vetype>_long")]
1819 )
1820
1821 (define_expand "vec_widen_<su>mult_hi_<mode>"
1822 [(match_operand:<VWIDE> 0 "register_operand")
1823 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1824 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1825 "TARGET_SIMD"
1826 {
1827 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1828 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1829 operands[1],
1830 operands[2], p));
DONE;
}
1834 )
1835
1836 ;; FP vector operations.
1837 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1838 ;; double-precision (64-bit) floating-point data types and arithmetic as
1839 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1840 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1841 ;;
;; Floating-point operations can raise an exception.  Vectorising such
;; operations is safe for the reasons explained below.
1844 ;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling; however, this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code:
1849 ;; 1. If trapped floating-point exceptions are available, then a trap
1850 ;; will be taken when any lane raises an enabled exception. A trap
1851 ;; handler may determine which lane raised the exception.
;; 2. Alternatively, a sticky exception flag is set in the
;;    floating-point status register (FPSR).  Software may explicitly
;;    test the exception flags, in which case the tests either occur
;;    inside the region being vectorised, preventing vectorisation
;;    and allowing precise identification of the failing operation,
;;    or occur outside any vectorisable region, in which case the
;;    specific operation and lane are not of interest.
1858
1859 ;; FP arithmetic operations.
1860
1861 (define_insn "add<mode>3"
1862 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1863 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1864 (match_operand:VHSDF 2 "register_operand" "w")))]
1865 "TARGET_SIMD"
1866 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1867 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1868 )
1869
1870 (define_insn "sub<mode>3"
1871 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1872 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1873 (match_operand:VHSDF 2 "register_operand" "w")))]
1874 "TARGET_SIMD"
1875 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1877 )
1878
1879 (define_insn "mul<mode>3"
1880 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1881 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1882 (match_operand:VHSDF 2 "register_operand" "w")))]
1883 "TARGET_SIMD"
1884 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1885 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1886 )
1887
1888 (define_expand "div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1891 (match_operand:VHSDF 2 "register_operand")))]
1892 "TARGET_SIMD"
1893 {
1894 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1895 DONE;
1896
1897 operands[1] = force_reg (<MODE>mode, operands[1]);
1898 })
1899
1900 (define_insn "*div<mode>3"
1901 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1902 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1903 (match_operand:VHSDF 2 "register_operand" "w")))]
1904 "TARGET_SIMD"
1905 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1906 [(set_attr "type" "neon_fp_div_<stype><q>")]
1907 )
1908
1909 (define_insn "neg<mode>2"
1910 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1911 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1912 "TARGET_SIMD"
1913 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1914 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1915 )
1916
1917 (define_insn "abs<mode>2"
1918 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1920 "TARGET_SIMD"
1921 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1922 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1923 )
1924
1925 (define_insn "fma<mode>4"
1926 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1927 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1928 (match_operand:VHSDF 2 "register_operand" "w")
1929 (match_operand:VHSDF 3 "register_operand" "0")))]
1930 "TARGET_SIMD"
1931 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1932 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1933 )
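
;; A minimal sketch (assuming arm_neon.h): the ACLE fused
;; multiply-accumulate intrinsic is expected to match the pattern
;; above:
;;
;;   #include <arm_neon.h>
;;
;;   /* Computes acc + a * b with a single rounding step (FMLA).  */
;;   float32x4_t
;;   fma_f32 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);
;;   }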
1934
1935 (define_insn "*aarch64_fma4_elt<mode>"
1936 [(set (match_operand:VDQF 0 "register_operand" "=w")
1937 (fma:VDQF
1938 (vec_duplicate:VDQF
1939 (vec_select:<VEL>
1940 (match_operand:VDQF 1 "register_operand" "<h_con>")
1941 (parallel [(match_operand:SI 2 "immediate_operand")])))
1942 (match_operand:VDQF 3 "register_operand" "w")
1943 (match_operand:VDQF 4 "register_operand" "0")))]
1944 "TARGET_SIMD"
1945 {
1946 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1947 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1948 }
1949 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1950 )
1951
1952 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1953 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1954 (fma:VDQSF
1955 (vec_duplicate:VDQSF
1956 (vec_select:<VEL>
1957 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1958 (parallel [(match_operand:SI 2 "immediate_operand")])))
1959 (match_operand:VDQSF 3 "register_operand" "w")
1960 (match_operand:VDQSF 4 "register_operand" "0")))]
1961 "TARGET_SIMD"
1962 {
1963 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1964 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1965 }
1966 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1970 [(set (match_operand:VMUL 0 "register_operand" "=w")
1971 (fma:VMUL
1972 (vec_duplicate:VMUL
1973 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1974 (match_operand:VMUL 2 "register_operand" "w")
1975 (match_operand:VMUL 3 "register_operand" "0")))]
1976 "TARGET_SIMD"
1977 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1978 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1979 )
1980
1981 (define_insn "*aarch64_fma4_elt_to_64v2df"
1982 [(set (match_operand:DF 0 "register_operand" "=w")
1983 (fma:DF
1984 (vec_select:DF
1985 (match_operand:V2DF 1 "register_operand" "w")
1986 (parallel [(match_operand:SI 2 "immediate_operand")]))
1987 (match_operand:DF 3 "register_operand" "w")
1988 (match_operand:DF 4 "register_operand" "0")))]
1989 "TARGET_SIMD"
1990 {
1991 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1992 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1993 }
1994 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1995 )
1996
1997 (define_insn "fnma<mode>4"
1998 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1999 (fma:VHSDF
2000 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2001 (match_operand:VHSDF 2 "register_operand" "w")
2002 (match_operand:VHSDF 3 "register_operand" "0")))]
2003 "TARGET_SIMD"
2004 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2006 )
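
;; Correspondingly, a sketch for the negated form (assuming
;; arm_neon.h): vfmsq_f32 computes acc - a * b and should match the
;; FMLS pattern above:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   fms_f32 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmsq_f32 (acc, a, b);   /* fmls v0.4s, v1.4s, v2.4s  */
;;   }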
2007
2008 (define_insn "*aarch64_fnma4_elt<mode>"
2009 [(set (match_operand:VDQF 0 "register_operand" "=w")
2010 (fma:VDQF
2011 (neg:VDQF
2012 (match_operand:VDQF 3 "register_operand" "w"))
2013 (vec_duplicate:VDQF
2014 (vec_select:<VEL>
2015 (match_operand:VDQF 1 "register_operand" "<h_con>")
2016 (parallel [(match_operand:SI 2 "immediate_operand")])))
2017 (match_operand:VDQF 4 "register_operand" "0")))]
2018 "TARGET_SIMD"
2019 {
2020 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2021 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2022 }
2023 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2024 )
2025
2026 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2027 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2028 (fma:VDQSF
2029 (neg:VDQSF
2030 (match_operand:VDQSF 3 "register_operand" "w"))
2031 (vec_duplicate:VDQSF
2032 (vec_select:<VEL>
2033 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2034 (parallel [(match_operand:SI 2 "immediate_operand")])))
2035 (match_operand:VDQSF 4 "register_operand" "0")))]
2036 "TARGET_SIMD"
2037 {
2038 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2039 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2040 }
2041 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2042 )
2043
2044 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2045 [(set (match_operand:VMUL 0 "register_operand" "=w")
2046 (fma:VMUL
2047 (neg:VMUL
2048 (match_operand:VMUL 2 "register_operand" "w"))
2049 (vec_duplicate:VMUL
2050 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2051 (match_operand:VMUL 3 "register_operand" "0")))]
2052 "TARGET_SIMD"
2053 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2054 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2055 )
2056
2057 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2058 [(set (match_operand:DF 0 "register_operand" "=w")
2059 (fma:DF
2060 (vec_select:DF
2061 (match_operand:V2DF 1 "register_operand" "w")
2062 (parallel [(match_operand:SI 2 "immediate_operand")]))
2063 (neg:DF
2064 (match_operand:DF 3 "register_operand" "w"))
2065 (match_operand:DF 4 "register_operand" "0")))]
2066 "TARGET_SIMD"
2067 {
2068 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2069 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2070 }
2071 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2072 )
2073
2074 ;; Vector versions of the floating-point frint patterns.
2075 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2076 (define_insn "<frint_pattern><mode>2"
2077 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2078 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2079 FRINT))]
2080 "TARGET_SIMD"
2081 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_round_<stype><q>")]
2083 )
2084
2085 ;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor.
2087 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2088 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2089 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2090 [(match_operand:VHSDF 1 "register_operand" "w")]
2091 FCVT)))]
2092 "TARGET_SIMD"
2093 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2094 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2095 )
2096
2097 ;; HF Scalar variants of related SIMD instructions.
2098 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2099 [(set (match_operand:HI 0 "register_operand" "=w")
2100 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2101 FCVT)))]
2102 "TARGET_SIMD_F16INST"
2103 "fcvt<frint_suffix><su>\t%h0, %h1"
2104 [(set_attr "type" "neon_fp_to_int_s")]
2105 )
2106
2107 (define_insn "<optab>_trunchfhi2"
2108 [(set (match_operand:HI 0 "register_operand" "=w")
2109 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2110 "TARGET_SIMD_F16INST"
2111 "fcvtz<su>\t%h0, %h1"
2112 [(set_attr "type" "neon_fp_to_int_s")]
2113 )
2114
2115 (define_insn "<optab>hihf2"
2116 [(set (match_operand:HF 0 "register_operand" "=w")
2117 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2118 "TARGET_SIMD_F16INST"
2119 "<su_optab>cvtf\t%h0, %h1"
2120 [(set_attr "type" "neon_int_to_fp_s")]
2121 )
2122
2123 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2124 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2125 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2126 [(mult:VDQF
2127 (match_operand:VDQF 1 "register_operand" "w")
2128 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2129 UNSPEC_FRINTZ)))]
2130 "TARGET_SIMD
2131 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2132 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2133 {
2134 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2135 char buf[64];
2136 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2137 output_asm_insn (buf, operands);
2138 return "";
2139 }
2140 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2141 )
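
;; The pattern above folds a multiply by a power of two into the
;; #fbits immediate of the conversion.  A hedged sketch with the ACLE
;; intrinsic (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;
;;   /* Roughly vcvtq_s32_f32 (x * 16.0f), i.e. 4 fraction bits.  */
;;   int32x4_t
;;   to_fixed_q4 (float32x4_t x)
;;   {
;;     return vcvtq_n_s32_f32 (x, 4);   /* fcvtzs v0.4s, v0.4s, #4  */
;;   }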
2142
2143 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2144 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2145 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2146 [(match_operand:VHSDF 1 "register_operand")]
2147 UNSPEC_FRINTZ)))]
2148 "TARGET_SIMD"
2149 {})
2150
2151 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2152 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2153 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2154 [(match_operand:VHSDF 1 "register_operand")]
2155 UNSPEC_FRINTZ)))]
2156 "TARGET_SIMD"
2157 {})
2158
2159 (define_expand "ftrunc<VHSDF:mode>2"
2160 [(set (match_operand:VHSDF 0 "register_operand")
2161 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2162 UNSPEC_FRINTZ))]
2163 "TARGET_SIMD"
2164 {})
2165
2166 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2167 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2168 (FLOATUORS:VHSDF
2169 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2170 "TARGET_SIMD"
2171 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2172 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2173 )
2174
2175 ;; Conversions between vectors of floats and doubles.
2176 ;; Contains a mix of patterns to match standard pattern names
2177 ;; and those for intrinsics.
2178
2179 ;; Float widening operations.
2180
2181 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2182 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2183 (float_extend:<VWIDE> (vec_select:<VHALF>
2184 (match_operand:VQ_HSF 1 "register_operand" "w")
2185 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2186 )))]
2187 "TARGET_SIMD"
2188 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2189 [(set_attr "type" "neon_fp_cvt_widen_s")]
2190 )
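
;; For example (a sketch assuming arm_neon.h), vcvt_f64_f32 widens
;; two single-precision lanes to double precision and is expected to
;; use the FCVTL form matched by the patterns in this section:
;;
;;   #include <arm_neon.h>
;;
;;   float64x2_t widen_f32 (float32x2_t x) { return vcvt_f64_f32 (x); }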
2191
2192 ;; Convert between fixed-point and floating-point (vector modes)
2193
2194 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2195 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2196 (unspec:<VHSDF:FCVT_TARGET>
2197 [(match_operand:VHSDF 1 "register_operand" "w")
2198 (match_operand:SI 2 "immediate_operand" "i")]
2199 FCVT_F2FIXED))]
2200 "TARGET_SIMD"
2201 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2202 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2203 )
2204
2205 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2206 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2207 (unspec:<VDQ_HSDI:FCVT_TARGET>
2208 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2209 (match_operand:SI 2 "immediate_operand" "i")]
2210 FCVT_FIXED2F))]
2211 "TARGET_SIMD"
2212 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2213 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2214 )
2215
2216 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2217 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2218 ;; the meaning of HI and LO changes depending on the target endianness.
2219 ;; While elsewhere we map the higher numbered elements of a vector to
2220 ;; the lower architectural lanes of the vector, for these patterns we want
2221 ;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns, their behaviour is as required.
2224
2225 (define_expand "vec_unpacks_lo_<mode>"
2226 [(match_operand:<VWIDE> 0 "register_operand")
2227 (match_operand:VQ_HSF 1 "register_operand")]
2228 "TARGET_SIMD"
2229 {
2230 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2231 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2232 operands[1], p));
2233 DONE;
2234 }
2235 )
2236
2237 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2239 (float_extend:<VWIDE> (vec_select:<VHALF>
2240 (match_operand:VQ_HSF 1 "register_operand" "w")
2241 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2242 )))]
2243 "TARGET_SIMD"
2244 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2245 [(set_attr "type" "neon_fp_cvt_widen_s")]
2246 )
2247
2248 (define_expand "vec_unpacks_hi_<mode>"
2249 [(match_operand:<VWIDE> 0 "register_operand")
2250 (match_operand:VQ_HSF 1 "register_operand")]
2251 "TARGET_SIMD"
2252 {
2253 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
operands[1], p));
2256 DONE;
2257 }
2258 )

(define_insn "aarch64_float_extend_lo_<Vwide>"
2260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261 (float_extend:<VWIDE>
2262 (match_operand:VDF 1 "register_operand" "w")))]
2263 "TARGET_SIMD"
2264 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2265 [(set_attr "type" "neon_fp_cvt_widen_s")]
2266 )
2267
2268 ;; Float narrowing operations.
2269
2270 (define_insn "aarch64_float_truncate_lo_<mode>"
2271 [(set (match_operand:VDF 0 "register_operand" "=w")
2272 (float_truncate:VDF
2273 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2274 "TARGET_SIMD"
2275 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2276 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2277 )
2278
2279 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2280 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2281 (vec_concat:<VDBL>
2282 (match_operand:VDF 1 "register_operand" "0")
2283 (float_truncate:VDF
2284 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2285 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2286 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2287 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2288 )
2289
2290 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2291 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2292 (vec_concat:<VDBL>
2293 (float_truncate:VDF
2294 (match_operand:<VWIDE> 2 "register_operand" "w"))
2295 (match_operand:VDF 1 "register_operand" "0")))]
2296 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2297 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2298 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2299 )
2300
2301 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2302 [(match_operand:<VDBL> 0 "register_operand")
2303 (match_operand:VDF 1 "register_operand")
2304 (match_operand:<VWIDE> 2 "register_operand")]
2305 "TARGET_SIMD"
2306 {
2307 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2308 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2309 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2310 emit_insn (gen (operands[0], operands[1], operands[2]));
2311 DONE;
2312 }
2313 )
2314
2315 (define_expand "vec_pack_trunc_v2df"
2316 [(set (match_operand:V4SF 0 "register_operand")
2317 (vec_concat:V4SF
2318 (float_truncate:V2SF
2319 (match_operand:V2DF 1 "register_operand"))
2320 (float_truncate:V2SF
2321 (match_operand:V2DF 2 "register_operand"))
2322 ))]
2323 "TARGET_SIMD"
2324 {
2325 rtx tmp = gen_reg_rtx (V2SFmode);
2326 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2327 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2328
2329 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2330 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2331 tmp, operands[hi]));
2332 DONE;
2333 }
2334 )
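
;; A rough C equivalent of the expansion above (assuming arm_neon.h);
;; GCC may emit this as an FCVTN/FCVTN2 pair:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   pack_f64 (float64x2_t a, float64x2_t b)
;;   {
;;     return vcombine_f32 (vcvt_f32_f64 (a), vcvt_f32_f64 (b));
;;   }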
2335
2336 (define_expand "vec_pack_trunc_df"
2337 [(set (match_operand:V2SF 0 "register_operand")
2338 (vec_concat:V2SF
2339 (float_truncate:SF
2340 (match_operand:DF 1 "register_operand"))
2341 (float_truncate:SF
2342 (match_operand:DF 2 "register_operand"))
2343 ))]
2344 "TARGET_SIMD"
2345 {
2346 rtx tmp = gen_reg_rtx (V2SFmode);
2347 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2348 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2349
2350 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2351 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2352 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2353 DONE;
2354 }
2355 )
2356
2357 ;; FP Max/Min
;; Max/Min are introduced by idiom recognition in GCC's mid-end.  An
2359 ;; expression like:
2360 ;; a = (b < c) ? b : c;
2361 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2362 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2363 ;; -ffast-math.
2364 ;;
2365 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2366 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2367 ;; operand will be returned when both operands are zero (i.e. they may not
2368 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2369 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2370 ;; NaNs.
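;;
;; For instance (a sketch assuming arm_neon.h), the IEEE
;; maxNum/minNum behaviour is reachable directly via the ACLE
;; intrinsics, which are expected to match the patterns below:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   max_nm (float32x4_t a, float32x4_t b)
;;   {
;;     return vmaxnmq_f32 (a, b);   /* fmaxnm v0.4s, v1.4s, v2.4s  */
;;   }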
2371
2372 (define_insn "<su><maxmin><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")))]
2376 "TARGET_SIMD"
2377 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2378 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2379 )
2380
2381 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2382 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2383 ;; which implement the IEEE fmax ()/fmin () functions.
2384 (define_insn "<maxmin_uns><mode>3"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2387 (match_operand:VHSDF 2 "register_operand" "w")]
2388 FMAXMIN_UNS))]
2389 "TARGET_SIMD"
2390 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2391 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2392 )
2393
2394 ;; 'across lanes' add.
2395
2396 (define_expand "reduc_plus_scal_<mode>"
2397 [(match_operand:<VEL> 0 "register_operand")
2398 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2399 UNSPEC_ADDV)]
2400 "TARGET_SIMD"
2401 {
2402 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2403 rtx scratch = gen_reg_rtx (<MODE>mode);
2404 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2405 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2406 DONE;
2407 }
2408 )
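
;; A short sketch of an across-lanes addition at the source level
;; (assuming arm_neon.h); vaddvq_s32 is expected to use the ADDV form
;; generated here:
;;
;;   #include <arm_neon.h>
;;
;;   int32_t sum_lanes (int32x4_t x) { return vaddvq_s32 (x); }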
2409
2410 (define_insn "aarch64_faddp<mode>"
2411 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2412 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2413 (match_operand:VHSDF 2 "register_operand" "w")]
2414 UNSPEC_FADDV))]
2415 "TARGET_SIMD"
2416 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2417 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2418 )
2419
2420 (define_insn "aarch64_reduc_plus_internal<mode>"
2421 [(set (match_operand:VDQV 0 "register_operand" "=w")
2422 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2423 UNSPEC_ADDV))]
2424 "TARGET_SIMD"
2425 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2426 [(set_attr "type" "neon_reduc_add<q>")]
2427 )
2428
2429 (define_insn "aarch64_reduc_plus_internalv2si"
2430 [(set (match_operand:V2SI 0 "register_operand" "=w")
2431 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2432 UNSPEC_ADDV))]
2433 "TARGET_SIMD"
2434 "addp\\t%0.2s, %1.2s, %1.2s"
2435 [(set_attr "type" "neon_reduc_add")]
2436 )
2437
2438 (define_insn "reduc_plus_scal_<mode>"
2439 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2440 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2441 UNSPEC_FADDV))]
2442 "TARGET_SIMD"
2443 "faddp\\t%<Vetype>0, %1.<Vtype>"
2444 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2445 )
2446
2447 (define_expand "reduc_plus_scal_v4sf"
2448 [(set (match_operand:SF 0 "register_operand")
2449 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2450 UNSPEC_FADDV))]
2451 "TARGET_SIMD"
2452 {
2453 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2454 rtx scratch = gen_reg_rtx (V4SFmode);
2455 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2456 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2457 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2458 DONE;
2459 })
2460
2461 (define_insn "clrsb<mode>2"
2462 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2463 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2464 "TARGET_SIMD"
2465 "cls\\t%0.<Vtype>, %1.<Vtype>"
2466 [(set_attr "type" "neon_cls<q>")]
2467 )
2468
2469 (define_insn "clz<mode>2"
2470 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2471 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2472 "TARGET_SIMD"
2473 "clz\\t%0.<Vtype>, %1.<Vtype>"
2474 [(set_attr "type" "neon_cls<q>")]
2475 )
2476
2477 (define_insn "popcount<mode>2"
2478 [(set (match_operand:VB 0 "register_operand" "=w")
2479 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2480 "TARGET_SIMD"
2481 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2482 [(set_attr "type" "neon_cnt<q>")]
2483 )
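
;; Illustration (a sketch assuming arm_neon.h): the byte-wise
;; population count should map to CNT:
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t popcnt_bytes (uint8x16_t x) { return vcntq_u8 (x); }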
2484
2485 ;; 'across lanes' max and min ops.
2486
2487 ;; Template for outputting a scalar, so we can create __builtins which can be
2488 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2489 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2490 [(match_operand:<VEL> 0 "register_operand")
2491 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2492 FMAXMINV)]
2493 "TARGET_SIMD"
2494 {
2495 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2496 rtx scratch = gen_reg_rtx (<MODE>mode);
2497 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2498 operands[1]));
2499 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2500 DONE;
2501 }
2502 )
2503
2504 ;; Likewise for integer cases, signed and unsigned.
2505 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2506 [(match_operand:<VEL> 0 "register_operand")
2507 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2508 MAXMINV)]
2509 "TARGET_SIMD"
2510 {
2511 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2512 rtx scratch = gen_reg_rtx (<MODE>mode);
2513 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2514 operands[1]));
2515 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2516 DONE;
2517 }
2518 )
2519
2520 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2521 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2522 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2523 MAXMINV))]
2524 "TARGET_SIMD"
2525 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2526 [(set_attr "type" "neon_reduc_minmax<q>")]
2527 )
2528
2529 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2530 [(set (match_operand:V2SI 0 "register_operand" "=w")
2531 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2532 MAXMINV))]
2533 "TARGET_SIMD"
2534 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2535 [(set_attr "type" "neon_reduc_minmax")]
2536 )
2537
2538 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2539 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2540 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2541 FMAXMINV))]
2542 "TARGET_SIMD"
2543 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2544 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2545 )
2546
2547 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2548 ;; allocation.
2549 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2550 ;; to select.
2551 ;;
;; Thus our BSL is of the form:
;;   op0 = bsl (op1, op2, op3)      (op1 is the mask)
;; We can use any of:
;;
;;   if (op0 = op1)  (the mask is also the destination)
;;     bsl  op0, op2, op3
;;   if (op0 = op3)  (so 1-bits in the mask choose bits from op2, else op0)
;;     bit  op0, op2, op1
;;   if (op0 = op2)  (so 0-bits in the mask choose bits from op3, else op0)
;;     bif  op0, op3, op1
;;
;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
2564 ;; Some forms of straight-line code may generate the equivalent form
2565 ;; in *aarch64_simd_bsl<mode>_alt.
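;;
;; In plain C the selection is op0 = (op2 & op1) | (op3 & ~op1),
;; which the XOR form below encodes as ((op2 ^ op3) & op1) ^ op3.
;; A hedged intrinsic-level sketch (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;
;;   /* Bits of A where MASK is 1, bits of B where MASK is 0.  */
;;   uint32x4_t
;;   select_bits (uint32x4_t mask, uint32x4_t a, uint32x4_t b)
;;   {
;;     return vbslq_u32 (mask, a, b);   /* one of bsl/bit/bif  */
;;   }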
2566
2567 (define_insn "aarch64_simd_bsl<mode>_internal"
2568 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2569 (xor:VDQ_I
2570 (and:VDQ_I
2571 (xor:VDQ_I
2572 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2573 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2574 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2575 (match_dup:<V_INT_EQUIV> 3)
2576 ))]
2577 "TARGET_SIMD"
2578 "@
2579 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2580 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2581 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2582 [(set_attr "type" "neon_bsl<q>")]
2583 )
2584
2585 ;; We need this form in addition to the above pattern to match the case
2586 ;; when combine tries merging three insns such that the second operand of
2587 ;; the outer XOR matches the second operand of the inner XOR rather than
2588 ;; the first. The two are equivalent but since recog doesn't try all
2589 ;; permutations of commutative operations, we have to have a separate pattern.
2590
2591 (define_insn "*aarch64_simd_bsl<mode>_alt"
2592 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2593 (xor:VDQ_I
2594 (and:VDQ_I
2595 (xor:VDQ_I
2596 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2597 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2598 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2599 (match_dup:<V_INT_EQUIV> 2)))]
2600 "TARGET_SIMD"
2601 "@
2602 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2603 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2604 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2605 [(set_attr "type" "neon_bsl<q>")]
2606 )
2607
;; DImode is special: we want to avoid computing, in the vector
;; registers, operations that are more naturally computed in general
;; purpose registers.  If we did that, we would need to move all three
;; operands from general purpose registers to vector registers, then
;; back again.  However, we don't want to make this pattern an UNSPEC
;; as we'd lose scope for optimizations based on the component
;; operations of a BSL.
2614 ;;
2615 ;; That means we need a splitter back to the individual operations, if they
2616 ;; would be better calculated on the integer side.
2617
2618 (define_insn_and_split "aarch64_simd_bsldi_internal"
2619 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2620 (xor:DI
2621 (and:DI
2622 (xor:DI
2623 (match_operand:DI 3 "register_operand" "w,0,w,r")
2624 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2625 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2626 (match_dup:DI 3)
2627 ))]
2628 "TARGET_SIMD"
2629 "@
2630 bsl\\t%0.8b, %2.8b, %3.8b
2631 bit\\t%0.8b, %2.8b, %1.8b
2632 bif\\t%0.8b, %3.8b, %1.8b
2633 #"
2634 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2635 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2636 {
2637 /* Split back to individual operations. If we're before reload, and
2638 able to create a temporary register, do so. If we're after reload,
2639 we've got an early-clobber destination register, so use that.
2640 Otherwise, we can't create pseudos and we can't yet guarantee that
2641 operands[0] is safe to write, so FAIL to split. */
2642
2643 rtx scratch;
2644 if (reload_completed)
2645 scratch = operands[0];
2646 else if (can_create_pseudo_p ())
2647 scratch = gen_reg_rtx (DImode);
2648 else
2649 FAIL;
2650
2651 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2652 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2653 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2654 DONE;
2655 }
2656 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2657 (set_attr "length" "4,4,4,12")]
2658 )
2659
2660 (define_insn_and_split "aarch64_simd_bsldi_alt"
2661 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2662 (xor:DI
2663 (and:DI
2664 (xor:DI
2665 (match_operand:DI 3 "register_operand" "w,w,0,r")
2666 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2667 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2668 (match_dup:DI 2)
2669 ))]
2670 "TARGET_SIMD"
2671 "@
2672 bsl\\t%0.8b, %3.8b, %2.8b
2673 bit\\t%0.8b, %3.8b, %1.8b
2674 bif\\t%0.8b, %2.8b, %1.8b
2675 #"
2676 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2677 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2678 {
2679 /* Split back to individual operations. If we're before reload, and
2680 able to create a temporary register, do so. If we're after reload,
2681 we've got an early-clobber destination register, so use that.
2682 Otherwise, we can't create pseudos and we can't yet guarantee that
2683 operands[0] is safe to write, so FAIL to split. */
2684
2685 rtx scratch;
2686 if (reload_completed)
2687 scratch = operands[0];
2688 else if (can_create_pseudo_p ())
2689 scratch = gen_reg_rtx (DImode);
2690 else
2691 FAIL;
2692
2693 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2694 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2695 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2696 DONE;
2697 }
2698 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2699 (set_attr "length" "4,4,4,12")]
2700 )
2701
2702 (define_expand "aarch64_simd_bsl<mode>"
2703 [(match_operand:VALLDIF 0 "register_operand")
2704 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2705 (match_operand:VALLDIF 2 "register_operand")
2706 (match_operand:VALLDIF 3 "register_operand")]
2707 "TARGET_SIMD"
2708 {
2709 /* We can't alias operands together if they have different modes. */
2710 rtx tmp = operands[0];
2711 if (FLOAT_MODE_P (<MODE>mode))
2712 {
2713 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2714 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2715 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2716 }
2717 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2718 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2719 operands[1],
2720 operands[2],
2721 operands[3]));
2722 if (tmp != operands[0])
2723 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2724
2725 DONE;
2726 })
2727
2728 (define_expand "vcond_mask_<mode><v_int_equiv>"
2729 [(match_operand:VALLDI 0 "register_operand")
2730 (match_operand:VALLDI 1 "nonmemory_operand")
2731 (match_operand:VALLDI 2 "nonmemory_operand")
2732 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2733 "TARGET_SIMD"
2734 {
/* If we have (a = (P) ? -1 : 0), then we can simply move the
   generated mask (the result must be an integer vector).  */
2737 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2738 && operands[2] == CONST0_RTX (<MODE>mode))
2739 emit_move_insn (operands[0], operands[3]);
2740 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2741 else if (operands[1] == CONST0_RTX (<MODE>mode)
2742 && operands[2] == CONSTM1_RTX (<MODE>mode))
2743 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2744 else
2745 {
2746 if (!REG_P (operands[1]))
2747 operands[1] = force_reg (<MODE>mode, operands[1]);
2748 if (!REG_P (operands[2]))
2749 operands[2] = force_reg (<MODE>mode, operands[2]);
2750 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2751 operands[1], operands[2]));
2752 }
2753
2754 DONE;
2755 })
2756
2757 ;; Patterns comparing two vectors to produce a mask.
2758
2759 (define_expand "vec_cmp<mode><mode>"
2760 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2761 (match_operator 1 "comparison_operator"
2762 [(match_operand:VSDQ_I_DI 2 "register_operand")
2763 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2764 "TARGET_SIMD"
2765 {
2766 rtx mask = operands[0];
2767 enum rtx_code code = GET_CODE (operands[1]);
2768
2769 switch (code)
2770 {
2771 case NE:
2772 case LE:
2773 case LT:
2774 case GE:
2775 case GT:
2776 case EQ:
2777 if (operands[3] == CONST0_RTX (<MODE>mode))
2778 break;
2779
2780 /* Fall through. */
2781 default:
2782 if (!REG_P (operands[3]))
2783 operands[3] = force_reg (<MODE>mode, operands[3]);
2784
2785 break;
2786 }
2787
2788 switch (code)
2789 {
2790 case LT:
2791 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2792 break;
2793
2794 case GE:
2795 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2796 break;
2797
2798 case LE:
2799 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2800 break;
2801
2802 case GT:
2803 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2804 break;
2805
2806 case LTU:
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2808 break;
2809
2810 case GEU:
2811 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2812 break;
2813
2814 case LEU:
2815 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2816 break;
2817
2818 case GTU:
2819 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2820 break;
2821
2822 case NE:
2823 /* Handle NE as !EQ. */
2824 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2825 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2826 break;
2827
2828 case EQ:
2829 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2830 break;
2831
2832 default:
2833 gcc_unreachable ();
2834 }
2835
2836 DONE;
2837 })
2838
2839 (define_expand "vec_cmp<mode><v_int_equiv>"
2840 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2841 (match_operator 1 "comparison_operator"
2842 [(match_operand:VDQF 2 "register_operand")
2843 (match_operand:VDQF 3 "nonmemory_operand")]))]
2844 "TARGET_SIMD"
2845 {
2846 int use_zero_form = 0;
2847 enum rtx_code code = GET_CODE (operands[1]);
2848 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2849
2850 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2851
2852 switch (code)
2853 {
2854 case LE:
2855 case LT:
2856 case GE:
2857 case GT:
2858 case EQ:
2859 if (operands[3] == CONST0_RTX (<MODE>mode))
2860 {
2861 use_zero_form = 1;
2862 break;
2863 }
2864 /* Fall through. */
2865 default:
2866 if (!REG_P (operands[3]))
2867 operands[3] = force_reg (<MODE>mode, operands[3]);
2868
2869 break;
2870 }
2871
2872 switch (code)
2873 {
2874 case LT:
2875 if (use_zero_form)
2876 {
2877 comparison = gen_aarch64_cmlt<mode>;
2878 break;
2879 }
2880 /* Fall through. */
2881 case UNLT:
2882 std::swap (operands[2], operands[3]);
2883 /* Fall through. */
2884 case UNGT:
2885 case GT:
2886 comparison = gen_aarch64_cmgt<mode>;
2887 break;
2888 case LE:
2889 if (use_zero_form)
2890 {
2891 comparison = gen_aarch64_cmle<mode>;
2892 break;
2893 }
2894 /* Fall through. */
2895 case UNLE:
2896 std::swap (operands[2], operands[3]);
2897 /* Fall through. */
2898 case UNGE:
2899 case GE:
2900 comparison = gen_aarch64_cmge<mode>;
2901 break;
2902 case NE:
2903 case EQ:
2904 comparison = gen_aarch64_cmeq<mode>;
2905 break;
2906 case UNEQ:
2907 case ORDERED:
2908 case UNORDERED:
2909 case LTGT:
2910 break;
2911 default:
2912 gcc_unreachable ();
2913 }
2914
2915 switch (code)
2916 {
2917 case UNGE:
2918 case UNGT:
2919 case UNLE:
2920 case UNLT:
2921 {
2922 /* All of the above must not raise any FP exceptions. Thus we first
2923 check each operand for NaNs and force any elements containing NaN to
2924 zero before using them in the compare.
2925 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2926 (cm<cc> (isnan (a) ? 0.0 : a,
2927 isnan (b) ? 0.0 : b))
We use the following transformations for doing the comparisons:
2929 a UNGE b -> a GE b
2930 a UNGT b -> a GT b
2931 a UNLE b -> b GE a
2932 a UNLT b -> b GT a. */
2933
2934 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2935 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2936 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2937 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2938 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2939 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2940 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2941 lowpart_subreg (<V_INT_EQUIV>mode,
2942 operands[2],
2943 <MODE>mode)));
2944 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2945 lowpart_subreg (<V_INT_EQUIV>mode,
2946 operands[3],
2947 <MODE>mode)));
2948 gcc_assert (comparison != NULL);
2949 emit_insn (comparison (operands[0],
2950 lowpart_subreg (<MODE>mode,
2951 tmp0, <V_INT_EQUIV>mode),
2952 lowpart_subreg (<MODE>mode,
2953 tmp1, <V_INT_EQUIV>mode)));
2954 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2955 }
2956 break;
2957
2958 case LT:
2959 case LE:
2960 case GT:
2961 case GE:
2962 case EQ:
2963 case NE:
2964 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
As a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
2966 a GE b -> a GE b
2967 a GT b -> a GT b
2968 a LE b -> b GE a
2969 a LT b -> b GT a
2970 a EQ b -> a EQ b
2971 a NE b -> ~(a EQ b) */
2972 gcc_assert (comparison != NULL);
2973 emit_insn (comparison (operands[0], operands[2], operands[3]));
2974 if (code == NE)
2975 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2976 break;
2977
2978 case LTGT:
/* LTGT is not guaranteed not to raise an FP exception, so take
   the faster route: ((a > b) || (b > a)).  */
2981 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2982 operands[2], operands[3]));
2983 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2984 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2985 break;
2986
2987 case ORDERED:
2988 case UNORDERED:
2989 case UNEQ:
2990 /* cmeq (a, a) & cmeq (b, b). */
2991 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2992 operands[2], operands[2]));
2993 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2994 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2995
2996 if (code == UNORDERED)
2997 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2998 else if (code == UNEQ)
2999 {
3000 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3001 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3002 }
3003 break;
3004
3005 default:
3006 gcc_unreachable ();
3007 }
3008
3009 DONE;
3010 })
3011
3012 (define_expand "vec_cmpu<mode><mode>"
3013 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3014 (match_operator 1 "comparison_operator"
3015 [(match_operand:VSDQ_I_DI 2 "register_operand")
3016 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3017 "TARGET_SIMD"
3018 {
3019 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3020 operands[2], operands[3]));
3021 DONE;
3022 })
3023
3024 (define_expand "vcond<mode><mode>"
3025 [(set (match_operand:VALLDI 0 "register_operand")
3026 (if_then_else:VALLDI
3027 (match_operator 3 "comparison_operator"
3028 [(match_operand:VALLDI 4 "register_operand")
3029 (match_operand:VALLDI 5 "nonmemory_operand")])
3030 (match_operand:VALLDI 1 "nonmemory_operand")
3031 (match_operand:VALLDI 2 "nonmemory_operand")))]
3032 "TARGET_SIMD"
3033 {
3034 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3035 enum rtx_code code = GET_CODE (operands[3]);
3036
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3040 if (code == NE)
3041 {
3042 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3043 operands[4], operands[5]);
3044 std::swap (operands[1], operands[2]);
3045 }
3046 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3047 operands[4], operands[5]));
3048 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3049 operands[2], mask));
3050
3051 DONE;
3052 })
3053
3054 (define_expand "vcond<v_cmp_mixed><mode>"
3055 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3056 (if_then_else:<V_cmp_mixed>
3057 (match_operator 3 "comparison_operator"
3058 [(match_operand:VDQF_COND 4 "register_operand")
3059 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3060 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3061 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3062 "TARGET_SIMD"
3063 {
3064 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3065 enum rtx_code code = GET_CODE (operands[3]);
3066
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3070 if (code == NE)
3071 {
3072 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3073 operands[4], operands[5]);
3074 std::swap (operands[1], operands[2]);
3075 }
3076 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3077 operands[4], operands[5]));
3078 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3079 operands[0], operands[1],
3080 operands[2], mask));
3081
3082 DONE;
3083 })
3084
3085 (define_expand "vcondu<mode><mode>"
3086 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3087 (if_then_else:VSDQ_I_DI
3088 (match_operator 3 "comparison_operator"
3089 [(match_operand:VSDQ_I_DI 4 "register_operand")
3090 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3091 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3092 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3093 "TARGET_SIMD"
3094 {
3095 rtx mask = gen_reg_rtx (<MODE>mode);
3096 enum rtx_code code = GET_CODE (operands[3]);
3097
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3101 if (code == NE)
3102 {
3103 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3104 operands[4], operands[5]);
3105 std::swap (operands[1], operands[2]);
3106 }
3107 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3108 operands[4], operands[5]));
3109 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3110 operands[2], mask));
3111 DONE;
3112 })
3113
3114 (define_expand "vcondu<mode><v_cmp_mixed>"
3115 [(set (match_operand:VDQF 0 "register_operand")
3116 (if_then_else:VDQF
3117 (match_operator 3 "comparison_operator"
3118 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3119 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3120 (match_operand:VDQF 1 "nonmemory_operand")
3121 (match_operand:VDQF 2 "nonmemory_operand")))]
3122 "TARGET_SIMD"
3123 {
3124 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3125 enum rtx_code code = GET_CODE (operands[3]);
3126
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3130 if (code == NE)
3131 {
3132 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3133 operands[4], operands[5]);
3134 std::swap (operands[1], operands[2]);
3135 }
3136 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3137 mask, operands[3],
3138 operands[4], operands[5]));
3139 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3140 operands[2], mask));
3141 DONE;
3142 })
3143
3144 ;; Patterns for AArch64 SIMD Intrinsics.
3145
3146 ;; Lane extraction with sign extension to general purpose register.
3147 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3148 [(set (match_operand:GPI 0 "register_operand" "=r")
3149 (sign_extend:GPI
3150 (vec_select:<VDQQH:VEL>
3151 (match_operand:VDQQH 1 "register_operand" "w")
3152 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3153 "TARGET_SIMD"
3154 {
3155 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3156 INTVAL (operands[2]));
3157 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3158 }
3159 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3160 )
3161
3162 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3163 [(set (match_operand:GPI 0 "register_operand" "=r")
3164 (zero_extend:GPI
3165 (vec_select:<VDQQH:VEL>
3166 (match_operand:VDQQH 1 "register_operand" "w")
3167 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3168 "TARGET_SIMD"
3169 {
3170 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3171 INTVAL (operands[2]));
3172 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3173 }
3174 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3175 )
3176
;; Lane extraction of a value; neither sign nor zero extension
;; is guaranteed, so the upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout, so flip only for
;; the assembly output.
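;;
;; For example (a sketch assuming arm_neon.h), extracting one lane
;; with vgetq_lane_s16 may use UMOV or, where a sign extension is
;; needed, SMOV:
;;
;;   #include <arm_neon.h>
;;
;;   int16_t get_lane2 (int16x8_t v) { return vgetq_lane_s16 (v, 2); }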
3180 (define_insn "aarch64_get_lane<mode>"
3181 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3182 (vec_select:<VEL>
3183 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3184 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3185 "TARGET_SIMD"
3186 {
3187 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3188 switch (which_alternative)
3189 {
3190 case 0:
3191 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3192 case 1:
3193 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3194 case 2:
3195 return "st1\\t{%1.<Vetype>}[%2], %0";
3196 default:
3197 gcc_unreachable ();
3198 }
3199 }
3200 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3201 )
3202
3203 (define_insn "load_pair_lanes<mode>"
3204 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3205 (vec_concat:<VDBL>
3206 (match_operand:VDC 1 "memory_operand" "Utq")
3207 (match_operand:VDC 2 "memory_operand" "m")))]
3208 "TARGET_SIMD && !STRICT_ALIGNMENT
3209 && rtx_equal_p (XEXP (operands[2], 0),
3210 plus_constant (Pmode,
3211 XEXP (operands[1], 0),
3212 GET_MODE_SIZE (<MODE>mode)))"
3213 "ldr\\t%q0, %1"
3214 [(set_attr "type" "neon_load1_1reg_q")]
3215 )
3216
3217 (define_insn "store_pair_lanes<mode>"
3218 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3219 (vec_concat:<VDBL>
3220 (match_operand:VDC 1 "register_operand" "w, r")
3221 (match_operand:VDC 2 "register_operand" "w, r")))]
3222 "TARGET_SIMD"
3223 "@
3224 stp\\t%d1, %d2, %y0
3225 stp\\t%x1, %x2, %y0"
3226 [(set_attr "type" "neon_stp, store_16")]
3227 )
3228
;; In this insn, operand 1 should be the low part and operand 2 the
;; high part of the destination vector.
3231
3232 (define_insn "@aarch64_combinez<mode>"
3233 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3234 (vec_concat:<VDBL>
3235 (match_operand:VDC 1 "general_operand" "w,?r,m")
3236 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3237 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3238 "@
3239 mov\\t%0.8b, %1.8b
3240 fmov\t%d0, %1
3241 ldr\\t%d0, %1"
3242 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3243 (set_attr "arch" "simd,fp,simd")]
3244 )
3245
3246 (define_insn "@aarch64_combinez_be<mode>"
3247 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3248 (vec_concat:<VDBL>
3249 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3250 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3251 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3252 "@
3253 mov\\t%0.8b, %1.8b
3254 fmov\t%d0, %1
3255 ldr\\t%d0, %1"
3256 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3257 (set_attr "arch" "simd,fp,simd")]
3258 )
3259
3260 (define_expand "aarch64_combine<mode>"
3261 [(match_operand:<VDBL> 0 "register_operand")
3262 (match_operand:VDC 1 "register_operand")
3263 (match_operand:VDC 2 "register_operand")]
3264 "TARGET_SIMD"
3265 {
3266 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3267
3268 DONE;
3269 }
3270 )
3271
3272 (define_expand "@aarch64_simd_combine<mode>"
3273 [(match_operand:<VDBL> 0 "register_operand")
3274 (match_operand:VDC 1 "register_operand")
3275 (match_operand:VDC 2 "register_operand")]
3276 "TARGET_SIMD"
3277 {
3278 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3279 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3280 DONE;
3281 }
3282 [(set_attr "type" "multiple")]
3283 )
3284
3285 ;; <su><addsub>l<q>.
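;; For example, saddl2 v0.4s, v1.8h, v2.8h sign-extends the high halves
;; of v1 and v2 to 32 bits and adds them; the plain (non-"2") forms
;; operate on the low halves or on 64-bit inputs.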
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290 (match_operand:VQW 1 "register_operand" "w")
3291 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293 (match_operand:VQW 2 "register_operand" "w")
3294 (match_dup 3)))))]
3295 "TARGET_SIMD"
3296 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3297 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3298 )
3299
3300 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3301 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3302 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3303 (match_operand:VQW 1 "register_operand" "w")
3304 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3305 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3306 (match_operand:VQW 2 "register_operand" "w")
3307 (match_dup 3)))))]
3308 "TARGET_SIMD"
3309 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3310 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3311 )
3312
3313
3314 (define_expand "aarch64_saddl2<mode>"
3315 [(match_operand:<VWIDE> 0 "register_operand")
3316 (match_operand:VQW 1 "register_operand")
3317 (match_operand:VQW 2 "register_operand")]
3318 "TARGET_SIMD"
3319 {
3320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3322 operands[2], p));
3323 DONE;
3324 })
3325
3326 (define_expand "aarch64_uaddl2<mode>"
3327 [(match_operand:<VWIDE> 0 "register_operand")
3328 (match_operand:VQW 1 "register_operand")
3329 (match_operand:VQW 2 "register_operand")]
3330 "TARGET_SIMD"
3331 {
3332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3334 operands[2], p));
3335 DONE;
3336 })
3337
3338 (define_expand "aarch64_ssubl2<mode>"
3339 [(match_operand:<VWIDE> 0 "register_operand")
3340 (match_operand:VQW 1 "register_operand")
3341 (match_operand:VQW 2 "register_operand")]
3342 "TARGET_SIMD"
3343 {
3344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3346 operands[2], p));
3347 DONE;
3348 })
3349
3350 (define_expand "aarch64_usubl2<mode>"
3351 [(match_operand:<VWIDE> 0 "register_operand")
3352 (match_operand:VQW 1 "register_operand")
3353 (match_operand:VQW 2 "register_operand")]
3354 "TARGET_SIMD"
3355 {
3356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3357 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3358 operands[2], p));
3359 DONE;
3360 })
3361
3362 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3364 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3365 (match_operand:VD_BHSI 1 "register_operand" "w"))
3366 (ANY_EXTEND:<VWIDE>
3367 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3368 "TARGET_SIMD"
3369 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3370 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3371 )
3372
3373 ;; <su><addsub>w<q>.
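;; [us]addw and [us]subw combine a wide accumulator with a widened
;; narrow operand.  The widen_ssum/widen_usum expanders below consume a
;; 128-bit input by chaining [us]addw on the low half with [us]addw2 on
;; the high half; the 64-bit forms map to a single [us]addw.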
3374
3375 (define_expand "widen_ssum<mode>3"
3376 [(set (match_operand:<VDBLW> 0 "register_operand")
3377 (plus:<VDBLW> (sign_extend:<VDBLW>
3378 (match_operand:VQW 1 "register_operand"))
3379 (match_operand:<VDBLW> 2 "register_operand")))]
3380 "TARGET_SIMD"
3381 {
3382 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3383 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3384
3385 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3386 operands[1], p));
3387 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3388 DONE;
3389 }
3390 )
3391
3392 (define_expand "widen_ssum<mode>3"
3393 [(set (match_operand:<VWIDE> 0 "register_operand")
3394 (plus:<VWIDE> (sign_extend:<VWIDE>
3395 (match_operand:VD_BHSI 1 "register_operand"))
3396 (match_operand:<VWIDE> 2 "register_operand")))]
3397 "TARGET_SIMD"
3398 {
3399 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3400 DONE;
3401 })
3402
3403 (define_expand "widen_usum<mode>3"
3404 [(set (match_operand:<VDBLW> 0 "register_operand")
3405 (plus:<VDBLW> (zero_extend:<VDBLW>
3406 (match_operand:VQW 1 "register_operand"))
3407 (match_operand:<VDBLW> 2 "register_operand")))]
3408 "TARGET_SIMD"
3409 {
3410 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3411 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3412
3413 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3414 operands[1], p));
3415 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3416 DONE;
3417 }
3418 )
3419
3420 (define_expand "widen_usum<mode>3"
3421 [(set (match_operand:<VWIDE> 0 "register_operand")
3422 (plus:<VWIDE> (zero_extend:<VWIDE>
3423 (match_operand:VD_BHSI 1 "register_operand"))
3424 (match_operand:<VWIDE> 2 "register_operand")))]
3425 "TARGET_SIMD"
3426 {
3427 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3428 DONE;
3429 })
3430
3431 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3432 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3434 (ANY_EXTEND:<VWIDE>
3435 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3436 "TARGET_SIMD"
3437 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3438 [(set_attr "type" "neon_sub_widen")]
3439 )
3440
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3442 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3444 (ANY_EXTEND:<VWIDE>
3445 (vec_select:<VHALF>
3446 (match_operand:VQW 2 "register_operand" "w")
3447 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3448 "TARGET_SIMD"
3449 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3450 [(set_attr "type" "neon_sub_widen")]
3451 )
3452
3453 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3456 (ANY_EXTEND:<VWIDE>
3457 (vec_select:<VHALF>
3458 (match_operand:VQW 2 "register_operand" "w")
3459 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3460 "TARGET_SIMD"
3461 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3462 [(set_attr "type" "neon_sub_widen")]
3463 )
3464
3465 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3466 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3467 (plus:<VWIDE>
3468 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3469 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3470 "TARGET_SIMD"
3471 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3472 [(set_attr "type" "neon_add_widen")]
3473 )
3474
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3476 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3477 (plus:<VWIDE>
3478 (ANY_EXTEND:<VWIDE>
3479 (vec_select:<VHALF>
3480 (match_operand:VQW 2 "register_operand" "w")
3481 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3482 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3483 "TARGET_SIMD"
3484 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3485 [(set_attr "type" "neon_add_widen")]
3486 )
3487
3488 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3490 (plus:<VWIDE>
3491 (ANY_EXTEND:<VWIDE>
3492 (vec_select:<VHALF>
3493 (match_operand:VQW 2 "register_operand" "w")
3494 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3495 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3496 "TARGET_SIMD"
3497 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3498 [(set_attr "type" "neon_add_widen")]
3499 )
3500
3501 (define_expand "aarch64_saddw2<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (match_operand:<VWIDE> 1 "register_operand")
3504 (match_operand:VQW 2 "register_operand")]
3505 "TARGET_SIMD"
3506 {
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3509 operands[2], p));
3510 DONE;
3511 })
3512
3513 (define_expand "aarch64_uaddw2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand")
3515 (match_operand:<VWIDE> 1 "register_operand")
3516 (match_operand:VQW 2 "register_operand")]
3517 "TARGET_SIMD"
3518 {
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3521 operands[2], p));
3522 DONE;
3523 })
3524
3525
3526 (define_expand "aarch64_ssubw2<mode>"
3527 [(match_operand:<VWIDE> 0 "register_operand")
3528 (match_operand:<VWIDE> 1 "register_operand")
3529 (match_operand:VQW 2 "register_operand")]
3530 "TARGET_SIMD"
3531 {
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3534 operands[2], p));
3535 DONE;
3536 })
3537
3538 (define_expand "aarch64_usubw2<mode>"
3539 [(match_operand:<VWIDE> 0 "register_operand")
3540 (match_operand:<VWIDE> 1 "register_operand")
3541 (match_operand:VQW 2 "register_operand")]
3542 "TARGET_SIMD"
3543 {
3544 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3545 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3546 operands[2], p));
3547 DONE;
3548 })
3549
3550 ;; <su><r>h<addsub>.
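;; [us]hadd computes (a + b) >> 1 per element without losing the carry
;; bit; [us]rhadd rounds upwards instead, computing (a + b + 1) >> 1.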
3551
3552 (define_expand "<u>avg<mode>3_floor"
3553 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3554 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3555 (match_operand:VDQ_BHSI 2 "register_operand")]
3556 HADD))]
3557 "TARGET_SIMD"
3558 )
3559
3560 (define_expand "<u>avg<mode>3_ceil"
3561 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3562 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3563 (match_operand:VDQ_BHSI 2 "register_operand")]
3564 RHADD))]
3565 "TARGET_SIMD"
3566 )
3567
3568 (define_insn "aarch64_<sur>h<addsub><mode>"
3569 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3570 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3571 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3572 HADDSUB))]
3573 "TARGET_SIMD"
3574 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3575 [(set_attr "type" "neon_<addsub>_halve<q>")]
3576 )
3577
3578 ;; <r><addsub>hn<q>.
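;; [r]addhn and [r]subhn keep only the most-significant half of each
;; result, e.g. for 16-bit elements res[i] = (a[i] + b[i]) >> 8; the
;; rounding forms add 1 << 7 before narrowing.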
3579
3580 (define_insn "aarch64_<sur><addsub>hn<mode>"
3581 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3582 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3583 (match_operand:VQN 2 "register_operand" "w")]
3584 ADDSUBHN))]
3585 "TARGET_SIMD"
3586 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3587 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3588 )
3589
3590 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3591 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3592 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3593 (match_operand:VQN 2 "register_operand" "w")
3594 (match_operand:VQN 3 "register_operand" "w")]
3595 ADDSUBHN2))]
3596 "TARGET_SIMD"
3597 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3598 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3599 )
3600
3601 ;; pmul.
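;; Polynomial (carry-less) multiplication of 8-bit elements over GF(2):
;; partial products are combined with XOR and only the low 8 bits of
;; each product are kept.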
3602
3603 (define_insn "aarch64_pmul<mode>"
3604 [(set (match_operand:VB 0 "register_operand" "=w")
3605 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3606 (match_operand:VB 2 "register_operand" "w")]
3607 UNSPEC_PMUL))]
3608 "TARGET_SIMD"
3609 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3610 [(set_attr "type" "neon_mul_<Vetype><q>")]
3611 )
3612
3613 ;; fmulx.
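;; fmulx behaves like fmul except that +/-0.0 multiplied by +/-Inf
;; returns +/-2.0 rather than the default NaN.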
3614
3615 (define_insn "aarch64_fmulx<mode>"
3616 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3617 (unspec:VHSDF_HSDF
3618 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3619 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3620 UNSPEC_FMULX))]
3621 "TARGET_SIMD"
3622 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3623 [(set_attr "type" "neon_fp_mul_<stype>")]
3624 )
3625
3626 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3627
3628 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3629 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3630 (unspec:VDQSF
3631 [(match_operand:VDQSF 1 "register_operand" "w")
3632 (vec_duplicate:VDQSF
3633 (vec_select:<VEL>
3634 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3635 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3636 UNSPEC_FMULX))]
3637 "TARGET_SIMD"
3638 {
3639 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3640 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3641 }
3642 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3643 )
3644
3645 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3646
3647 (define_insn "*aarch64_mulx_elt<mode>"
3648 [(set (match_operand:VDQF 0 "register_operand" "=w")
3649 (unspec:VDQF
3650 [(match_operand:VDQF 1 "register_operand" "w")
3651 (vec_duplicate:VDQF
3652 (vec_select:<VEL>
3653 (match_operand:VDQF 2 "register_operand" "w")
3654 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3655 UNSPEC_FMULX))]
3656 "TARGET_SIMD"
3657 {
3658 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3659 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3660 }
3661 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3662 )
3663
3664 ;; vmulxq_lane
3665
3666 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3667 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3668 (unspec:VHSDF
3669 [(match_operand:VHSDF 1 "register_operand" "w")
3670 (vec_duplicate:VHSDF
3671 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3672 UNSPEC_FMULX))]
3673 "TARGET_SIMD"
3674 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3675 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3676 )
3677
3678 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3679 ;; vmulxd_lane_f64 == vmulx_lane_f64
3680 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3681
3682 (define_insn "*aarch64_vgetfmulx<mode>"
3683 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3684 (unspec:<VEL>
3685 [(match_operand:<VEL> 1 "register_operand" "w")
3686 (vec_select:<VEL>
3687 (match_operand:VDQF 2 "register_operand" "w")
3688 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3689 UNSPEC_FMULX))]
3690 "TARGET_SIMD"
3691 {
3692 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3693 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3694 }
3695 [(set_attr "type" "fmul<Vetype>")]
3696 )
3697 ;; <su>q<addsub>
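;; Saturating integer add/subtract: results clamp to the type's range
;; instead of wrapping.  BINQOPS iterates over the signed and unsigned
;; saturating RTL codes (ss_plus, us_plus, ss_minus, us_minus).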
3698
3699 (define_insn "aarch64_<su_optab><optab><mode>"
3700 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3701 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3702 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3703 "TARGET_SIMD"
3704 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3705 [(set_attr "type" "neon_<optab><q>")]
3706 )
3707
3708 ;; suqadd and usqadd
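;; suqadd accumulates an unsigned value into a signed accumulator with
;; signed saturation; usqadd is the converse.  Operand 1 is tied to the
;; destination, matching the read-modify-write instruction.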
3709
3710 (define_insn "aarch64_<sur>qadd<mode>"
3711 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3712 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3713 (match_operand:VSDQ_I 2 "register_operand" "w")]
3714 USSUQADD))]
3715 "TARGET_SIMD"
3716 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3717 [(set_attr "type" "neon_qadd<q>")]
3718 )
3719
3720 ;; sqmovun
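;; sqxtun narrows a signed value with unsigned saturation: negative
;; inputs clamp to zero and values above the unsigned maximum clamp to
;; that maximum.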
3721
3722 (define_insn "aarch64_sqmovun<mode>"
3723 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3724 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3725 UNSPEC_SQXTUN))]
3726 "TARGET_SIMD"
3727 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3728 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3729 )
3730
3731 ;; sqmovn and uqmovn
3732
3733 (define_insn "aarch64_<sur>qmovn<mode>"
3734 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3735 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3736 SUQMOVN))]
3737 "TARGET_SIMD"
3738 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3739 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3740 )
3741
3742 ;; <su>q<absneg>
3743
3744 (define_insn "aarch64_s<optab><mode>"
3745 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3746 (UNQOPS:VSDQ_I
3747 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3748 "TARGET_SIMD"
3749 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3750 [(set_attr "type" "neon_<optab><q>")]
3751 )
3752
3753 ;; sq<r>dmulh.
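;; sqdmulh returns sat ((2 * a * b) >> esize), i.e. the high half of
;; the doubled product; sqrdmulh also adds 1 << (esize - 1) before
;; shifting, rounding to nearest.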
3754
3755 (define_insn "aarch64_sq<r>dmulh<mode>"
3756 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3757 (unspec:VSDQ_HSI
3758 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3759 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3760 VQDMULH))]
3761 "TARGET_SIMD"
3762 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3763 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3764 )
3765
3766 ;; sq<r>dmulh_lane
3767
3768 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3769 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3770 (unspec:VDQHS
3771 [(match_operand:VDQHS 1 "register_operand" "w")
3772 (vec_select:<VEL>
3773 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3775 VQDMULH))]
3776 "TARGET_SIMD"
3777 "*
3778 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3779 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3780 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3781 )
3782
3783 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3784 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3785 (unspec:VDQHS
3786 [(match_operand:VDQHS 1 "register_operand" "w")
3787 (vec_select:<VEL>
3788 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3790 VQDMULH))]
3791 "TARGET_SIMD"
3792 "*
3793 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3794 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3795 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3796 )
3797
3798 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3799 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3800 (unspec:SD_HSI
3801 [(match_operand:SD_HSI 1 "register_operand" "w")
3802 (vec_select:<VEL>
3803 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3804 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3805 VQDMULH))]
3806 "TARGET_SIMD"
3807 "*
3808 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3809 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3810 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3811 )
3812
3813 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3814 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3815 (unspec:SD_HSI
3816 [(match_operand:SD_HSI 1 "register_operand" "w")
3817 (vec_select:<VEL>
3818 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3820 VQDMULH))]
3821 "TARGET_SIMD"
3822 "*
3823 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3824 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3825 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3826 )
3827
3828 ;; sqrdml[as]h.
3829
3830 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3831 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3832 (unspec:VSDQ_HSI
3833 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3834 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3835 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3836 SQRDMLH_AS))]
3837 "TARGET_SIMD_RDMA"
3838 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3839 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3840 )
3841
3842 ;; sqrdml[as]h_lane.
3843
3844 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3845 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3846 (unspec:VDQHS
3847 [(match_operand:VDQHS 1 "register_operand" "0")
3848 (match_operand:VDQHS 2 "register_operand" "w")
3849 (vec_select:<VEL>
3850 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3851 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3852 SQRDMLH_AS))]
3853 "TARGET_SIMD_RDMA"
3854 {
3855 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3856 return
3857 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3858 }
3859 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3860 )
3861
3862 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3863 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3864 (unspec:SD_HSI
3865 [(match_operand:SD_HSI 1 "register_operand" "0")
3866 (match_operand:SD_HSI 2 "register_operand" "w")
3867 (vec_select:<VEL>
3868 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3869 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3870 SQRDMLH_AS))]
3871 "TARGET_SIMD_RDMA"
3872 {
3873 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3874 return
3875 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3876 }
3877 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3878 )
3879
3880 ;; sqrdml[as]h_laneq.
3881
3882 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3883 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3884 (unspec:VDQHS
3885 [(match_operand:VDQHS 1 "register_operand" "0")
3886 (match_operand:VDQHS 2 "register_operand" "w")
3887 (vec_select:<VEL>
3888 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3889 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3890 SQRDMLH_AS))]
3891 "TARGET_SIMD_RDMA"
3892 {
3893 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3894 return
3895 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3896 }
3897 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3898 )
3899
3900 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3901 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3902 (unspec:SD_HSI
3903 [(match_operand:SD_HSI 1 "register_operand" "0")
3904 (match_operand:SD_HSI 2 "register_operand" "w")
3905 (vec_select:<VEL>
3906 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3907 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3908 SQRDMLH_AS))]
3909 "TARGET_SIMD_RDMA"
3910 {
3911 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3912 return
3913 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3914 }
3915 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3916 )
3917
3918 ;; vqdml[sa]l
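;; sqdmlal/sqdmlsl saturatingly add or subtract the doubled product:
;; acc +/- sat (2 * sext (a) * sext (b)).  The doubling is expressed as
;; a saturating left shift by one (ss_ashift ... (const_int 1)) so that
;; the saturation point is explicit in the RTL.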
3919
3920 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3922 (SBINQOPS:<VWIDE>
3923 (match_operand:<VWIDE> 1 "register_operand" "0")
3924 (ss_ashift:<VWIDE>
3925 (mult:<VWIDE>
3926 (sign_extend:<VWIDE>
3927 (match_operand:VSD_HSI 2 "register_operand" "w"))
3928 (sign_extend:<VWIDE>
3929 (match_operand:VSD_HSI 3 "register_operand" "w")))
3930 (const_int 1))))]
3931 "TARGET_SIMD"
3932 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3933 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3934 )
3935
3936 ;; vqdml[sa]l_lane
3937
3938 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3940 (SBINQOPS:<VWIDE>
3941 (match_operand:<VWIDE> 1 "register_operand" "0")
3942 (ss_ashift:<VWIDE>
3943 (mult:<VWIDE>
3944 (sign_extend:<VWIDE>
3945 (match_operand:VD_HSI 2 "register_operand" "w"))
3946 (sign_extend:<VWIDE>
3947 (vec_duplicate:VD_HSI
3948 (vec_select:<VEL>
3949 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3950 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3951 ))
3952 (const_int 1))))]
3953 "TARGET_SIMD"
3954 {
3955 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3956 return
3957 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3958 }
3959 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3960 )
3961
3962 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3963 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (SBINQOPS:<VWIDE>
3965 (match_operand:<VWIDE> 1 "register_operand" "0")
3966 (ss_ashift:<VWIDE>
3967 (mult:<VWIDE>
3968 (sign_extend:<VWIDE>
3969 (match_operand:VD_HSI 2 "register_operand" "w"))
3970 (sign_extend:<VWIDE>
3971 (vec_duplicate:VD_HSI
3972 (vec_select:<VEL>
3973 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3975 ))
3976 (const_int 1))))]
3977 "TARGET_SIMD"
3978 {
3979 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3980 return
3981 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3982 }
3983 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3984 )
3985
3986 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3987 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3988 (SBINQOPS:<VWIDE>
3989 (match_operand:<VWIDE> 1 "register_operand" "0")
3990 (ss_ashift:<VWIDE>
3991 (mult:<VWIDE>
3992 (sign_extend:<VWIDE>
3993 (match_operand:SD_HSI 2 "register_operand" "w"))
3994 (sign_extend:<VWIDE>
3995 (vec_select:<VEL>
3996 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3998 )
3999 (const_int 1))))]
4000 "TARGET_SIMD"
4001 {
4002 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4003 return
4004 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4005 }
4006 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4007 )
4008
4009 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4011 (SBINQOPS:<VWIDE>
4012 (match_operand:<VWIDE> 1 "register_operand" "0")
4013 (ss_ashift:<VWIDE>
4014 (mult:<VWIDE>
4015 (sign_extend:<VWIDE>
4016 (match_operand:SD_HSI 2 "register_operand" "w"))
4017 (sign_extend:<VWIDE>
4018 (vec_select:<VEL>
4019 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4020 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4021 )
4022 (const_int 1))))]
4023 "TARGET_SIMD"
4024 {
4025 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4026 return
4027 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4028 }
4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4030 )
4031
4032 ;; vqdml[sa]l_n
4033
4034 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4036 (SBINQOPS:<VWIDE>
4037 (match_operand:<VWIDE> 1 "register_operand" "0")
4038 (ss_ashift:<VWIDE>
4039 (mult:<VWIDE>
4040 (sign_extend:<VWIDE>
4041 (match_operand:VD_HSI 2 "register_operand" "w"))
4042 (sign_extend:<VWIDE>
4043 (vec_duplicate:VD_HSI
4044 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4045 (const_int 1))))]
4046 "TARGET_SIMD"
4047 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4048 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4049 )
4050
4051 ;; sqdml[as]l2
4052
4053 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4054 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (SBINQOPS:<VWIDE>
4056 (match_operand:<VWIDE> 1 "register_operand" "0")
4057 (ss_ashift:<VWIDE>
4058 (mult:<VWIDE>
4059 (sign_extend:<VWIDE>
4060 (vec_select:<VHALF>
4061 (match_operand:VQ_HSI 2 "register_operand" "w")
4062 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4063 (sign_extend:<VWIDE>
4064 (vec_select:<VHALF>
4065 (match_operand:VQ_HSI 3 "register_operand" "w")
4066 (match_dup 4))))
4067 (const_int 1))))]
4068 "TARGET_SIMD"
4069 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4070 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4071 )
4072
4073 (define_expand "aarch64_sqdmlal2<mode>"
4074 [(match_operand:<VWIDE> 0 "register_operand")
4075 (match_operand:<VWIDE> 1 "register_operand")
4076 (match_operand:VQ_HSI 2 "register_operand")
4077 (match_operand:VQ_HSI 3 "register_operand")]
4078 "TARGET_SIMD"
4079 {
4080 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4082 operands[2], operands[3], p));
4083 DONE;
4084 })
4085
4086 (define_expand "aarch64_sqdmlsl2<mode>"
4087 [(match_operand:<VWIDE> 0 "register_operand")
4088 (match_operand:<VWIDE> 1 "register_operand")
4089 (match_operand:VQ_HSI 2 "register_operand")
4090 (match_operand:VQ_HSI 3 "register_operand")]
4091 "TARGET_SIMD"
4092 {
4093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4094 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4095 operands[2], operands[3], p));
4096 DONE;
4097 })
4098
4099 ;; vqdml[sa]l2_lane
4100
4101 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4102 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4103 (SBINQOPS:<VWIDE>
4104 (match_operand:<VWIDE> 1 "register_operand" "0")
4105 (ss_ashift:<VWIDE>
4106 (mult:<VWIDE>
4107 (sign_extend:<VWIDE>
4108 (vec_select:<VHALF>
4109 (match_operand:VQ_HSI 2 "register_operand" "w")
4110 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4111 (sign_extend:<VWIDE>
4112 (vec_duplicate:<VHALF>
4113 (vec_select:<VEL>
4114 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4116 ))))
4117 (const_int 1))))]
4118 "TARGET_SIMD"
4119 {
4120 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4121 return
4122 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4123 }
4124 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4125 )
4126
4127 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4128 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4129 (SBINQOPS:<VWIDE>
4130 (match_operand:<VWIDE> 1 "register_operand" "0")
4131 (ss_ashift:<VWIDE>
4132 (mult:<VWIDE>
4133 (sign_extend:<VWIDE>
4134 (vec_select:<VHALF>
4135 (match_operand:VQ_HSI 2 "register_operand" "w")
4136 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4137 (sign_extend:<VWIDE>
4138 (vec_duplicate:<VHALF>
4139 (vec_select:<VEL>
4140 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4141 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4142 ))))
4143 (const_int 1))))]
4144 "TARGET_SIMD"
4145 {
4146 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4147 return
4148 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4149 }
4150 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4151 )
4152
4153 (define_expand "aarch64_sqdmlal2_lane<mode>"
4154 [(match_operand:<VWIDE> 0 "register_operand")
4155 (match_operand:<VWIDE> 1 "register_operand")
4156 (match_operand:VQ_HSI 2 "register_operand")
4157 (match_operand:<VCOND> 3 "register_operand")
4158 (match_operand:SI 4 "immediate_operand")]
4159 "TARGET_SIMD"
4160 {
4161 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4162 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4163 operands[2], operands[3],
4164 operands[4], p));
4165 DONE;
4166 })
4167
4168 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4169 [(match_operand:<VWIDE> 0 "register_operand")
4170 (match_operand:<VWIDE> 1 "register_operand")
4171 (match_operand:VQ_HSI 2 "register_operand")
4172 (match_operand:<VCONQ> 3 "register_operand")
4173 (match_operand:SI 4 "immediate_operand")]
4174 "TARGET_SIMD"
4175 {
4176 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4177 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4178 operands[2], operands[3],
4179 operands[4], p));
4180 DONE;
4181 })
4182
4183 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4184 [(match_operand:<VWIDE> 0 "register_operand")
4185 (match_operand:<VWIDE> 1 "register_operand")
4186 (match_operand:VQ_HSI 2 "register_operand")
4187 (match_operand:<VCOND> 3 "register_operand")
4188 (match_operand:SI 4 "immediate_operand")]
4189 "TARGET_SIMD"
4190 {
4191 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4192 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4193 operands[2], operands[3],
4194 operands[4], p));
4195 DONE;
4196 })
4197
4198 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4199 [(match_operand:<VWIDE> 0 "register_operand")
4200 (match_operand:<VWIDE> 1 "register_operand")
4201 (match_operand:VQ_HSI 2 "register_operand")
4202 (match_operand:<VCONQ> 3 "register_operand")
4203 (match_operand:SI 4 "immediate_operand")]
4204 "TARGET_SIMD"
4205 {
4206 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4207 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4208 operands[2], operands[3],
4209 operands[4], p));
4210 DONE;
4211 })
4212
4213 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4214 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4215 (SBINQOPS:<VWIDE>
4216 (match_operand:<VWIDE> 1 "register_operand" "0")
4217 (ss_ashift:<VWIDE>
4218 (mult:<VWIDE>
4219 (sign_extend:<VWIDE>
4220 (vec_select:<VHALF>
4221 (match_operand:VQ_HSI 2 "register_operand" "w")
4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223 (sign_extend:<VWIDE>
4224 (vec_duplicate:<VHALF>
4225 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4226 (const_int 1))))]
4227 "TARGET_SIMD"
4228 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4229 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_expand "aarch64_sqdmlal2_n<mode>"
4233 [(match_operand:<VWIDE> 0 "register_operand")
4234 (match_operand:<VWIDE> 1 "register_operand")
4235 (match_operand:VQ_HSI 2 "register_operand")
4236 (match_operand:<VEL> 3 "register_operand")]
4237 "TARGET_SIMD"
4238 {
4239 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4240 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4241 operands[2], operands[3],
4242 p));
4243 DONE;
4244 })
4245
4246 (define_expand "aarch64_sqdmlsl2_n<mode>"
4247 [(match_operand:<VWIDE> 0 "register_operand")
4248 (match_operand:<VWIDE> 1 "register_operand")
4249 (match_operand:VQ_HSI 2 "register_operand")
4250 (match_operand:<VEL> 3 "register_operand")]
4251 "TARGET_SIMD"
4252 {
4253 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4254 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4255 operands[2], operands[3],
4256 p));
4257 DONE;
4258 })
4259
4260 ;; vqdmull
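;; sqdmull is the multiply-only form: sat (2 * sext (a) * sext (b)) at
;; twice the input element width.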
4261
4262 (define_insn "aarch64_sqdmull<mode>"
4263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4264 (ss_ashift:<VWIDE>
4265 (mult:<VWIDE>
4266 (sign_extend:<VWIDE>
4267 (match_operand:VSD_HSI 1 "register_operand" "w"))
4268 (sign_extend:<VWIDE>
4269 (match_operand:VSD_HSI 2 "register_operand" "w")))
4270 (const_int 1)))]
4271 "TARGET_SIMD"
4272 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4273 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4274 )
4275
4276 ;; vqdmull_lane
4277
4278 (define_insn "aarch64_sqdmull_lane<mode>"
4279 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4280 (ss_ashift:<VWIDE>
4281 (mult:<VWIDE>
4282 (sign_extend:<VWIDE>
4283 (match_operand:VD_HSI 1 "register_operand" "w"))
4284 (sign_extend:<VWIDE>
4285 (vec_duplicate:VD_HSI
4286 (vec_select:<VEL>
4287 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4288 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4289 ))
4290 (const_int 1)))]
4291 "TARGET_SIMD"
4292 {
4293 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4294 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4295 }
4296 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4297 )
4298
4299 (define_insn "aarch64_sqdmull_laneq<mode>"
4300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4301 (ss_ashift:<VWIDE>
4302 (mult:<VWIDE>
4303 (sign_extend:<VWIDE>
4304 (match_operand:VD_HSI 1 "register_operand" "w"))
4305 (sign_extend:<VWIDE>
4306 (vec_duplicate:VD_HSI
4307 (vec_select:<VEL>
4308 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4309 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4310 ))
4311 (const_int 1)))]
4312 "TARGET_SIMD"
4313 {
4314 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4315 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4316 }
4317 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4318 )
4319
4320 (define_insn "aarch64_sqdmull_lane<mode>"
4321 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4322 (ss_ashift:<VWIDE>
4323 (mult:<VWIDE>
4324 (sign_extend:<VWIDE>
4325 (match_operand:SD_HSI 1 "register_operand" "w"))
4326 (sign_extend:<VWIDE>
4327 (vec_select:<VEL>
4328 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4330 ))
4331 (const_int 1)))]
4332 "TARGET_SIMD"
4333 {
4334 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4335 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4336 }
4337 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4338 )
4339
4340 (define_insn "aarch64_sqdmull_laneq<mode>"
4341 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4342 (ss_ashift:<VWIDE>
4343 (mult:<VWIDE>
4344 (sign_extend:<VWIDE>
4345 (match_operand:SD_HSI 1 "register_operand" "w"))
4346 (sign_extend:<VWIDE>
4347 (vec_select:<VEL>
4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4350 ))
4351 (const_int 1)))]
4352 "TARGET_SIMD"
4353 {
4354 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4355 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4356 }
4357 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4358 )
4359
4360 ;; vqdmull_n
4361
4362 (define_insn "aarch64_sqdmull_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364 (ss_ashift:<VWIDE>
4365 (mult:<VWIDE>
4366 (sign_extend:<VWIDE>
4367 (match_operand:VD_HSI 1 "register_operand" "w"))
4368 (sign_extend:<VWIDE>
4369 (vec_duplicate:VD_HSI
4370 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4371 )
4372 (const_int 1)))]
4373 "TARGET_SIMD"
4374 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4375 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4376 )
4377
4378 ;; vqdmull2
4379
4380
4381
4382 (define_insn "aarch64_sqdmull2<mode>_internal"
4383 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4384 (ss_ashift:<VWIDE>
4385 (mult:<VWIDE>
4386 (sign_extend:<VWIDE>
4387 (vec_select:<VHALF>
4388 (match_operand:VQ_HSI 1 "register_operand" "w")
4389 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4390 (sign_extend:<VWIDE>
4391 (vec_select:<VHALF>
4392 (match_operand:VQ_HSI 2 "register_operand" "w")
4393 (match_dup 3)))
4394 )
4395 (const_int 1)))]
4396 "TARGET_SIMD"
4397 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4398 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4399 )
4400
4401 (define_expand "aarch64_sqdmull2<mode>"
4402 [(match_operand:<VWIDE> 0 "register_operand")
4403 (match_operand:VQ_HSI 1 "register_operand")
4404 (match_operand:VQ_HSI 2 "register_operand")]
4405 "TARGET_SIMD"
4406 {
4407 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4408 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4409 operands[2], p));
4410 DONE;
4411 })
4412
4413 ;; vqdmull2_lane
4414
4415 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4416 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4417 (ss_ashift:<VWIDE>
4418 (mult:<VWIDE>
4419 (sign_extend:<VWIDE>
4420 (vec_select:<VHALF>
4421 (match_operand:VQ_HSI 1 "register_operand" "w")
4422 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4423 (sign_extend:<VWIDE>
4424 (vec_duplicate:<VHALF>
4425 (vec_select:<VEL>
4426 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4427 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4428 ))
4429 (const_int 1)))]
4430 "TARGET_SIMD"
4431 {
4432 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4433 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4434 }
4435 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4436 )
4437
4438 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440 (ss_ashift:<VWIDE>
4441 (mult:<VWIDE>
4442 (sign_extend:<VWIDE>
4443 (vec_select:<VHALF>
4444 (match_operand:VQ_HSI 1 "register_operand" "w")
4445 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4446 (sign_extend:<VWIDE>
4447 (vec_duplicate:<VHALF>
4448 (vec_select:<VEL>
4449 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4450 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4451 ))
4452 (const_int 1)))]
4453 "TARGET_SIMD"
4454 {
4455 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4456 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4457 }
4458 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4459 )
4460
4461 (define_expand "aarch64_sqdmull2_lane<mode>"
4462 [(match_operand:<VWIDE> 0 "register_operand")
4463 (match_operand:VQ_HSI 1 "register_operand")
4464 (match_operand:<VCOND> 2 "register_operand")
4465 (match_operand:SI 3 "immediate_operand")]
4466 "TARGET_SIMD"
4467 {
4468 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4469 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4470 operands[2], operands[3],
4471 p));
4472 DONE;
4473 })
4474
4475 (define_expand "aarch64_sqdmull2_laneq<mode>"
4476 [(match_operand:<VWIDE> 0 "register_operand")
4477 (match_operand:VQ_HSI 1 "register_operand")
4478 (match_operand:<VCONQ> 2 "register_operand")
4479 (match_operand:SI 3 "immediate_operand")]
4480 "TARGET_SIMD"
4481 {
4482 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4484 operands[2], operands[3],
4485 p));
4486 DONE;
4487 })
4488
4489 ;; vqdmull2_n
4490
4491 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4493 (ss_ashift:<VWIDE>
4494 (mult:<VWIDE>
4495 (sign_extend:<VWIDE>
4496 (vec_select:<VHALF>
4497 (match_operand:VQ_HSI 1 "register_operand" "w")
4498 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4499 (sign_extend:<VWIDE>
4500 (vec_duplicate:<VHALF>
4501 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4502 )
4503 (const_int 1)))]
4504 "TARGET_SIMD"
4505 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4506 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4507 )
4508
4509 (define_expand "aarch64_sqdmull2_n<mode>"
4510 [(match_operand:<VWIDE> 0 "register_operand")
4511 (match_operand:VQ_HSI 1 "register_operand")
4512 (match_operand:<VEL> 2 "register_operand")]
4513 "TARGET_SIMD"
4514 {
4515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4516 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4517 operands[2], p));
4518 DONE;
4519 })
4520
4521 ;; vshl
4522
4523 (define_insn "aarch64_<sur>shl<mode>"
4524 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4525 (unspec:VSDQ_I_DI
4526 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4527 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4528 VSHL))]
4529 "TARGET_SIMD"
4530 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4531 [(set_attr "type" "neon_shift_reg<q>")]
4532 )
4533
4534
4535 ;; vqshl
4536
4537 (define_insn "aarch64_<sur>q<r>shl<mode>"
4538 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4539 (unspec:VSDQ_I
4540 [(match_operand:VSDQ_I 1 "register_operand" "w")
4541 (match_operand:VSDQ_I 2 "register_operand" "w")]
4542 VQSHL))]
4543 "TARGET_SIMD"
4544 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4545 [(set_attr "type" "neon_sat_shift_reg<q>")]
4546 )
4547
4548 ;; vshll_n
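;; A shift by exactly the element width has its own mnemonic, SHLL; the
;; [us]shll immediate encodings only cover shifts of 0 to width - 1,
;; hence the special case in the output code below.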
4549
4550 (define_insn "aarch64_<sur>shll_n<mode>"
4551 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4552 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4553 (match_operand:SI 2
4554 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4555 VSHLL))]
4556 "TARGET_SIMD"
4557 {
4558 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4559 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4560 else
4561 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4562 }
4563 [(set_attr "type" "neon_shift_imm_long")]
4564 )
4565
4566 ;; vshll_high_n
4567
4568 (define_insn "aarch64_<sur>shll2_n<mode>"
4569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4570 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4571 (match_operand:SI 2 "immediate_operand" "i")]
4572 VSHLL))]
4573 "TARGET_SIMD"
4574 {
4575 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4576 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4577 else
4578 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4579 }
4580 [(set_attr "type" "neon_shift_imm_long")]
4581 )
4582
4583 ;; vrshr_n
4584
4585 (define_insn "aarch64_<sur>shr_n<mode>"
4586 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4588 (match_operand:SI 2
4589 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4590 VRSHR_N))]
4591 "TARGET_SIMD"
4592 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4593 [(set_attr "type" "neon_sat_shift_imm<q>")]
4594 )
4595
4596 ;; v(r)sra_n
4597
4598 (define_insn "aarch64_<sur>sra_n<mode>"
4599 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4600 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4601 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4602 (match_operand:SI 3
4603 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4604 VSRA))]
4605 "TARGET_SIMD"
4606 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4607 [(set_attr "type" "neon_shift_acc<q>")]
4608 )
4609
4610 ;; vs<lr>i_n
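;; sli/sri shift operand 2 and insert the result into the destination;
;; bits not covered by the shifted value are preserved from operand 1,
;; which is why operand 1 is tied to the output.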
4611
4612 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4613 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4614 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4615 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4616 (match_operand:SI 3
4617 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4618 VSLRI))]
4619 "TARGET_SIMD"
4620 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4621 [(set_attr "type" "neon_shift_imm<q>")]
4622 )
4623
4624 ;; vqshl(u)
4625
4626 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4627 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4628 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4629 (match_operand:SI 2
4630 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4631 VQSHL_N))]
4632 "TARGET_SIMD"
4633 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4634 [(set_attr "type" "neon_sat_shift_imm<q>")]
4635 )
4636
4637
4638 ;; vq(r)shr(u)n_n
4639
4640 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4641 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4642 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4643 (match_operand:SI 2
4644 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4645 VQSHRN_N))]
4646 "TARGET_SIMD"
4647 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4648 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4649 )
4650
4651
4652 ;; cm(eq|ge|gt|lt|le)
4653 ;; Note: the constraint accepts both Dz and Z because different
4654 ;; expanders have different ideas of what should be passed to this pattern.
4655
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4658 (neg:<V_INT_EQUIV>
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VDQ_I 1 "register_operand" "w,w")
4661 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4662 )))]
4663 "TARGET_SIMD"
4664 "@
4665 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4667 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4668 )
4669
4670 (define_insn_and_split "aarch64_cm<optab>di"
4671 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4672 (neg:DI
4673 (COMPARISONS:DI
4674 (match_operand:DI 1 "register_operand" "w,w,r")
4675 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4676 )))
4677 (clobber (reg:CC CC_REGNUM))]
4678 "TARGET_SIMD"
4679 "#"
4680 "&& reload_completed"
4681 [(set (match_operand:DI 0 "register_operand")
4682 (neg:DI
4683 (COMPARISONS:DI
4684 (match_operand:DI 1 "register_operand")
4685 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4686 )))]
4687 {
4688 /* If we are in the general purpose register file,
4689 we split into a sequence of comparison and store.  */
4690 if (GP_REGNUM_P (REGNO (operands[0]))
4691 && GP_REGNUM_P (REGNO (operands[1])))
4692 {
4693 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4694 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4695 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4696 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4697 DONE;
4698 }
4699 /* Otherwise, we expand to a similar pattern which does not
4700 clobber CC_REGNUM. */
4701 }
4702 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4703 )
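;; For example, an eq comparison allocated to general registers splits
;; into a compare and a negated cset, roughly:
;;   cmp   x1, x2
;;   csetm x0, eq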
4704
4705 (define_insn "*aarch64_cm<optab>di"
4706 [(set (match_operand:DI 0 "register_operand" "=w,w")
4707 (neg:DI
4708 (COMPARISONS:DI
4709 (match_operand:DI 1 "register_operand" "w,w")
4710 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4711 )))]
4712 "TARGET_SIMD && reload_completed"
4713 "@
4714 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4715 cm<optab>\t%d0, %d1, #0"
4716 [(set_attr "type" "neon_compare, neon_compare_zero")]
4717 )
4718
4719 ;; cm(hs|hi)
4720
4721 (define_insn "aarch64_cm<optab><mode>"
4722 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4723 (neg:<V_INT_EQUIV>
4724 (UCOMPARISONS:<V_INT_EQUIV>
4725 (match_operand:VDQ_I 1 "register_operand" "w")
4726 (match_operand:VDQ_I 2 "register_operand" "w")
4727 )))]
4728 "TARGET_SIMD"
4729 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4730 [(set_attr "type" "neon_compare<q>")]
4731 )
4732
4733 (define_insn_and_split "aarch64_cm<optab>di"
4734 [(set (match_operand:DI 0 "register_operand" "=w,r")
4735 (neg:DI
4736 (UCOMPARISONS:DI
4737 (match_operand:DI 1 "register_operand" "w,r")
4738 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4739 )))
4740 (clobber (reg:CC CC_REGNUM))]
4741 "TARGET_SIMD"
4742 "#"
4743 "&& reload_completed"
4744 [(set (match_operand:DI 0 "register_operand")
4745 (neg:DI
4746 (UCOMPARISONS:DI
4747 (match_operand:DI 1 "register_operand")
4748 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4749 )))]
4750 {
4751 /* If we are in the general purpose register file,
4752 we split into a sequence of comparison and store.  */
4753 if (GP_REGNUM_P (REGNO (operands[0]))
4754 && GP_REGNUM_P (REGNO (operands[1])))
4755 {
4756 machine_mode mode = CCmode;
4757 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4758 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4759 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4760 DONE;
4761 }
4762 /* Otherwise, we expand to a similar pattern which does not
4763 clobber CC_REGNUM. */
4764 }
4765 [(set_attr "type" "neon_compare,multiple")]
4766 )
4767
4768 (define_insn "*aarch64_cm<optab>di"
4769 [(set (match_operand:DI 0 "register_operand" "=w")
4770 (neg:DI
4771 (UCOMPARISONS:DI
4772 (match_operand:DI 1 "register_operand" "w")
4773 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4774 )))]
4775 "TARGET_SIMD && reload_completed"
4776 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4777 [(set_attr "type" "neon_compare")]
4778 )
4779
4780 ;; cmtst
4781
4782 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4783 ;; we don't have any insns using ne, and aarch64_vcond outputs
4784 ;; not (neg (eq (and x y) 0))
4785 ;; which is rewritten by simplify_rtx as
4786 ;; plus (eq (and x y) 0) -1.
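;; (In two's complement, not x = -x - 1, so
;; not (neg (eq (and x y) 0)) = (eq (and x y) 0) - 1,
;; which is exactly the plus ... -1 form matched below.)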
4787
4788 (define_insn "aarch64_cmtst<mode>"
4789 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4790 (plus:<V_INT_EQUIV>
4791 (eq:<V_INT_EQUIV>
4792 (and:VDQ_I
4793 (match_operand:VDQ_I 1 "register_operand" "w")
4794 (match_operand:VDQ_I 2 "register_operand" "w"))
4795 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4796 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4797 ]
4798 "TARGET_SIMD"
4799 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4800 [(set_attr "type" "neon_tst<q>")]
4801 )
4802
4803 (define_insn_and_split "aarch64_cmtstdi"
4804 [(set (match_operand:DI 0 "register_operand" "=w,r")
4805 (neg:DI
4806 (ne:DI
4807 (and:DI
4808 (match_operand:DI 1 "register_operand" "w,r")
4809 (match_operand:DI 2 "register_operand" "w,r"))
4810 (const_int 0))))
4811 (clobber (reg:CC CC_REGNUM))]
4812 "TARGET_SIMD"
4813 "#"
4814 "&& reload_completed"
4815 [(set (match_operand:DI 0 "register_operand")
4816 (neg:DI
4817 (ne:DI
4818 (and:DI
4819 (match_operand:DI 1 "register_operand")
4820 (match_operand:DI 2 "register_operand"))
4821 (const_int 0))))]
4822 {
4823 /* If we are in the general purpose register file,
4824 we split into a sequence of comparison and store.  */
4825 if (GP_REGNUM_P (REGNO (operands[0]))
4826 && GP_REGNUM_P (REGNO (operands[1])))
4827 {
4828 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4829 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4830 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4831 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4832 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4833 DONE;
4834 }
4835 /* Otherwise, we expand to a similar pattern which does not
4836 clobber CC_REGNUM. */
4837 }
4838 [(set_attr "type" "neon_tst,multiple")]
4839 )
4840
4841 (define_insn "*aarch64_cmtstdi"
4842 [(set (match_operand:DI 0 "register_operand" "=w")
4843 (neg:DI
4844 (ne:DI
4845 (and:DI
4846 (match_operand:DI 1 "register_operand" "w")
4847 (match_operand:DI 2 "register_operand" "w"))
4848 (const_int 0))))]
4849 "TARGET_SIMD"
4850 "cmtst\t%d0, %d1, %d2"
4851 [(set_attr "type" "neon_tst")]
4852 )
4853
4854 ;; fcm(eq|ge|gt|le|lt)
4855
4856 (define_insn "aarch64_cm<optab><mode>"
4857 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4858 (neg:<V_INT_EQUIV>
4859 (COMPARISONS:<V_INT_EQUIV>
4860 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4861 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4862 )))]
4863 "TARGET_SIMD"
4864 "@
4865 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4866 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4867 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4868 )
4869
4870 ;; fac(ge|gt)
4871 ;; Note that we can also handle what would be fac(le|lt) by swapping
4872 ;; the operands and generating fac(ge|gt).
4873
4874 (define_insn "aarch64_fac<optab><mode>"
4875 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4876 (neg:<V_INT_EQUIV>
4877 (FAC_COMPARISONS:<V_INT_EQUIV>
4878 (abs:VHSDF_HSDF
4879 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4880 (abs:VHSDF_HSDF
4881 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4882 )))]
4883 "TARGET_SIMD"
4884 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4885 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4886 )
4887
4888 ;; addp
4889
4890 (define_insn "aarch64_addp<mode>"
4891 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4892 (unspec:VD_BHSI
4893 [(match_operand:VD_BHSI 1 "register_operand" "w")
4894 (match_operand:VD_BHSI 2 "register_operand" "w")]
4895 UNSPEC_ADDP))]
4896 "TARGET_SIMD"
4897 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4898 [(set_attr "type" "neon_reduc_add<q>")]
4899 )
4900
4901 (define_insn "aarch64_addpdi"
4902 [(set (match_operand:DI 0 "register_operand" "=w")
4903 (unspec:DI
4904 [(match_operand:V2DI 1 "register_operand" "w")]
4905 UNSPEC_ADDP))]
4906 "TARGET_SIMD"
4907 "addp\t%d0, %1.2d"
4908 [(set_attr "type" "neon_reduc_add")]
4909 )
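
;; Hedged usage sketch for the two addp patterns above:
;;
;;   #include <arm_neon.h>
;;   int32x2_t pair_add (int32x2_t a, int32x2_t b) { return vpadd_s32 (a, b); }
;;   int64_t   pair_red (int64x2_t a)              { return vpaddd_s64 (a); }
;;
;; expected to assemble to "addp v0.2s, v0.2s, v1.2s" and "addp d0, v0.2d".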
4910
4911 ;; sqrt
4912
4913 (define_expand "sqrt<mode>2"
4914 [(set (match_operand:VHSDF 0 "register_operand")
4915 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4916 "TARGET_SIMD"
4917 {
4918 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4919 DONE;
4920 })
4921
4922 (define_insn "*sqrt<mode>2"
4923 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4924 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4925 "TARGET_SIMD"
4926 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4927 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4928 )
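
;; Illustration (a sketch, assuming the usual arm_neon.h mapping):
;;
;;   #include <arm_neon.h>
;;   float32x4_t root (float32x4_t x) { return vsqrtq_f32 (x); }
;;
;; normally emits "fsqrt v0.4s, v0.4s"; the expander above may instead
;; produce an frsqrte/frsqrts Newton-Raphson sequence when the
;; approximate-sqrt heuristics in aarch64_emit_approx_sqrt fire.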
4929
4930 ;; Patterns for vector struct loads and stores.
4931
4932 (define_insn "aarch64_simd_ld2<mode>"
4933 [(set (match_operand:OI 0 "register_operand" "=w")
4934 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 UNSPEC_LD2))]
4937 "TARGET_SIMD"
4938 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load2_2reg<q>")]
4940 )
4941
4942 (define_insn "aarch64_simd_ld2r<mode>"
4943 [(set (match_operand:OI 0 "register_operand" "=w")
4944 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4946 UNSPEC_LD2_DUP))]
4947 "TARGET_SIMD"
4948 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4949 [(set_attr "type" "neon_load2_all_lanes<q>")]
4950 )
4951
4952 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4953 [(set (match_operand:OI 0 "register_operand" "=w")
4954 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4955 (match_operand:OI 2 "register_operand" "0")
4956 (match_operand:SI 3 "immediate_operand" "i")
4957 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4958 UNSPEC_LD2_LANE))]
4959 "TARGET_SIMD"
4960 {
4961 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4962 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4963 }
4964 [(set_attr "type" "neon_load2_one_lane")]
4965 )
4966
4967 (define_expand "vec_load_lanesoi<mode>"
4968 [(set (match_operand:OI 0 "register_operand")
4969 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971 UNSPEC_LD2))]
4972 "TARGET_SIMD"
4973 {
4974 if (BYTES_BIG_ENDIAN)
4975 {
4976 rtx tmp = gen_reg_rtx (OImode);
4977 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4978 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4979 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4980 }
4981 else
4982 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4983 DONE;
4984 })
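
;; Hedged sketch of what this expander services: the vectorizer and the
;; vld2 intrinsics both load two-element structures, e.g.
;;
;;   #include <arm_neon.h>
;;   int32x4x2_t load2 (const int32_t *p) { return vld2q_s32 (p); }
;;
;; expected to emit "ld2 {v0.4s - v1.4s}, [x0]".  On big-endian targets
;; the TBL pass above rewrites the loaded registers so that lane numbering
;; still follows GCC's vector-extension (array) order.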
4985
4986 (define_insn "aarch64_simd_st2<mode>"
4987 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4988 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4989 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 UNSPEC_ST2))]
4991 "TARGET_SIMD"
4992 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4993 [(set_attr "type" "neon_store2_2reg<q>")]
4994 )
4995
4996 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4997 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4998 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4999 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5000 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5001 (match_operand:SI 2 "immediate_operand" "i")]
5002 UNSPEC_ST2_LANE))]
5003 "TARGET_SIMD"
5004 {
5005 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5006 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5007 }
5008 [(set_attr "type" "neon_store2_one_lane<q>")]
5009 )
5010
5011 (define_expand "vec_store_lanesoi<mode>"
5012 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5013 (unspec:OI [(match_operand:OI 1 "register_operand")
5014 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5015 UNSPEC_ST2))]
5016 "TARGET_SIMD"
5017 {
5018 if (BYTES_BIG_ENDIAN)
5019 {
5020 rtx tmp = gen_reg_rtx (OImode);
5021 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5022 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5023 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5024 }
5025 else
5026 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5027 DONE;
5028 })
5029
5030 (define_insn "aarch64_simd_ld3<mode>"
5031 [(set (match_operand:CI 0 "register_operand" "=w")
5032 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5034 UNSPEC_LD3))]
5035 "TARGET_SIMD"
5036 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037 [(set_attr "type" "neon_load3_3reg<q>")]
5038 )
5039
5040 (define_insn "aarch64_simd_ld3r<mode>"
5041 [(set (match_operand:CI 0 "register_operand" "=w")
5042 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5043 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5044 UNSPEC_LD3_DUP))]
5045 "TARGET_SIMD"
5046 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5047 [(set_attr "type" "neon_load3_all_lanes<q>")]
5048 )
5049
5050 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5051 [(set (match_operand:CI 0 "register_operand" "=w")
5052 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5053 (match_operand:CI 2 "register_operand" "0")
5054 (match_operand:SI 3 "immediate_operand" "i")
5055 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056 UNSPEC_LD3_LANE))]
5057 "TARGET_SIMD"
5058 {
5059 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5060 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5061 }
5062 [(set_attr "type" "neon_load3_one_lane")]
5063 )
5064
5065 (define_expand "vec_load_lanesci<mode>"
5066 [(set (match_operand:CI 0 "register_operand")
5067 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5068 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5069 UNSPEC_LD3))]
5070 "TARGET_SIMD"
5071 {
5072 if (BYTES_BIG_ENDIAN)
5073 {
5074 rtx tmp = gen_reg_rtx (CImode);
5075 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5076 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5077 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5078 }
5079 else
5080 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5081 DONE;
5082 })
5083
5084 (define_insn "aarch64_simd_st3<mode>"
5085 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5086 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5087 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088 UNSPEC_ST3))]
5089 "TARGET_SIMD"
5090 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5091 [(set_attr "type" "neon_store3_3reg<q>")]
5092 )
5093
5094 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5095 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5096 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5097 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5098 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5099 (match_operand:SI 2 "immediate_operand" "i")]
5100 UNSPEC_ST3_LANE))]
5101 "TARGET_SIMD"
5102 {
5103 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5104 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5105 }
5106 [(set_attr "type" "neon_store3_one_lane<q>")]
5107 )
5108
5109 (define_expand "vec_store_lanesci<mode>"
5110 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5111 (unspec:CI [(match_operand:CI 1 "register_operand")
5112 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5113 UNSPEC_ST3))]
5114 "TARGET_SIMD"
5115 {
5116 if (BYTES_BIG_ENDIAN)
5117 {
5118 rtx tmp = gen_reg_rtx (CImode);
5119 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5120 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5121 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5122 }
5123 else
5124 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5125 DONE;
5126 })
5127
5128 (define_insn "aarch64_simd_ld4<mode>"
5129 [(set (match_operand:XI 0 "register_operand" "=w")
5130 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5131 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132 UNSPEC_LD4))]
5133 "TARGET_SIMD"
5134 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5135 [(set_attr "type" "neon_load4_4reg<q>")]
5136 )
5137
5138 (define_insn "aarch64_simd_ld4r<mode>"
5139 [(set (match_operand:XI 0 "register_operand" "=w")
5140 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5141 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5142 UNSPEC_LD4_DUP))]
5143 "TARGET_SIMD"
5144 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5145 [(set_attr "type" "neon_load4_all_lanes<q>")]
5146 )
5147
5148 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5149 [(set (match_operand:XI 0 "register_operand" "=w")
5150 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5151 (match_operand:XI 2 "register_operand" "0")
5152 (match_operand:SI 3 "immediate_operand" "i")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154 UNSPEC_LD4_LANE))]
5155 "TARGET_SIMD"
5156 {
5157 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5158 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5159 }
5160 [(set_attr "type" "neon_load4_one_lane")]
5161 )
5162
5163 (define_expand "vec_load_lanesxi<mode>"
5164 [(set (match_operand:XI 0 "register_operand")
5165 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5166 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5167 UNSPEC_LD4))]
5168 "TARGET_SIMD"
5169 {
5170 if (BYTES_BIG_ENDIAN)
5171 {
5172 rtx tmp = gen_reg_rtx (XImode);
5173 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5174 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5175 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5176 }
5177 else
5178 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5179 DONE;
5180 })
5181
5182 (define_insn "aarch64_simd_st4<mode>"
5183 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5185 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5186 UNSPEC_ST4))]
5187 "TARGET_SIMD"
5188 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5189 [(set_attr "type" "neon_store4_4reg<q>")]
5190 )
5191
5192 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5193 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5194 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5195 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5196 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5197 (match_operand:SI 2 "immediate_operand" "i")]
5198 UNSPEC_ST4_LANE))]
5199 "TARGET_SIMD"
5200 {
5201 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5202 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5203 }
5204 [(set_attr "type" "neon_store4_one_lane<q>")]
5205 )
5206
5207 (define_expand "vec_store_lanesxi<mode>"
5208 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5209 (unspec:XI [(match_operand:XI 1 "register_operand")
5210 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5211 UNSPEC_ST4))]
5212 "TARGET_SIMD"
5213 {
5214 if (BYTES_BIG_ENDIAN)
5215 {
5216 rtx tmp = gen_reg_rtx (XImode);
5217 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5218 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5219 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5220 }
5221 else
5222 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5223 DONE;
5224 })
5225
5226 (define_insn_and_split "aarch64_rev_reglist<mode>"
5227 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5228 (unspec:VSTRUCT
5229 [(match_operand:VSTRUCT 1 "register_operand" "w")
5230 (match_operand:V16QI 2 "register_operand" "w")]
5231 UNSPEC_REV_REGLIST))]
5232 "TARGET_SIMD"
5233 "#"
5234 "&& reload_completed"
5235 [(const_int 0)]
5236 {
5237 int i;
5238 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5239 for (i = 0; i < nregs; i++)
5240 {
5241 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5242 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5243 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5244 }
5245 DONE;
5246 }
5247 [(set_attr "type" "neon_tbl1_q")
5248 (set_attr "length" "<insn_count>")]
5249 )
5250
5251 ;; Reload patterns for AdvSIMD register list operands.
5252
5253 (define_expand "mov<mode>"
5254 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5255 (match_operand:VSTRUCT 1 "general_operand"))]
5256 "TARGET_SIMD"
5257 {
5258 if (can_create_pseudo_p ())
5259 {
5260 if (GET_CODE (operands[0]) != REG)
5261 operands[1] = force_reg (<MODE>mode, operands[1]);
5262 }
5263 })
5264
5265
5266 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5267 [(match_operand:CI 0 "register_operand")
5268 (match_operand:DI 1 "register_operand")
5269 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270 "TARGET_SIMD"
5271 {
5272 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5273 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5274 DONE;
5275 })
5276
5277 (define_insn "aarch64_ld1_x3_<mode>"
5278 [(set (match_operand:CI 0 "register_operand" "=w")
5279 (unspec:CI
5280 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5281 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5282 "TARGET_SIMD"
5283 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5284 [(set_attr "type" "neon_load1_3reg<q>")]
5285 )
5286
5287 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5288 [(match_operand:DI 0 "register_operand")
5289 (match_operand:OI 1 "register_operand")
5290 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291 "TARGET_SIMD"
5292 {
5293 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5294 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5295 DONE;
5296 })
5297
5298 (define_insn "aarch64_st1_x2_<mode>"
5299 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5300 (unspec:OI
5301 [(match_operand:OI 1 "register_operand" "w")
5302 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5303 "TARGET_SIMD"
5304 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5305 [(set_attr "type" "neon_store1_2reg<q>")]
5306 )
5307
5308 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5309 [(match_operand:DI 0 "register_operand")
5310 (match_operand:CI 1 "register_operand")
5311 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5312 "TARGET_SIMD"
5313 {
5314 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5315 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5316 DONE;
5317 })
5318
5319 (define_insn "aarch64_st1_x3_<mode>"
5320 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5321 (unspec:CI
5322 [(match_operand:CI 1 "register_operand" "w")
5323 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5324 "TARGET_SIMD"
5325 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5326 [(set_attr "type" "neon_store1_3reg<q>")]
5327 )
5328
5329 (define_insn "*aarch64_mov<mode>"
5330 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5331 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5332 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5333 && (register_operand (operands[0], <MODE>mode)
5334 || register_operand (operands[1], <MODE>mode))"
5335 "@
5336 #
5337 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5338 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5339 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5340 neon_load<nregs>_<nregs>reg_q")
5341 (set_attr "length" "<insn_count>,4,4")]
5342 )
5343
5344 (define_insn "aarch64_be_ld1<mode>"
5345 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5346 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5347 "aarch64_simd_struct_operand" "Utv")]
5348 UNSPEC_LD1))]
5349 "TARGET_SIMD"
5350 "ld1\\t{%0<Vmtype>}, %1"
5351 [(set_attr "type" "neon_load1_1reg<q>")]
5352 )
5353
5354 (define_insn "aarch64_be_st1<mode>"
5355 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5356 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5357 UNSPEC_ST1))]
5358 "TARGET_SIMD"
5359 "st1\\t{%1<Vmtype>}, %0"
5360 [(set_attr "type" "neon_store1_1reg<q>")]
5361 )
5362
5363 (define_insn "*aarch64_be_movoi"
5364 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5365 (match_operand:OI 1 "general_operand" " w,w,m"))]
5366 "TARGET_SIMD && BYTES_BIG_ENDIAN
5367 && (register_operand (operands[0], OImode)
5368 || register_operand (operands[1], OImode))"
5369 "@
5370 #
5371 stp\\t%q1, %R1, %0
5372 ldp\\t%q0, %R0, %1"
5373 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5374 (set_attr "length" "8,4,4")]
5375 )
5376
5377 (define_insn "*aarch64_be_movci"
5378 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5379 (match_operand:CI 1 "general_operand" " w,w,o"))]
5380 "TARGET_SIMD && BYTES_BIG_ENDIAN
5381 && (register_operand (operands[0], CImode)
5382 || register_operand (operands[1], CImode))"
5383 "#"
5384 [(set_attr "type" "multiple")
5385 (set_attr "length" "12,4,4")]
5386 )
5387
5388 (define_insn "*aarch64_be_movxi"
5389 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5390 (match_operand:XI 1 "general_operand" " w,w,o"))]
5391 "TARGET_SIMD && BYTES_BIG_ENDIAN
5392 && (register_operand (operands[0], XImode)
5393 || register_operand (operands[1], XImode))"
5394 "#"
5395 [(set_attr "type" "multiple")
5396 (set_attr "length" "16,4,4")]
5397 )
5398
5399 (define_split
5400 [(set (match_operand:OI 0 "register_operand")
5401 (match_operand:OI 1 "register_operand"))]
5402 "TARGET_SIMD && reload_completed"
5403 [(const_int 0)]
5404 {
5405 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5406 DONE;
5407 })
5408
5409 (define_split
5410 [(set (match_operand:CI 0 "nonimmediate_operand")
5411 (match_operand:CI 1 "general_operand"))]
5412 "TARGET_SIMD && reload_completed"
5413 [(const_int 0)]
5414 {
5415 if (register_operand (operands[0], CImode)
5416 && register_operand (operands[1], CImode))
5417 {
5418 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5419 DONE;
5420 }
5421 else if (BYTES_BIG_ENDIAN)
5422 {
5423 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5424 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5425 emit_move_insn (gen_lowpart (V16QImode,
5426 simplify_gen_subreg (TImode, operands[0],
5427 CImode, 32)),
5428 gen_lowpart (V16QImode,
5429 simplify_gen_subreg (TImode, operands[1],
5430 CImode, 32)));
5431 DONE;
5432 }
5433 else
5434 FAIL;
5435 })
5436
5437 (define_split
5438 [(set (match_operand:XI 0 "nonimmediate_operand")
5439 (match_operand:XI 1 "general_operand"))]
5440 "TARGET_SIMD && reload_completed"
5441 [(const_int 0)]
5442 {
5443 if (register_operand (operands[0], XImode)
5444 && register_operand (operands[1], XImode))
5445 {
5446 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5447 DONE;
5448 }
5449 else if (BYTES_BIG_ENDIAN)
5450 {
5451 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5452 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5453 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5454 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5455 DONE;
5456 }
5457 else
5458 FAIL;
5459 })
5460
5461 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5462 [(match_operand:VSTRUCT 0 "register_operand")
5463 (match_operand:DI 1 "register_operand")
5464 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5465 "TARGET_SIMD"
5466 {
5467 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5468 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5469 * <VSTRUCT:nregs>);
5470
5471 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5472 mem));
5473 DONE;
5474 })
5475
5476 (define_insn "aarch64_ld2<mode>_dreg"
5477 [(set (match_operand:OI 0 "register_operand" "=w")
5478 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5479 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 UNSPEC_LD2_DREG))]
5481 "TARGET_SIMD"
5482 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5483 [(set_attr "type" "neon_load2_2reg<q>")]
5484 )
5485
5486 (define_insn "aarch64_ld2<mode>_dreg"
5487 [(set (match_operand:OI 0 "register_operand" "=w")
5488 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5489 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5490 UNSPEC_LD2_DREG))]
5491 "TARGET_SIMD"
5492 "ld1\\t{%S0.1d - %T0.1d}, %1"
5493 [(set_attr "type" "neon_load1_2reg<q>")]
5494 )
5495
5496 (define_insn "aarch64_ld3<mode>_dreg"
5497 [(set (match_operand:CI 0 "register_operand" "=w")
5498 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5499 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5500 UNSPEC_LD3_DREG))]
5501 "TARGET_SIMD"
5502 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5503 [(set_attr "type" "neon_load3_3reg<q>")]
5504 )
5505
5506 (define_insn "aarch64_ld3<mode>_dreg"
5507 [(set (match_operand:CI 0 "register_operand" "=w")
5508 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5509 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5510 UNSPEC_LD3_DREG))]
5511 "TARGET_SIMD"
5512 "ld1\\t{%S0.1d - %U0.1d}, %1"
5513 [(set_attr "type" "neon_load1_3reg<q>")]
5514 )
5515
5516 (define_insn "aarch64_ld4<mode>_dreg"
5517 [(set (match_operand:XI 0 "register_operand" "=w")
5518 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5519 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5520 UNSPEC_LD4_DREG))]
5521 "TARGET_SIMD"
5522 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5523 [(set_attr "type" "neon_load4_4reg<q>")]
5524 )
5525
5526 (define_insn "aarch64_ld4<mode>_dreg"
5527 [(set (match_operand:XI 0 "register_operand" "=w")
5528 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5529 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530 UNSPEC_LD4_DREG))]
5531 "TARGET_SIMD"
5532 "ld1\\t{%S0.1d - %V0.1d}, %1"
5533 [(set_attr "type" "neon_load1_4reg<q>")]
5534 )
5535
5536 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5537 [(match_operand:VSTRUCT 0 "register_operand")
5538 (match_operand:DI 1 "register_operand")
5539 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540 "TARGET_SIMD"
5541 {
5542 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5543 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5544
5545 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5546 DONE;
5547 })
5548
5549 (define_expand "aarch64_ld1<VALL_F16:mode>"
5550 [(match_operand:VALL_F16 0 "register_operand")
5551 (match_operand:DI 1 "register_operand")]
5552 "TARGET_SIMD"
5553 {
5554 machine_mode mode = <VALL_F16:MODE>mode;
5555 rtx mem = gen_rtx_MEM (mode, operands[1]);
5556
5557 if (BYTES_BIG_ENDIAN)
5558 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5559 else
5560 emit_move_insn (operands[0], mem);
5561 DONE;
5562 })
5563
5564 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5565 [(match_operand:VSTRUCT 0 "register_operand")
5566 (match_operand:DI 1 "register_operand")
5567 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568 "TARGET_SIMD"
5569 {
5570 machine_mode mode = <VSTRUCT:MODE>mode;
5571 rtx mem = gen_rtx_MEM (mode, operands[1]);
5572
5573 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5574 DONE;
5575 })
5576
5577 (define_expand "aarch64_ld1x2<VQ:mode>"
5578 [(match_operand:OI 0 "register_operand")
5579 (match_operand:DI 1 "register_operand")
5580 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 "TARGET_SIMD"
5582 {
5583 machine_mode mode = OImode;
5584 rtx mem = gen_rtx_MEM (mode, operands[1]);
5585
5586 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5587 DONE;
5588 })
5589
5590 (define_expand "aarch64_ld1x2<VDC:mode>"
5591 [(match_operand:OI 0 "register_operand")
5592 (match_operand:DI 1 "register_operand")
5593 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5594 "TARGET_SIMD"
5595 {
5596 machine_mode mode = OImode;
5597 rtx mem = gen_rtx_MEM (mode, operands[1]);
5598
5599 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5600 DONE;
5601 })
5602
5603
5604 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5605 [(match_operand:VSTRUCT 0 "register_operand")
5606 (match_operand:DI 1 "register_operand")
5607 (match_operand:VSTRUCT 2 "register_operand")
5608 (match_operand:SI 3 "immediate_operand")
5609 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5610 "TARGET_SIMD"
5611 {
5612 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5613 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5614 * <VSTRUCT:nregs>);
5615
5616 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5617 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5618 operands[0], mem, operands[2], operands[3]));
5619 DONE;
5620 })
5621
5622 ;; Expanders for builtins to extract vector registers from large
5623 ;; opaque integer modes.
5624
5625 ;; D-register list.
5626
5627 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5628 [(match_operand:VDC 0 "register_operand")
5629 (match_operand:VSTRUCT 1 "register_operand")
5630 (match_operand:SI 2 "immediate_operand")]
5631 "TARGET_SIMD"
5632 {
5633 int part = INTVAL (operands[2]);
5634 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5635 int offset = part * 16;
5636
5637 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5638 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5639 DONE;
5640 })
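
;; Hedged illustration of how arm_neon.h of this vintage consumes the
;; expander above when unpacking a D-register structure; the builtin
;; spelling below is derived from this pattern's name by the builtin
;; generator and is shown only as an example:
;;
;;   int32x2x2_t ret;
;;   __builtin_aarch64_simd_oi __o = /* result of an ld2 builtin */;
;;   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
;;   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);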
5641
5642 ;; Q-register list.
5643
5644 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5645 [(match_operand:VQ 0 "register_operand")
5646 (match_operand:VSTRUCT 1 "register_operand")
5647 (match_operand:SI 2 "immediate_operand")]
5648 "TARGET_SIMD"
5649 {
5650 int part = INTVAL (operands[2]);
5651 int offset = part * 16;
5652
5653 emit_move_insn (operands[0],
5654 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5655 DONE;
5656 })
5657
5660 ;; Permute instructions
5661
5662 ;; vec_perm support
5663
5664 (define_expand "vec_perm<mode>"
5665 [(match_operand:VB 0 "register_operand")
5666 (match_operand:VB 1 "register_operand")
5667 (match_operand:VB 2 "register_operand")
5668 (match_operand:VB 3 "register_operand")]
5669 "TARGET_SIMD"
5670 {
5671 aarch64_expand_vec_perm (operands[0], operands[1],
5672 operands[2], operands[3], <nunits>);
5673 DONE;
5674 })
5675
5676 (define_insn "aarch64_tbl1<mode>"
5677 [(set (match_operand:VB 0 "register_operand" "=w")
5678 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5679 (match_operand:VB 2 "register_operand" "w")]
5680 UNSPEC_TBL))]
5681 "TARGET_SIMD"
5682 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5683 [(set_attr "type" "neon_tbl1<q>")]
5684 )
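
;; Hedged usage sketch: aarch64_expand_vec_perm lowers general shuffles to
;; this single-register TBL when one source table suffices, and the qtbl
;; intrinsics map to it directly:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t permute (uint8x16_t tab, uint8x16_t idx)
;;   {
;;     return vqtbl1q_u8 (tab, idx);  /* expected: tbl v0.16b, {v0.16b}, v1.16b */
;;   }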
5685
5686 ;; Two source registers.
5687
5688 (define_insn "aarch64_tbl2v16qi"
5689 [(set (match_operand:V16QI 0 "register_operand" "=w")
5690 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5691 (match_operand:V16QI 2 "register_operand" "w")]
5692 UNSPEC_TBL))]
5693 "TARGET_SIMD"
5694 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5695 [(set_attr "type" "neon_tbl2_q")]
5696 )
5697
5698 (define_insn "aarch64_tbl3<mode>"
5699 [(set (match_operand:VB 0 "register_operand" "=w")
5700 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5701 (match_operand:VB 2 "register_operand" "w")]
5702 UNSPEC_TBL))]
5703 "TARGET_SIMD"
5704 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5705 [(set_attr "type" "neon_tbl3")]
5706 )
5707
5708 (define_insn "aarch64_tbx4<mode>"
5709 [(set (match_operand:VB 0 "register_operand" "=w")
5710 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5711 (match_operand:OI 2 "register_operand" "w")
5712 (match_operand:VB 3 "register_operand" "w")]
5713 UNSPEC_TBX))]
5714 "TARGET_SIMD"
5715 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5716 [(set_attr "type" "neon_tbl4")]
5717 )
5718
5719 ;; Three source registers.
5720
5721 (define_insn "aarch64_qtbl3<mode>"
5722 [(set (match_operand:VB 0 "register_operand" "=w")
5723 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5724 (match_operand:VB 2 "register_operand" "w")]
5725 UNSPEC_TBL))]
5726 "TARGET_SIMD"
5727 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5728 [(set_attr "type" "neon_tbl3")]
5729 )
5730
5731 (define_insn "aarch64_qtbx3<mode>"
5732 [(set (match_operand:VB 0 "register_operand" "=w")
5733 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5734 (match_operand:CI 2 "register_operand" "w")
5735 (match_operand:VB 3 "register_operand" "w")]
5736 UNSPEC_TBX))]
5737 "TARGET_SIMD"
5738 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5739 [(set_attr "type" "neon_tbl3")]
5740 )
5741
5742 ;; Four source registers.
5743
5744 (define_insn "aarch64_qtbl4<mode>"
5745 [(set (match_operand:VB 0 "register_operand" "=w")
5746 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5747 (match_operand:VB 2 "register_operand" "w")]
5748 UNSPEC_TBL))]
5749 "TARGET_SIMD"
5750 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5751 [(set_attr "type" "neon_tbl4")]
5752 )
5753
5754 (define_insn "aarch64_qtbx4<mode>"
5755 [(set (match_operand:VB 0 "register_operand" "=w")
5756 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5757 (match_operand:XI 2 "register_operand" "w")
5758 (match_operand:VB 3 "register_operand" "w")]
5759 UNSPEC_TBX))]
5760 "TARGET_SIMD"
5761 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5762 [(set_attr "type" "neon_tbl4")]
5763 )
5764
5765 (define_insn_and_split "aarch64_combinev16qi"
5766 [(set (match_operand:OI 0 "register_operand" "=w")
5767 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5768 (match_operand:V16QI 2 "register_operand" "w")]
5769 UNSPEC_CONCAT))]
5770 "TARGET_SIMD"
5771 "#"
5772 "&& reload_completed"
5773 [(const_int 0)]
5774 {
5775 aarch64_split_combinev16qi (operands);
5776 DONE;
5777 }
5778 [(set_attr "type" "multiple")]
5779 )
5780
5781 ;; This instruction's pattern is generated directly by
5782 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5783 ;; need corresponding changes there.
5784 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5785 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5786 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5787 (match_operand:VALL_F16 2 "register_operand" "w")]
5788 PERMUTE))]
5789 "TARGET_SIMD"
5790 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5791 [(set_attr "type" "neon_permute<q>")]
5792 )
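
;; A hedged sketch of a shuffle aarch64_expand_vec_perm_const recognises
;; and routes to this pattern:
;;
;;   #include <arm_neon.h>
;;   int32x4_t zip_lo (int32x4_t a, int32x4_t b) { return vzip1q_s32 (a, b); }
;;
;; expected to emit "zip1 v0.4s, v0.4s, v1.4s"; uzp1/uzp2 and trn1/trn2
;; fall out of the same PERMUTE iterator.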
5793
5794 ;; This instruction's pattern is generated directly by
5795 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5796 ;; need corresponding changes there. Note that the immediate (third)
5797 ;; operand is a lane index not a byte index.
5798 (define_insn "aarch64_ext<mode>"
5799 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5800 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5801 (match_operand:VALL_F16 2 "register_operand" "w")
5802 (match_operand:SI 3 "immediate_operand" "i")]
5803 UNSPEC_EXT))]
5804 "TARGET_SIMD"
5805 {
5806 operands[3] = GEN_INT (INTVAL (operands[3])
5807 * GET_MODE_UNIT_SIZE (<MODE>mode));
5808 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5809 }
5810 [(set_attr "type" "neon_ext<q>")]
5811 )
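
;; Hedged illustration of the lane-to-byte scaling above:
;;
;;   #include <arm_neon.h>
;;   uint32x4_t rot (uint32x4_t a, uint32x4_t b) { return vextq_u32 (a, b, 1); }
;;
;; The lane index 1 is multiplied by the 4-byte element size, so we expect
;; "ext v0.16b, v0.16b, v1.16b, #4".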
5812
5813 ;; This instruction's pattern is generated directly by
5814 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5815 ;; need corresponding changes there.
5816 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5817 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5818 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5819 REVERSE))]
5820 "TARGET_SIMD"
5821 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5822 [(set_attr "type" "neon_rev<q>")]
5823 )
5824
5825 (define_insn "aarch64_st2<mode>_dreg"
5826 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5827 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5828 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 UNSPEC_ST2))]
5830 "TARGET_SIMD"
5831 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5832 [(set_attr "type" "neon_store2_2reg")]
5833 )
5834
5835 (define_insn "aarch64_st2<mode>_dreg"
5836 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5837 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5838 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5839 UNSPEC_ST2))]
5840 "TARGET_SIMD"
5841 "st1\\t{%S1.1d - %T1.1d}, %0"
5842 [(set_attr "type" "neon_store1_2reg")]
5843 )
5844
5845 (define_insn "aarch64_st3<mode>_dreg"
5846 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5847 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5848 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5849 UNSPEC_ST3))]
5850 "TARGET_SIMD"
5851 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5852 [(set_attr "type" "neon_store3_3reg")]
5853 )
5854
5855 (define_insn "aarch64_st3<mode>_dreg"
5856 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5857 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5858 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5859 UNSPEC_ST3))]
5860 "TARGET_SIMD"
5861 "st1\\t{%S1.1d - %U1.1d}, %0"
5862 [(set_attr "type" "neon_store1_3reg")]
5863 )
5864
5865 (define_insn "aarch64_st4<mode>_dreg"
5866 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5867 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5868 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5869 UNSPEC_ST4))]
5870 "TARGET_SIMD"
5871 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5872 [(set_attr "type" "neon_store4_4reg")]
5873 )
5874
5875 (define_insn "aarch64_st4<mode>_dreg"
5876 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5877 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5878 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5879 UNSPEC_ST4))]
5880 "TARGET_SIMD"
5881 "st1\\t{%S1.1d - %V1.1d}, %0"
5882 [(set_attr "type" "neon_store1_4reg")]
5883 )
5884
5885 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5886 [(match_operand:DI 0 "register_operand")
5887 (match_operand:VSTRUCT 1 "register_operand")
5888 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5889 "TARGET_SIMD"
5890 {
5891 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5892 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5893
5894 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5895 DONE;
5896 })
5897
5898 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5899 [(match_operand:DI 0 "register_operand")
5900 (match_operand:VSTRUCT 1 "register_operand")
5901 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5902 "TARGET_SIMD"
5903 {
5904 machine_mode mode = <VSTRUCT:MODE>mode;
5905 rtx mem = gen_rtx_MEM (mode, operands[0]);
5906
5907 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5908 DONE;
5909 })
5910
5911 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5912 [(match_operand:DI 0 "register_operand")
5913 (match_operand:VSTRUCT 1 "register_operand")
5914 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5915 (match_operand:SI 2 "immediate_operand")]
5916 "TARGET_SIMD"
5917 {
5918 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5919 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5920 * <VSTRUCT:nregs>);
5921
5922 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5923 mem, operands[1], operands[2]));
5924 DONE;
5925 })
5926
5927 (define_expand "aarch64_st1<VALL_F16:mode>"
5928 [(match_operand:DI 0 "register_operand")
5929 (match_operand:VALL_F16 1 "register_operand")]
5930 "TARGET_SIMD"
5931 {
5932 machine_mode mode = <VALL_F16:MODE>mode;
5933 rtx mem = gen_rtx_MEM (mode, operands[0]);
5934
5935 if (BYTES_BIG_ENDIAN)
5936 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5937 else
5938 emit_move_insn (mem, operands[1]);
5939 DONE;
5940 })
5941
5942 ;; Expander for builtins to insert vector registers into large
5943 ;; opaque integer modes.
5944
5945 ;; Q-register list. We don't need a D-reg inserter, as arm_neon.h
5946 ;; zero-extends the D-regs and inserts the resulting Q-regs.
5947
5948 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5949 [(match_operand:VSTRUCT 0 "register_operand")
5950 (match_operand:VSTRUCT 1 "register_operand")
5951 (match_operand:VQ 2 "register_operand")
5952 (match_operand:SI 3 "immediate_operand")]
5953 "TARGET_SIMD"
5954 {
5955 int part = INTVAL (operands[3]);
5956 int offset = part * 16;
5957
5958 emit_move_insn (operands[0], operands[1]);
5959 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5960 operands[2]);
5961 DONE;
5962 })
5963
5964 ;; Standard pattern name vec_init<mode><Vel>.
5965
5966 (define_expand "vec_init<mode><Vel>"
5967 [(match_operand:VALL_F16 0 "register_operand")
5968 (match_operand 1 "" "")]
5969 "TARGET_SIMD"
5970 {
5971 aarch64_expand_vector_init (operands[0], operands[1]);
5972 DONE;
5973 })
5974
5975 (define_expand "vec_init<mode><Vhalf>"
5976 [(match_operand:VQ_NO2E 0 "register_operand")
5977 (match_operand 1 "" "")]
5978 "TARGET_SIMD"
5979 {
5980 aarch64_expand_vector_init (operands[0], operands[1]);
5981 DONE;
5982 })
5983
5984 (define_insn "*aarch64_simd_ld1r<mode>"
5985 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5986 (vec_duplicate:VALL_F16
5987 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5988 "TARGET_SIMD"
5989 "ld1r\\t{%0.<Vtype>}, %1"
5990 [(set_attr "type" "neon_load1_all_lanes")]
5991 )
5992
5993 (define_insn "aarch64_simd_ld1<mode>_x2"
5994 [(set (match_operand:OI 0 "register_operand" "=w")
5995 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5996 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5997 UNSPEC_LD1))]
5998 "TARGET_SIMD"
5999 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6000 [(set_attr "type" "neon_load1_2reg<q>")]
6001 )
6002
6003 (define_insn "aarch64_simd_ld1<mode>_x2"
6004 [(set (match_operand:OI 0 "register_operand" "=w")
6005 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6006 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6007 UNSPEC_LD1))]
6008 "TARGET_SIMD"
6009 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6010 [(set_attr "type" "neon_load1_2reg<q>")]
6011 )
6012
6013
6014 (define_insn "@aarch64_frecpe<mode>"
6015 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6016 (unspec:VHSDF_HSDF
6017 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6018 UNSPEC_FRECPE))]
6019 "TARGET_SIMD"
6020 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6021 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6022 )
6023
6024 (define_insn "aarch64_frecpx<mode>"
6025 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6026 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6027 UNSPEC_FRECPX))]
6028 "TARGET_SIMD"
6029 "frecpx\t%<s>0, %<s>1"
6030 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6031 )
6032
6033 (define_insn "@aarch64_frecps<mode>"
6034 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6035 (unspec:VHSDF_HSDF
6036 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6037 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6038 UNSPEC_FRECPS))]
6039 "TARGET_SIMD"
6040 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6041 [(set_attr "type" "neon_fp_recps_<stype><q>")]
6042 )
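
;; Hedged sketch of how frecpe/frecps combine into a Newton-Raphson
;; reciprocal (each frecps step computes the correction factor 2 - d*x):
;;
;;   #include <arm_neon.h>
;;   float32x4_t approx_recip (float32x4_t d)
;;   {
;;     float32x4_t x = vrecpeq_f32 (d);        /* frecpe: initial estimate */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* frecps + fmul: refine */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* second refinement step */
;;     return x;
;;   }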
6043
6044 (define_insn "aarch64_urecpe<mode>"
6045 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6046 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6047 UNSPEC_URECPE))]
6048 "TARGET_SIMD"
6049 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6050 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6051
6052 ;; Standard pattern name vec_extract<mode><Vel>.
6053
6054 (define_expand "vec_extract<mode><Vel>"
6055 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6056 (match_operand:VALL_F16 1 "register_operand")
6057 (match_operand:SI 2 "immediate_operand")]
6058 "TARGET_SIMD"
6059 {
6060 emit_insn
6061 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6062 DONE;
6063 })
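
;; Hedged illustration: the lane-read intrinsics funnel through this
;; expander, e.g.
;;
;;   #include <arm_neon.h>
;;   int32_t lane2 (int32x4_t v) { return vgetq_lane_s32 (v, 2); }
;;
;; which we would expect to become "umov w0, v0.s[2]" (or a single str
;; when the destination is memory).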
6064
6065 ;; aes
6066
6067 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6068 [(set (match_operand:V16QI 0 "register_operand" "=w")
6069 (unspec:V16QI
6070 [(xor:V16QI
6071 (match_operand:V16QI 1 "register_operand" "%0")
6072 (match_operand:V16QI 2 "register_operand" "w"))]
6073 CRYPTO_AES))]
6074 "TARGET_SIMD && TARGET_AES"
6075 "aes<aes_op>\\t%0.16b, %2.16b"
6076 [(set_attr "type" "crypto_aese")]
6077 )
6078
6079 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6080 [(set (match_operand:V16QI 0 "register_operand" "=w")
6081 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6082 CRYPTO_AESMC))]
6083 "TARGET_SIMD && TARGET_AES"
6084 "aes<aesmc_op>\\t%0.16b, %1.16b"
6085 [(set_attr "type" "crypto_aesmc")]
6086 )
6087
6088 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6089 ;; and enforce the register dependency without scheduling or register
6090 ;; allocation messing up the order or introducing moves in between.
6091 ;; Mash the two together during combine.
6092
6093 (define_insn "*aarch64_crypto_aese_fused"
6094 [(set (match_operand:V16QI 0 "register_operand" "=w")
6095 (unspec:V16QI
6096 [(unspec:V16QI
6097 [(xor:V16QI
6098 (match_operand:V16QI 1 "register_operand" "%0")
6099 (match_operand:V16QI 2 "register_operand" "w"))]
6100 UNSPEC_AESE)]
6101 UNSPEC_AESMC))]
6102 "TARGET_SIMD && TARGET_AES
6103 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6104 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6105 [(set_attr "type" "crypto_aese")
6106 (set_attr "length" "8")]
6107 )
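
;; Hedged example of the source shape that combine mashes into the fused
;; pattern above:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t round (uint8x16_t state, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (state, key));
;;   }
;;
;; With AARCH64_FUSE_AES_AESMC active this is kept as the back-to-back
;; "aese ; aesmc" pair that fused cores execute cheaply.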
6108
6109 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6110 ;; and enforce the register dependency without scheduling or register
6111 ;; allocation messing up the order or introducing moves in between.
6112 ;; Mash the two together during combine.
6113
6114 (define_insn "*aarch64_crypto_aesd_fused"
6115 [(set (match_operand:V16QI 0 "register_operand" "=w")
6116 (unspec:V16QI
6117 [(unspec:V16QI
6118 [(xor:V16QI
6119 (match_operand:V16QI 1 "register_operand" "%0")
6120 (match_operand:V16QI 2 "register_operand" "w"))]
6121 UNSPEC_AESD)]
6122 UNSPEC_AESIMC))]
6123 "TARGET_SIMD && TARGET_AES
6124 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6125 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6126 [(set_attr "type" "crypto_aese")
6127 (set_attr "length" "8")]
6128 )
6129
6130 ;; sha1
6131
6132 (define_insn "aarch64_crypto_sha1hsi"
6133 [(set (match_operand:SI 0 "register_operand" "=w")
6134 (unspec:SI [(match_operand:SI 1
6135 "register_operand" "w")]
6136 UNSPEC_SHA1H))]
6137 "TARGET_SIMD && TARGET_SHA2"
6138 "sha1h\\t%s0, %s1"
6139 [(set_attr "type" "crypto_sha1_fast")]
6140 )
6141
6142 (define_insn "aarch64_crypto_sha1hv4si"
6143 [(set (match_operand:SI 0 "register_operand" "=w")
6144 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6145 (parallel [(const_int 0)]))]
6146 UNSPEC_SHA1H))]
6147 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6148 "sha1h\\t%s0, %s1"
6149 [(set_attr "type" "crypto_sha1_fast")]
6150 )
6151
6152 (define_insn "aarch64_be_crypto_sha1hv4si"
6153 [(set (match_operand:SI 0 "register_operand" "=w")
6154 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6155 (parallel [(const_int 3)]))]
6156 UNSPEC_SHA1H))]
6157 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6158 "sha1h\\t%s0, %s1"
6159 [(set_attr "type" "crypto_sha1_fast")]
6160 )
6161
6162 (define_insn "aarch64_crypto_sha1su1v4si"
6163 [(set (match_operand:V4SI 0 "register_operand" "=w")
6164 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6165 (match_operand:V4SI 2 "register_operand" "w")]
6166 UNSPEC_SHA1SU1))]
6167 "TARGET_SIMD && TARGET_SHA2"
6168 "sha1su1\\t%0.4s, %2.4s"
6169 [(set_attr "type" "crypto_sha1_fast")]
6170 )
6171
6172 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6173 [(set (match_operand:V4SI 0 "register_operand" "=w")
6174 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6175 (match_operand:SI 2 "register_operand" "w")
6176 (match_operand:V4SI 3 "register_operand" "w")]
6177 CRYPTO_SHA1))]
6178 "TARGET_SIMD && TARGET_SHA2"
6179 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6180 [(set_attr "type" "crypto_sha1_slow")]
6181 )
6182
6183 (define_insn "aarch64_crypto_sha1su0v4si"
6184 [(set (match_operand:V4SI 0 "register_operand" "=w")
6185 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6186 (match_operand:V4SI 2 "register_operand" "w")
6187 (match_operand:V4SI 3 "register_operand" "w")]
6188 UNSPEC_SHA1SU0))]
6189 "TARGET_SIMD && TARGET_SHA2"
6190 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6191 [(set_attr "type" "crypto_sha1_xor")]
6192 )
6193
6194 ;; sha256
6195
6196 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6197 [(set (match_operand:V4SI 0 "register_operand" "=w")
6198 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6199 (match_operand:V4SI 2 "register_operand" "w")
6200 (match_operand:V4SI 3 "register_operand" "w")]
6201 CRYPTO_SHA256))]
6202 "TARGET_SIMD && TARGET_SHA2"
6203 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6204 [(set_attr "type" "crypto_sha256_slow")]
6205 )
6206
6207 (define_insn "aarch64_crypto_sha256su0v4si"
6208 [(set (match_operand:V4SI 0 "register_operand" "=w")
6209 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6210 (match_operand:V4SI 2 "register_operand" "w")]
6211 UNSPEC_SHA256SU0))]
6212 "TARGET_SIMD && TARGET_SHA2"
6213 "sha256su0\\t%0.4s, %2.4s"
6214 [(set_attr "type" "crypto_sha256_fast")]
6215 )
6216
6217 (define_insn "aarch64_crypto_sha256su1v4si"
6218 [(set (match_operand:V4SI 0 "register_operand" "=w")
6219 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6220 (match_operand:V4SI 2 "register_operand" "w")
6221 (match_operand:V4SI 3 "register_operand" "w")]
6222 UNSPEC_SHA256SU1))]
6223 "TARGET_SIMD && TARGET_SHA2"
6224 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6225 [(set_attr "type" "crypto_sha256_slow")]
6226 )
6227
6228 ;; sha512
6229
6230 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6231 [(set (match_operand:V2DI 0 "register_operand" "=w")
6232 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6233 (match_operand:V2DI 2 "register_operand" "w")
6234 (match_operand:V2DI 3 "register_operand" "w")]
6235 CRYPTO_SHA512))]
6236 "TARGET_SIMD && TARGET_SHA3"
6237 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6238 [(set_attr "type" "crypto_sha512")]
6239 )
6240
6241 (define_insn "aarch64_crypto_sha512su0qv2di"
6242 [(set (match_operand:V2DI 0 "register_operand" "=w")
6243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6244 (match_operand:V2DI 2 "register_operand" "w")]
6245 UNSPEC_SHA512SU0))]
6246 "TARGET_SIMD && TARGET_SHA3"
6247 "sha512su0\\t%0.2d, %2.2d"
6248 [(set_attr "type" "crypto_sha512")]
6249 )
6250
6251 (define_insn "aarch64_crypto_sha512su1qv2di"
6252 [(set (match_operand:V2DI 0 "register_operand" "=w")
6253 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6254 (match_operand:V2DI 2 "register_operand" "w")
6255 (match_operand:V2DI 3 "register_operand" "w")]
6256 UNSPEC_SHA512SU1))]
6257 "TARGET_SIMD && TARGET_SHA3"
6258 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6259 [(set_attr "type" "crypto_sha512")]
6260 )
6261
6262 ;; sha3
6263
6264 (define_insn "eor3q<mode>4"
6265 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6266 (xor:VQ_I
6267 (xor:VQ_I
6268 (match_operand:VQ_I 2 "register_operand" "w")
6269 (match_operand:VQ_I 3 "register_operand" "w"))
6270 (match_operand:VQ_I 1 "register_operand" "w")))]
6271 "TARGET_SIMD && TARGET_SHA3"
6272 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6273 [(set_attr "type" "crypto_sha3")]
6274 )
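
;; Hedged usage sketch (assuming the ACLE SHA3 intrinsics are available
;; under +sha3):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t xor3 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
;;   {
;;     return veor3q_u8 (a, b, c);  /* expected: eor3 v0.16b, v0.16b, v1.16b, v2.16b */
;;   }
;;
;; A plain (a ^ b) ^ c in C should also match this pattern via combine.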
6275
6276 (define_insn "aarch64_rax1qv2di"
6277 [(set (match_operand:V2DI 0 "register_operand" "=w")
6278 (xor:V2DI
6279 (rotate:V2DI
6280 (match_operand:V2DI 2 "register_operand" "w")
6281 (const_int 1))
6282 (match_operand:V2DI 1 "register_operand" "w")))]
6283 "TARGET_SIMD && TARGET_SHA3"
6284 "rax1\\t%0.2d, %1.2d, %2.2d"
6285 [(set_attr "type" "crypto_sha3")]
6286 )
6287
6288 (define_insn "aarch64_xarqv2di"
6289 [(set (match_operand:V2DI 0 "register_operand" "=w")
6290 (rotatert:V2DI
6291 (xor:V2DI
6292 (match_operand:V2DI 1 "register_operand" "%w")
6293 (match_operand:V2DI 2 "register_operand" "w"))
6294 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6295 "TARGET_SIMD && TARGET_SHA3"
6296 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6297 [(set_attr "type" "crypto_sha3")]
6298 )
6299
6300 (define_insn "bcaxq<mode>4"
6301 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6302 (xor:VQ_I
6303 (and:VQ_I
6304 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6305 (match_operand:VQ_I 2 "register_operand" "w"))
6306 (match_operand:VQ_I 1 "register_operand" "w")))]
6307 "TARGET_SIMD && TARGET_SHA3"
6308 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6309 [(set_attr "type" "crypto_sha3")]
6310 )
6311
6312 ;; SM3
6313
6314 (define_insn "aarch64_sm3ss1qv4si"
6315 [(set (match_operand:V4SI 0 "register_operand" "=w")
6316 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6317 (match_operand:V4SI 2 "register_operand" "w")
6318 (match_operand:V4SI 3 "register_operand" "w")]
6319 UNSPEC_SM3SS1))]
6320 "TARGET_SIMD && TARGET_SM4"
6321 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6322 [(set_attr "type" "crypto_sm3")]
6323 )
6324
6325
6326 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6327 [(set (match_operand:V4SI 0 "register_operand" "=w")
6328 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6329 (match_operand:V4SI 2 "register_operand" "w")
6330 (match_operand:V4SI 3 "register_operand" "w")
6331 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6332 CRYPTO_SM3TT))]
6333 "TARGET_SIMD && TARGET_SM4"
6334 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6335 [(set_attr "type" "crypto_sm3")]
6336 )
6337
6338 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6339 [(set (match_operand:V4SI 0 "register_operand" "=w")
6340 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6341 (match_operand:V4SI 2 "register_operand" "w")
6342 (match_operand:V4SI 3 "register_operand" "w")]
6343 CRYPTO_SM3PART))]
6344 "TARGET_SIMD && TARGET_SM4"
6345 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6346 [(set_attr "type" "crypto_sm3")]
6347 )
6348
6349 ;; SM4
6350
6351 (define_insn "aarch64_sm4eqv4si"
6352 [(set (match_operand:V4SI 0 "register_operand" "=w")
6353 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6354 (match_operand:V4SI 2 "register_operand" "w")]
6355 UNSPEC_SM4E))]
6356 "TARGET_SIMD && TARGET_SM4"
6357 "sm4e\\t%0.4s, %2.4s"
6358 [(set_attr "type" "crypto_sm4")]
6359 )
6360
6361 (define_insn "aarch64_sm4ekeyqv4si"
6362 [(set (match_operand:V4SI 0 "register_operand" "=w")
6363 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6364 (match_operand:V4SI 2 "register_operand" "w")]
6365 UNSPEC_SM4EKEY))]
6366 "TARGET_SIMD && TARGET_SM4"
6367 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6368 [(set_attr "type" "crypto_sm4")]
6369 )
6370
6371 ;; fp16fml
6372
6373 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6374 [(set (match_operand:VDQSF 0 "register_operand")
6375 (unspec:VDQSF
6376 [(match_operand:VDQSF 1 "register_operand")
6377 (match_operand:<VFMLA_W> 2 "register_operand")
6378 (match_operand:<VFMLA_W> 3 "register_operand")]
6379 VFMLA16_LOW))]
6380 "TARGET_F16FML"
6381 {
6382 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6383 <nunits> * 2, false);
6384 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6385 <nunits> * 2, false);
6386
6387 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6388 operands[1],
6389 operands[2],
6390 operands[3],
6391 p1, p2));
6392 DONE;
6394 })
6395
6396 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6397 [(set (match_operand:VDQSF 0 "register_operand")
6398 (unspec:VDQSF
6399 [(match_operand:VDQSF 1 "register_operand")
6400 (match_operand:<VFMLA_W> 2 "register_operand")
6401 (match_operand:<VFMLA_W> 3 "register_operand")]
6402 VFMLA16_HIGH))]
6403 "TARGET_F16FML"
6404 {
6405 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6406 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6407
6408 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6409 operands[1],
6410 operands[2],
6411 operands[3],
6412 p1, p2));
6413 DONE;
6414 })
6415
6416 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6417 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6418 (fma:VDQSF
6419 (float_extend:VDQSF
6420 (vec_select:<VFMLA_SEL_W>
6421 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6422 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6423 (float_extend:VDQSF
6424 (vec_select:<VFMLA_SEL_W>
6425 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6426 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6427 (match_operand:VDQSF 1 "register_operand" "0")))]
6428 "TARGET_F16FML"
6429 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6430 [(set_attr "type" "neon_fp_mul_s")]
6431 )
6432
6433 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6434 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6435 (fma:VDQSF
6436 (float_extend:VDQSF
6437 (neg:<VFMLA_SEL_W>
6438 (vec_select:<VFMLA_SEL_W>
6439 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6440 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6441 (float_extend:VDQSF
6442 (vec_select:<VFMLA_SEL_W>
6443 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6444 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6445 (match_operand:VDQSF 1 "register_operand" "0")))]
6446 "TARGET_F16FML"
6447 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6448 [(set_attr "type" "neon_fp_mul_s")]
6449 )
6450
6451 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6452 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6453 (fma:VDQSF
6454 (float_extend:VDQSF
6455 (vec_select:<VFMLA_SEL_W>
6456 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6457 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6458 (float_extend:VDQSF
6459 (vec_select:<VFMLA_SEL_W>
6460 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6461 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6462 (match_operand:VDQSF 1 "register_operand" "0")))]
6463 "TARGET_F16FML"
6464 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6465 [(set_attr "type" "neon_fp_mul_s")]
6466 )
6467
6468 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6469 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6470 (fma:VDQSF
6471 (float_extend:VDQSF
6472 (neg:<VFMLA_SEL_W>
6473 (vec_select:<VFMLA_SEL_W>
6474 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6475 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6476 (float_extend:VDQSF
6477 (vec_select:<VFMLA_SEL_W>
6478 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6479 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6480 (match_operand:VDQSF 1 "register_operand" "0")))]
6481 "TARGET_F16FML"
6482 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6483 [(set_attr "type" "neon_fp_mul_s")]
6484 )
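
;; A hedged asm-level summary of the four patterns above:
;;
;;   fmlal  v0.2s, v1.2h, v2.2h  ; v0.s[i] += widen (v1.h[i]) * widen (v2.h[i]), i = 0..1
;;   fmlsl  v0.2s, v1.2h, v2.2h  ; as fmlal, but the products are subtracted
;;   fmlal2 v0.2s, v1.2h, v2.2h  ; reads the high halves v1.h[2..3], v2.h[2..3]
;;   fmlsl2 v0.2s, v1.2h, v2.2h  ; high-half variant of fmlsl
;;
;; i.e. the lo/hi vect_par_cnst halves select which f16 elements widen.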
6485
6486 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6487 [(set (match_operand:V2SF 0 "register_operand")
6488 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6489 (match_operand:V4HF 2 "register_operand")
6490 (match_operand:V4HF 3 "register_operand")
6491 (match_operand:SI 4 "aarch64_imm2")]
6492 VFMLA16_LOW))]
6493 "TARGET_F16FML"
6494 {
6495 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6496 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6497
6498 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6499 operands[1],
6500 operands[2],
6501 operands[3],
6502 p1, lane));
6503 DONE;
6504 }
6505 )
6506
6507 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6508 [(set (match_operand:V2SF 0 "register_operand")
6509 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6510 (match_operand:V4HF 2 "register_operand")
6511 (match_operand:V4HF 3 "register_operand")
6512 (match_operand:SI 4 "aarch64_imm2")]
6513 VFMLA16_HIGH))]
6514 "TARGET_F16FML"
6515 {
6516 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6517 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6518
6519 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6520 operands[1],
6521 operands[2],
6522 operands[3],
6523 p1, lane));
6524 DONE;
6525 })
6526
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

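;; A C sketch of the 64-bit lane forms above (hypothetical helper;
;; intrinsic names assumed per ACLE).  The "x" constraint restricts the
;; indexed operand to V0-V15, as required for 16-bit indexed multiplies:
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   lane_mla (float32x2_t acc, float16x4_t a, float16x4_t b)
;;   {
;;     /* fmlal v0.2s, v1.2h, v2.h[3]: low half of A times element 3
;;        of B, widened to f32 and accumulated.  */
;;     return vfmlal_lane_low_f16 (acc, a, b, 3);
;;   }
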
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

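;; A C sketch of the 128-bit laneq forms above (hypothetical helper;
;; intrinsic names assumed per ACLE); the lane index now ranges over all
;; eight h elements:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   laneq_mls (float32x4_t acc, float16x8_t a, float16x8_t b)
;;   {
;;     /* fmlsl2 v0.4s, v1.4h, v2.h[7]: high half of A times element 7
;;        of B, widened and subtracted from the accumulator.  */
;;     return vfmlslq_laneq_high_f16 (acc, a, b, 7);
;;   }
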
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

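;; A C sketch of the mixed-width laneq forms above (hypothetical helper;
;; intrinsic names assumed per ACLE): 64-bit data vector, lane taken
;; from a 128-bit vector:
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   laneq_mla (float32x2_t acc, float16x4_t a, float16x8_t b)
;;   {
;;     /* fmlal v0.2s, v1.2h, v2.h[5].  */
;;     return vfmlal_laneq_low_f16 (acc, a, b, 5);
;;   }
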
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

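;; A C sketch of the opposite mixed-width combination (hypothetical
;; helper; intrinsic names assumed per ACLE): 128-bit data vector, lane
;; taken from a 64-bit vector, so the index is limited to 0-3:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   q_lane_mls (float32x4_t acc, float16x8_t a, float16x4_t b)
;;   {
;;     /* fmlsl v0.4s, v1.4h, v2.h[2].  */
;;     return vfmlslq_lane_low_f16 (acc, a, b, 2);
;;   }
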
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
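
;; These two patterns back the 64x64->128 carry-less (polynomial)
;; multiplies from the +aes crypto extension, used e.g. for GHASH in
;; AES-GCM.  A minimal C sketch (hypothetical helper functions; the
;; intrinsics are the ACLE vmull_p64/vmull_high_p64):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   clmul_low (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);        /* pmull  v0.1q, v1.1d, v2.1d */
;;   }
;;
;;   poly128_t
;;   clmul_high (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);   /* pmull2 v0.1q, v1.2d, v2.2d */
;;   }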