]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
[AArch64] PR tree-optimization/90332: Implement vec_init<M><N> where N is a vector...
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; General vector move expander for all vector modes, including FP16
;; vectors (VALL_F16).  A non-register source is normally forced into a
;; register; the one exception is an all-zero immediate stored straight
;; to memory, which can use xzr (see the C comment below).
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
;; Misaligned vector move.  AdvSIMD loads/stores tolerate misalignment,
;; so this only has to legalize the operands, never change the access.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
;; Duplicate a scalar into every lane of an integer vector.  Alternative
;; 0 takes the scalar from element 0 of a SIMD register; alternative 1
;; (slightly penalized with '?') moves it across from a general register.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
;; Floating-point variant of the above: FP scalars already live in SIMD
;; registers, so only the element-0 form is needed.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
;; Broadcast a selected lane of a vector into all lanes of the result.
;; The lane number is remapped for big-endian via aarch64_endian_lane_rtx
;; before being printed into the assembly template.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
;; As above, but the source vector has the opposite width (64 vs 128 bit,
;; via the VSWAP_WIDTH attribute), so the lane index is interpreted in
;; the source vector's mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
;; 64-bit (D-register) vector move.  Covers load, store (including a
;; zero-immediate store via xzr), SIMD-to-SIMD, SIMD<->general-register
;; transfers and vector immediates.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
;; 128-bit (Q-register) vector move.  A zero store uses STP of two xzr
;; registers (hence the Umn pair-address constraint).  Transfers
;; involving general registers (alternatives 4-6) emit "#" and are
;; split after reload by the define_splits below, so they have length 8.
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
;; Store element 0 of a vector with a scalar STR.  The condition uses
;; ENDIAN_LANE_N so "lane zero" means the architectural lane 0 on both
;; endiannesses.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
;; Paired 64-bit vector load: two consecutive D-register loads fused
;; into one LDP.  The condition checks that the second address is
;; exactly one vector past the first.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
;; Paired 64-bit vector store; mirror image of load_pair above.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %0"
205 [(set_attr "type" "neon_stp")]
206 )
207
;; Paired 128-bit (Q-register) vector load.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
;; Paired 128-bit (Q-register) vector store.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %0"
232 [(set_attr "type" "neon_stp_q")]
233 )
234
235
;; After reload, split a Q-register-sized move between two general
;; registers into a pair of DImode moves.
236 (define_split
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
242 [(const_int 0)]
243 {
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245 DONE;
246 })
247
;; After reload, split a 128-bit move that crosses between the FP/SIMD
;; and general register files (either direction).
248 (define_split
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254 [(const_int 0)]
255 {
256 aarch64_split_simd_move (operands[0], operands[1]);
257 DONE;
258 })
259
;; Expand a cross-register-file 128-bit move as two half-vector moves.
;; GP -> SIMD builds the destination from the low and high quads;
;; SIMD -> GP extracts the low and high halves with lane selects.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
263 "TARGET_SIMD"
264 {
265 rtx dst = operands[0];
266 rtx src = operands[1];
267
268 if (GP_REGNUM_P (REGNO (src)))
269 {
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273 emit_insn
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
275 emit_insn
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
277 }
278
279 else
280 {
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285
286 emit_insn
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288 emit_insn
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290 }
291 DONE;
292 }
293 )
294
;; Move the low 64 bits of a Q register to a general register (UMOV of
;; d[0]).  Restricted to after reload, matching the splits above.
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297 (vec_select:<VHALF>
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
301 "umov\t%0, %1.d[0]"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
304 ])
305
;; As above but for the high 64 bits (UMOV of d[1]).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308 (vec_select:<VHALF>
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
312 "umov\t%0, %1.d[1]"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
315 ])
316
;; ORN: %0 = %2 | ~%1.  Note the RTL negates operand 1 but the ORN
;; instruction negates its second source, hence the swapped order in
;; the assembly template.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
324 )
325
;; BIC: %0 = %2 & ~%1 (same operand-order remark as ORN above).
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
333 )
334
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
339 "TARGET_SIMD"
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
342 )
343
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
348 "TARGET_SIMD"
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
351 )
352
;; Vector integer multiply.  VDQ_BHSI excludes 64-bit elements: there
;; is no MUL for .2d/.1d vectors.
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357 "TARGET_SIMD"
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
360 )
361
;; Per-element byte swap, implemented with the REV16/REV32/REV64 family
;; (suffix chosen by the Vrevsuff attribute).
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365 "TARGET_SIMD"
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
368 )
369
;; Bit-reverse within each byte (RBIT); byte vectors only.
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373 UNSPEC_RBIT))]
374 "TARGET_SIMD"
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
377 )
378
;; Count trailing zeros: byte-swap, then bit-reverse each byte (viewing
;; the vector as bytes), which together reverse the bits of each element;
;; a CLZ on the result then yields CTZ of the original.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
382 "TARGET_SIMD"
383 {
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386 <MODE>mode, 0);
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389 DONE;
390 }
391 )
392
;; xorsign(x, y) = x with its sign bit XORed by the sign of y.
;; Implemented entirely in the integer domain: mask out y's sign bit
;; with AND, then XOR it into x.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
397 "TARGET_SIMD"
398 {
399
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
404
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
407
/* Sign bit is the top bit of each element.  */
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
409
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
413
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
418 DONE;
419 }
420 )
421
422 ;; The fcadd and fcmla patterns are made UNSPECs explicitly because their
423 ;; usage needs to guarantee that the source vectors are contiguous.  It
424 ;; would be wrong to describe the operation without being able to describe
425 ;; the permute that is also required, but even if that is done the permute
426 ;; would have been created as a LOAD_LANES which means the values in the
427 ;; registers are in the wrong order.
;; Complex add with rotation (FCADD #90 / #270, selected by <rot>).
428 (define_insn "aarch64_fcadd<rot><mode>"
429 [(set (match_operand:VHSDF 0 "register_operand" "=w")
430 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431 (match_operand:VHSDF 2 "register_operand" "w")]
432 FCADD))]
433 "TARGET_COMPLEX"
434 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435 [(set_attr "type" "neon_fcadd")]
436 )
437
;; Complex multiply-accumulate; operand 1 is the accumulator and must
;; be tied to the output ("0" constraint).
438 (define_insn "aarch64_fcmla<rot><mode>"
439 [(set (match_operand:VHSDF 0 "register_operand" "=w")
440 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442 (match_operand:VHSDF 3 "register_operand" "w")]
443 FCMLA)))]
444 "TARGET_COMPLEX"
445 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446 [(set_attr "type" "neon_fcmla")]
447 )
448
449
;; Indexed FCMLA: operand 4 selects a complex-number pair; the index is
;; endian-adjusted in the half-width mode (a complex pair spans two
;; elements).
450 (define_insn "aarch64_fcmla_lane<rot><mode>"
451 [(set (match_operand:VHSDF 0 "register_operand" "=w")
452 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454 (match_operand:VHSDF 3 "register_operand" "w")
455 (match_operand:SI 4 "const_int_operand" "n")]
456 FCMLA)))]
457 "TARGET_COMPLEX"
458 {
459 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
461 }
462 [(set_attr "type" "neon_fcmla")]
463 )
464
;; V4HF destination with a V8HF (laneq) multiplier; hard-coded modes.
465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
466 [(set (match_operand:V4HF 0 "register_operand" "=w")
467 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469 (match_operand:V8HF 3 "register_operand" "w")
470 (match_operand:SI 4 "const_int_operand" "n")]
471 FCMLA)))]
472 "TARGET_COMPLEX"
473 {
474 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
476 }
477 [(set_attr "type" "neon_fcmla")]
478 )
479
;; Q-register FCMLA with a half-width multiplier vector; the lane index
;; is computed by hand over nunits/2 complex pairs.
480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
481 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484 (match_operand:<VHALF> 3 "register_operand" "w")
485 (match_operand:SI 4 "const_int_operand" "n")]
486 FCMLA)))]
487 "TARGET_COMPLEX"
488 {
489 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
490 operands[4]
491 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
493 }
494 [(set_attr "type" "neon_fcmla")]
495 )
496
497 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: 4-way byte dot product accumulated into 32-bit lanes.
;; Operand 1 is the accumulator, tied to the output.
498 (define_insn "aarch64_<sur>dot<vsi2qi>"
499 [(set (match_operand:VS 0 "register_operand" "=w")
500 (plus:VS (match_operand:VS 1 "register_operand" "0")
501 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502 (match_operand:<VSI2QI> 3 "register_operand" "w")]
503 DOTPROD)))]
504 "TARGET_DOTPROD"
505 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506 [(set_attr "type" "neon_dot<q>")]
507 )
508
509 ;; These expands map to the Dot Product optab the vectorizer checks for.
510 ;; The auto-vectorizer expects a dot product builtin that also does an
511 ;; accumulation into the provided register.
512 ;; Given the following pattern
513 ;;
514 ;; for (i=0; i<len; i++) {
515 ;; c = a[i] * b[i];
516 ;; r += c;
517 ;; }
518 ;; return result;
519 ;;
520 ;; This can be auto-vectorized to
521 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
522 ;;
523 ;; given enough iterations. However the vectorizer can keep unrolling the loop
524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
526 ;; ...
527 ;;
528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
529 (define_expand "<sur>dot_prod<vsi2qi>"
530 [(set (match_operand:VS 0 "register_operand")
531 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532 (match_operand:<VSI2QI> 2 "register_operand")]
533 DOTPROD)
534 (match_operand:VS 3 "register_operand")))]
535 "TARGET_DOTPROD"
536 {
/* Accumulate into operand 3 in place, then copy into the result.  */
537 emit_insn (
538 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
539 operands[2]));
540 emit_insn (gen_rtx_SET (operands[0], operands[3]));
541 DONE;
542 })
543
544 ;; These instructions map to the __builtins for the Dot Product
545 ;; indexed operations.
;; Indexed dot product against a 32-bit group (4 bytes) of a D register;
;; the group index is endian-adjusted in V8QImode.
546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547 [(set (match_operand:VS 0 "register_operand" "=w")
548 (plus:VS (match_operand:VS 1 "register_operand" "0")
549 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550 (match_operand:V8QI 3 "register_operand" "<h_con>")
551 (match_operand:SI 4 "immediate_operand" "i")]
552 DOTPROD)))]
553 "TARGET_DOTPROD"
554 {
555 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
557 }
558 [(set_attr "type" "neon_dot<q>")]
559 )
560
;; As above but indexing into a full Q register (laneq form).
561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562 [(set (match_operand:VS 0 "register_operand" "=w")
563 (plus:VS (match_operand:VS 1 "register_operand" "0")
564 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565 (match_operand:V16QI 3 "register_operand" "<h_con>")
566 (match_operand:SI 4 "immediate_operand" "i")]
567 DOTPROD)))]
568 "TARGET_DOTPROD"
569 {
570 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
572 }
573 [(set_attr "type" "neon_dot<q>")]
574 )
575
;; copysign(x, y): magnitude of x with the sign of y.  Build a vector
;; whose elements have only the sign bit set, then BSL selects that bit
;; from y (operand 2) and all other bits from x (operand 1).
576 (define_expand "copysign<mode>3"
577 [(match_operand:VHSDF 0 "register_operand")
578 (match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 "TARGET_FLOAT && TARGET_SIMD"
581 {
582 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
584
585 emit_move_insn (v_bitmask,
586 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587 HOST_WIDE_INT_M1U << bits));
588 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589 operands[2], operands[1]));
590 DONE;
591 }
592 )
593
;; Multiply a vector by one broadcast element of another vector of the
;; same mode (MUL/FMUL by-element form).  Lane index is endian-adjusted.
594 (define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
596 (mult:VMUL
597 (vec_duplicate:VMUL
598 (vec_select:<VEL>
599 (match_operand:VMUL 1 "register_operand" "<h_con>")
600 (parallel [(match_operand:SI 2 "immediate_operand")])))
601 (match_operand:VMUL 3 "register_operand" "w")))]
602 "TARGET_SIMD"
603 {
604 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
606 }
607 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
608 )
609
;; As above, but the element comes from a vector of the opposite width
;; (VSWAP_WIDTH), so the lane index lives in that mode.
610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612 (mult:VMUL_CHANGE_NLANES
613 (vec_duplicate:VMUL_CHANGE_NLANES
614 (vec_select:<VEL>
615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616 (parallel [(match_operand:SI 2 "immediate_operand")])))
617 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
618 "TARGET_SIMD"
619 {
620 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
622 }
623 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
624 )
625
;; Multiply by a duplicated scalar register; always uses lane 0 of the
;; scalar's SIMD register.
626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
628 (mult:VMUL
629 (vec_duplicate:VMUL
630 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631 (match_operand:VMUL 2 "register_operand" "w")))]
632 "TARGET_SIMD"
633 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
635 )
636
;; FRSQRTE: reciprocal square-root estimate (first step of the
;; Newton-Raphson rsqrt sequence).
637 (define_insn "@aarch64_rsqrte<mode>"
638 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
640 UNSPEC_RSQRTE))]
641 "TARGET_SIMD"
642 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
644
;; FRSQRTS: reciprocal square-root refinement step.
645 (define_insn "@aarch64_rsqrts<mode>"
646 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
649 UNSPEC_RSQRTS))]
650 "TARGET_SIMD"
651 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
653
;; Expand 1/sqrt(x) via the iterative approximation helper.
654 (define_expand "rsqrt<mode>2"
655 [(set (match_operand:VALLF 0 "register_operand" "=w")
656 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
657 UNSPEC_RSQRT))]
658 "TARGET_SIMD"
659 {
660 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
661 DONE;
662 })
663
;; DF result from multiplying a scalar DF by one element of a V2DF
;; (by-element FMUL producing a scalar; note the .2d syntax operates on
;; the whole register but only d[%2] of operand 1 matters here).
664 (define_insn "*aarch64_mul3_elt_to_64v2df"
665 [(set (match_operand:DF 0 "register_operand" "=w")
666 (mult:DF
667 (vec_select:DF
668 (match_operand:V2DF 1 "register_operand" "w")
669 (parallel [(match_operand:SI 2 "immediate_operand")]))
670 (match_operand:DF 3 "register_operand" "w")))]
671 "TARGET_SIMD"
672 {
673 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
675 }
676 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
677 )
678
;; Vector integer negate.
679 (define_insn "neg<mode>2"
680 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
682 "TARGET_SIMD"
683 "neg\t%0.<Vtype>, %1.<Vtype>"
684 [(set_attr "type" "neon_neg<q>")]
685 )
686
;; Vector integer absolute value.
687 (define_insn "abs<mode>2"
688 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
690 "TARGET_SIMD"
691 "abs\t%0.<Vtype>, %1.<Vtype>"
692 [(set_attr "type" "neon_abs<q>")]
693 )
694
695 ;; The intrinsic version of integer ABS must not be allowed to
696 ;; combine with any operation with an integrated ABS step, such
697 ;; as SABD.
698 (define_insn "aarch64_abs<mode>"
699 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
700 (unspec:VSDQ_I_DI
701 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
702 UNSPEC_ABS))]
703 "TARGET_SIMD"
704 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705 [(set_attr "type" "neon_abs<q>")]
706 )
707
708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709 ;; This isn't accurate as ABS always treats its input as a signed value.
710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
713 (define_insn "aarch64_<su>abd<mode>_3"
714 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
715 (minus:VDQ_BHSI
716 (USMAX:VDQ_BHSI
717 (match_operand:VDQ_BHSI 1 "register_operand" "w")
718 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
719 (<max_opp>:VDQ_BHSI
720 (match_dup 1)
721 (match_dup 2))))]
722 "TARGET_SIMD"
723 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724 [(set_attr "type" "neon_abd<q>")]
725 )
726
;; Widening absolute difference of the high halves ([SU]ABDL2),
;; producing double-width elements.
727 (define_insn "aarch64_<sur>abdl2<mode>_3"
728 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730 (match_operand:VDQV_S 2 "register_operand" "w")]
731 ABDL2))]
732 "TARGET_SIMD"
733 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734 [(set_attr "type" "neon_abd<q>")]
735 )
736
;; Widening absolute-difference-and-accumulate ([SU]ABAL); operand 3 is
;; the accumulator, tied to the output.
737 (define_insn "aarch64_<sur>abal<mode>_4"
738 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740 (match_operand:VDQV_S 2 "register_operand" "w")
741 (match_operand:<VDBLW> 3 "register_operand" "0")]
742 ABAL))]
743 "TARGET_SIMD"
744 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745 [(set_attr "type" "neon_arith_acc<q>")]
746 )
747
;; Pairwise widening add-and-accumulate ([SU]ADALP); operand 2 is the
;; accumulator, tied to the output.
748 (define_insn "aarch64_<sur>adalp<mode>_3"
749 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751 (match_operand:<VDBLW> 2 "register_operand" "0")]
752 ADALP))]
753 "TARGET_SIMD"
754 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755 [(set_attr "type" "neon_reduc_add<q>")]
756 )
757
758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
760 ;; reduction of the difference into a V4SI vector and accumulate that into
761 ;; operand 3 before copying that into the result operand 0.
762 ;; Perform that with a sequence of:
763 ;; UABDL2 tmp.8h, op1.16b, op2.16b
764 ;; UABAL tmp.8h, op1.16b, op2.16b
765 ;; UADALP op3.4s, tmp.8h
766 ;; MOV op0, op3 // should be eliminated in later passes.
767 ;;
768 ;; For TARGET_DOTPROD we do:
769 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
770 ;; UABD tmp2.16b, op1.16b, op2.16b
771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
772 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
773 ;;
774 ;; The signed version just uses the signed variants of the above instructions
775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
776 ;; unsigned.
777
778 (define_expand "<sur>sadv16qi"
779 [(use (match_operand:V4SI 0 "register_operand"))
780 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
781 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
782 (use (match_operand:V4SI 3 "register_operand"))]
783 "TARGET_SIMD"
784 {
/* Fast path: dot product of the abs-difference against a vector of 1s.  */
785 if (TARGET_DOTPROD)
786 {
787 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
788 rtx abd = gen_reg_rtx (V16QImode);
789 emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
790 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
791 abd, ones));
792 DONE;
793 }
/* Fallback: ABDL2 + ABAL into 8h, then pairwise-accumulate into 4s.  */
794 rtx reduc = gen_reg_rtx (V8HImode);
795 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
796 operands[2]));
797 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
798 operands[2], reduc));
799 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
800 operands[3]));
801 emit_move_insn (operands[0], operands[3]);
802 DONE;
803 }
804 )
805
;; Signed absolute-difference-and-accumulate (SABA): operand 3 is the
;; accumulator, tied to the output.
806 (define_insn "aba<mode>_3"
807 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
808 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
809 (match_operand:VDQ_BHSI 1 "register_operand" "w")
810 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
811 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
812 "TARGET_SIMD"
813 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
814 [(set_attr "type" "neon_arith_acc<q>")]
815 )
816
;; Floating-point absolute difference (FABD), vector and scalar forms.
817 (define_insn "fabd<mode>3"
818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
819 (abs:VHSDF_HSDF
820 (minus:VHSDF_HSDF
821 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
822 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
823 "TARGET_SIMD"
824 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
825 [(set_attr "type" "neon_fp_abd_<stype><q>")]
826 )
827
828 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 1 matches an immediate usable as a BIC mask (Db) and is
;; printed by aarch64_output_simd_mov_immediate as a BIC.
829 (define_insn "and<mode>3"
830 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
831 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
832 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
833 "TARGET_SIMD"
834 {
835 switch (which_alternative)
836 {
837 case 0:
838 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
839 case 1:
840 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
841 AARCH64_CHECK_BIC);
842 default:
843 gcc_unreachable ();
844 }
845 }
846 [(set_attr "type" "neon_logic<q>")]
847 )
848
849 ;; For ORR (vector, register) and ORR (vector, immediate)
850 (define_insn "ior<mode>3"
851 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
852 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
853 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
854 "TARGET_SIMD"
855 {
856 switch (which_alternative)
857 {
858 case 0:
859 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
860 case 1:
861 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
862 AARCH64_CHECK_ORR);
863 default:
864 gcc_unreachable ();
865 }
866 }
867 [(set_attr "type" "neon_logic<q>")]
868 )
869
;; Vector exclusive-or (EOR); no immediate form exists.
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
872 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
873 (match_operand:VDQ_I 2 "register_operand" "w")))]
874 "TARGET_SIMD"
875 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
876 [(set_attr "type" "neon_logic<q>")]
877 )
878
;; Vector bitwise NOT.
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
881 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
882 "TARGET_SIMD"
883 "not\t%0.<Vbtype>, %1.<Vbtype>"
884 [(set_attr "type" "neon_logic<q>")]
885 )
886
;; Insert a scalar into one lane of a vector (INS), leaving the other
;; lanes untouched.  Operand 2 arrives as a one-hot merge mask; the C
;; fragment converts it to a lane number (endian-adjusted) and then back
;; to a one-hot mask so %p2 prints the lane.  Sources: SIMD element 0,
;; a general register, or a one-lane load (LD1).
887 (define_insn "aarch64_simd_vec_set<mode>"
888 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
889 (vec_merge:VALL_F16
890 (vec_duplicate:VALL_F16
891 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
892 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
893 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
894 "TARGET_SIMD"
895 {
896 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
897 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
898 switch (which_alternative)
899 {
900 case 0:
901 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
902 case 1:
903 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
904 case 2:
905 return "ld1\\t{%0.<Vetype>}[%p2], %1";
906 default:
907 gcc_unreachable ();
908 }
909 }
910 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
911 )
912
;; Copy one lane of a vector into a lane of another vector of the same
;; mode (element-to-element INS).  Operand 2 is the one-hot destination
;; lane mask; operand 4 the source lane index.
913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
914 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
915 (vec_merge:VALL_F16
916 (vec_duplicate:VALL_F16
917 (vec_select:<VEL>
918 (match_operand:VALL_F16 3 "register_operand" "w")
919 (parallel
920 [(match_operand:SI 4 "immediate_operand" "i")])))
921 (match_operand:VALL_F16 1 "register_operand" "0")
922 (match_operand:SI 2 "immediate_operand" "i")))]
923 "TARGET_SIMD"
924 {
925 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
926 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
927 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
928
929 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
930 }
931 [(set_attr "type" "neon_ins<q>")]
932 )
933
;; As above, but the source vector has the opposite width (VSWAP_WIDTH),
;; so the source lane index is interpreted in that mode.
934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
935 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
936 (vec_merge:VALL_F16_NO_V2Q
937 (vec_duplicate:VALL_F16_NO_V2Q
938 (vec_select:<VEL>
939 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
940 (parallel
941 [(match_operand:SI 4 "immediate_operand" "i")])))
942 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
943 (match_operand:SI 2 "immediate_operand" "i")))]
944 "TARGET_SIMD"
945 {
946 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
947 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
948 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
949 INTVAL (operands[4]));
950
951 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
952 }
953 [(set_attr "type" "neon_ins<q>")]
954 )
955
;; Extract the sign bit of each float lane as 0/1 in the corresponding
;; integer lane: view the vector as integers and logically shift right
;; by (element bits - 1).
956 (define_expand "signbit<mode>2"
957 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
958 (use (match_operand:VDQSF 1 "register_operand"))]
959 "TARGET_SIMD"
960 {
961 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
962 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
963 shift_amount);
964 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
965
966 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
967 shift_vector));
968 DONE;
969 })
970
;; Logical shift right by immediate (USHR); operand 2 is a const vector
;; of identical per-lane shift counts (constraint "Dr").
971 (define_insn "aarch64_simd_lshr<mode>"
972 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
973 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
974 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
975 "TARGET_SIMD"
976 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
977 [(set_attr "type" "neon_shift_imm<q>")]
978 )
979
;; Arithmetic shift right by immediate (SSHR).
980 (define_insn "aarch64_simd_ashr<mode>"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
982 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
983 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
984 "TARGET_SIMD"
985 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
986 [(set_attr "type" "neon_shift_imm<q>")]
987 )
988
;; Shift left by immediate (SHL); left-shift immediates use the distinct
;; "Dl" range.
989 (define_insn "aarch64_simd_imm_shl<mode>"
990 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
991 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
992 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
993 "TARGET_SIMD"
994 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
995 [(set_attr "type" "neon_shift_imm<q>")]
996 )
997
;; Shift left by per-lane register amounts (SSHL); only left shifts are
;; represented, so the RTL code is a plain ashift.
998 (define_insn "aarch64_simd_reg_sshl<mode>"
999 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1000 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1001 (match_operand:VDQ_I 2 "register_operand" "w")))]
1002 "TARGET_SIMD"
1003 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1004 [(set_attr "type" "neon_shift_reg<q>")]
1005 )
1006
;; USHL with possibly-negative per-lane counts (negative count shifts
;; right); kept as an unspec because no single RTL code captures that.
1007 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1008 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1009 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1010 (match_operand:VDQ_I 2 "register_operand" "w")]
1011 UNSPEC_ASHIFT_UNSIGNED))]
1012 "TARGET_SIMD"
1013 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1014 [(set_attr "type" "neon_shift_reg<q>")]
1015 )
1016
;; SSHL counterpart of the above (arithmetic shift for negative counts).
1017 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1018 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1019 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1020 (match_operand:VDQ_I 2 "register_operand" "w")]
1021 UNSPEC_ASHIFT_SIGNED))]
1022 "TARGET_SIMD"
1023 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024 [(set_attr "type" "neon_shift_reg<q>")]
1025 )
1026
;; Vector shift left.  A constant count in [0, bit_width) maps to the
;; SHL-immediate pattern; anything else is forced into a register,
;; duplicated across a vector, and handled by SSHL.
1027 (define_expand "ashl<mode>3"
1028 [(match_operand:VDQ_I 0 "register_operand" "")
1029 (match_operand:VDQ_I 1 "register_operand" "")
1030 (match_operand:SI 2 "general_operand" "")]
1031 "TARGET_SIMD"
1032 {
1033 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1034 int shift_amount;
1035
1036 if (CONST_INT_P (operands[2]))
1037 {
1038 shift_amount = INTVAL (operands[2]);
   /* SHL immediates run from 0 to bit_width - 1 (unlike the right
      shifts below, which run from 1 to bit_width).  */
1039 if (shift_amount >= 0 && shift_amount < bit_width)
1040 {
1041 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1042 shift_amount);
1043 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1044 operands[1],
1045 tmp));
1046 DONE;
1047 }
1048 else
1049 {
1050 operands[2] = force_reg (SImode, operands[2]);
1051 }
1052 }
1053 else if (MEM_P (operands[2]))
1054 {
1055 operands[2] = force_reg (SImode, operands[2]);
1056 }
1057
   /* Register count: splat it across a vector and use SSHL.  */
1058 if (REG_P (operands[2]))
1059 {
1060 rtx tmp = gen_reg_rtx (<MODE>mode);
1061 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1062 convert_to_mode (<VEL>mode,
1063 operands[2],
1064 0)));
1065 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1066 tmp));
1067 DONE;
1068 }
1069 else
1070 FAIL;
1071 }
1072 )
1073
;; Vector logical shift right.  Constant counts in [1, bit_width] use
;; USHR-immediate; otherwise the (negated) count is splatted and fed to
;; USHL, which shifts right for negative per-lane counts.
1074 (define_expand "lshr<mode>3"
1075 [(match_operand:VDQ_I 0 "register_operand" "")
1076 (match_operand:VDQ_I 1 "register_operand" "")
1077 (match_operand:SI 2 "general_operand" "")]
1078 "TARGET_SIMD"
1079 {
1080 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1081 int shift_amount;
1082
1083 if (CONST_INT_P (operands[2]))
1084 {
1085 shift_amount = INTVAL (operands[2]);
   /* USHR immediates run from 1 to bit_width inclusive.  */
1086 if (shift_amount > 0 && shift_amount <= bit_width)
1087 {
1088 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1089 shift_amount);
1090 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1091 operands[1],
1092 tmp));
1093 DONE;
1094 }
1095 else
1096 operands[2] = force_reg (SImode, operands[2]);
1097 }
1098 else if (MEM_P (operands[2]))
1099 {
1100 operands[2] = force_reg (SImode, operands[2]);
1101 }
1102
1103 if (REG_P (operands[2]))
1104 {
   /* Negate the scalar count, splat it, and shift with USHL.  */
1105 rtx tmp = gen_reg_rtx (SImode);
1106 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1107 emit_insn (gen_negsi2 (tmp, operands[2]));
1108 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1109 convert_to_mode (<VEL>mode,
1110 tmp, 0)));
1111 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1112 operands[1],
1113 tmp1));
1114 DONE;
1115 }
1116 else
1117 FAIL;
1118 }
1119 )
1120
;; Vector arithmetic shift right; same shape as lshr<mode>3 above but
;; using SSHR for immediates and the signed SSHL unspec for registers.
1121 (define_expand "ashr<mode>3"
1122 [(match_operand:VDQ_I 0 "register_operand" "")
1123 (match_operand:VDQ_I 1 "register_operand" "")
1124 (match_operand:SI 2 "general_operand" "")]
1125 "TARGET_SIMD"
1126 {
1127 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1128 int shift_amount;
1129
1130 if (CONST_INT_P (operands[2]))
1131 {
1132 shift_amount = INTVAL (operands[2]);
   /* SSHR immediates run from 1 to bit_width inclusive.  */
1133 if (shift_amount > 0 && shift_amount <= bit_width)
1134 {
1135 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1136 shift_amount);
1137 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1138 operands[1],
1139 tmp));
1140 DONE;
1141 }
1142 else
1143 operands[2] = force_reg (SImode, operands[2]);
1144 }
1145 else if (MEM_P (operands[2]))
1146 {
1147 operands[2] = force_reg (SImode, operands[2]);
1148 }
1149
1150 if (REG_P (operands[2]))
1151 {
   /* Negate the scalar count, splat it, and shift with SSHL.  */
1152 rtx tmp = gen_reg_rtx (SImode);
1153 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1154 emit_insn (gen_negsi2 (tmp, operands[2]));
1155 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1156 convert_to_mode (<VEL>mode,
1157 tmp, 0)));
1158 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1159 operands[1],
1160 tmp1));
1161 DONE;
1162 }
1163 else
1164 FAIL;
1165 }
1166 )
1167
;; Vector-by-vector shift left: maps directly onto SSHL.
1168 (define_expand "vashl<mode>3"
1169 [(match_operand:VDQ_I 0 "register_operand" "")
1170 (match_operand:VDQ_I 1 "register_operand" "")
1171 (match_operand:VDQ_I 2 "register_operand" "")]
1172 "TARGET_SIMD"
1173 {
1174 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1175 operands[2]));
1176 DONE;
1177 })
1178
1179 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1180 ;; Negating individual lanes most certainly offsets the
1181 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane counts
;; and use SSHL (negative count == right shift).
1182 (define_expand "vashr<mode>3"
1183 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1184 (match_operand:VDQ_BHSI 1 "register_operand" "")
1185 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1186 "TARGET_SIMD"
1187 {
1188 rtx neg = gen_reg_rtx (<MODE>mode);
1189 emit (gen_neg<mode>2 (neg, operands[2]));
1190 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1191 neg));
1192 DONE;
1193 })
1194
1195 ;; DI vector shift
1196 (define_expand "aarch64_ashr_simddi"
1197 [(match_operand:DI 0 "register_operand" "=w")
1198 (match_operand:DI 1 "register_operand" "w")
1199 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1200 "TARGET_SIMD"
1201 {
1202 /* An arithmetic shift right by 64 fills the result with copies of the sign
1203 bit, just like asr by 63 - however the standard pattern does not handle
1204 a shift by 64. */
1205 if (INTVAL (operands[2]) == 64)
1206 operands[2] = GEN_INT (63);
1207 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1208 DONE;
1209 }
1210 )
1211
;; Vector-by-vector logical shift right: negate the per-lane counts and
;; use USHL.
1212 (define_expand "vlshr<mode>3"
1213 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1214 (match_operand:VDQ_BHSI 1 "register_operand" "")
1215 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1216 "TARGET_SIMD"
1217 {
1218 rtx neg = gen_reg_rtx (<MODE>mode);
1219 emit (gen_neg<mode>2 (neg, operands[2]));
1220 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1221 neg));
1222 DONE;
1223 })
1224
;; DI logical shift right via the intrinsic: a shift by 64 yields zero,
;; which the standard lshrdi3 pattern cannot express.
1225 (define_expand "aarch64_lshr_simddi"
1226 [(match_operand:DI 0 "register_operand" "=w")
1227 (match_operand:DI 1 "register_operand" "w")
1228 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1229 "TARGET_SIMD"
1230 {
1231 if (INTVAL (operands[2]) == 64)
1232 emit_move_insn (operands[0], const0_rtx);
1233 else
1234 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1235 DONE;
1236 }
1237 )
1238
1239 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register element shift for 64-bit vectors, treating the D reg
;; as a 64-bit scalar: USHR moves elements towards element 0 on
;; little-endian; on big-endian the layout is reversed, so SHL is the
;; equivalent operation.
1240 (define_insn "vec_shr_<mode>"
1241 [(set (match_operand:VD 0 "register_operand" "=w")
1242 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1243 (match_operand:SI 2 "immediate_operand" "i")]
1244 UNSPEC_VEC_SHR))]
1245 "TARGET_SIMD"
1246 {
1247 if (BYTES_BIG_ENDIAN)
1248 return "shl %d0, %d1, %2";
1249 else
1250 return "ushr %d0, %d1, %2";
1251 }
1252 [(set_attr "type" "neon_shift_imm")]
1253 )
1254
;; Standard vec_set: convert the lane index in operand 2 into the
;; one-hot vec_merge mask expected by aarch64_simd_vec_set<mode>.
1255 (define_expand "vec_set<mode>"
1256 [(match_operand:VALL_F16 0 "register_operand" "+w")
1257 (match_operand:<VEL> 1 "register_operand" "w")
1258 (match_operand:SI 2 "immediate_operand" "")]
1259 "TARGET_SIMD"
1260 {
1261 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1262 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1263 GEN_INT (elem), operands[0]))
;
1264 DONE;
1265 }
1266 )
1267
1268
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; MLA accumulates in place, hence operand 1 is tied to the output ("0").
1269 (define_insn "aarch64_mla<mode>"
1270 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1271 (plus:VDQ_BHSI (mult:VDQ_BHSI
1272 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1274 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1275 "TARGET_SIMD"
1276 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1277 [(set_attr "type" "neon_mla_<Vetype><q>")]
1278 )
1279
;; MLA with one multiplicand broadcast from lane [operand 2] of
;; operand 1 (same-width vector); lane index is endian-corrected.
1280 (define_insn "*aarch64_mla_elt<mode>"
1281 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1282 (plus:VDQHS
1283 (mult:VDQHS
1284 (vec_duplicate:VDQHS
1285 (vec_select:<VEL>
1286 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1287 (parallel [(match_operand:SI 2 "immediate_operand")])))
1288 (match_operand:VDQHS 3 "register_operand" "w"))
1289 (match_operand:VDQHS 4 "register_operand" "0")))]
1290 "TARGET_SIMD"
1291 {
1292 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1293 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1294 }
1295 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1296 )
1297
;; As above, with the lane taken from a vector of the opposite width.
1298 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1299 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1300 (plus:VDQHS
1301 (mult:VDQHS
1302 (vec_duplicate:VDQHS
1303 (vec_select:<VEL>
1304 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1305 (parallel [(match_operand:SI 2 "immediate_operand")])))
1306 (match_operand:VDQHS 3 "register_operand" "w"))
1307 (match_operand:VDQHS 4 "register_operand" "0")))]
1308 "TARGET_SIMD"
1309 {
1310 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1311 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1312 }
1313 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1314 )
1315
;; MLA where the broadcast multiplicand is already a scalar: use lane 0
;; of its register directly.
1316 (define_insn "*aarch64_mla_elt_merge<mode>"
1317 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1318 (plus:VDQHS
1319 (mult:VDQHS (vec_duplicate:VDQHS
1320 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1321 (match_operand:VDQHS 2 "register_operand" "w"))
1322 (match_operand:VDQHS 3 "register_operand" "0")))]
1323 "TARGET_SIMD"
1324 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
1328 (define_insn "aarch64_mls<mode>"
1329 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1330 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1331 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1332 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1333 "TARGET_SIMD"
1334 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1335 [(set_attr "type" "neon_mla_<Vetype><q>")]
1336 )
1337
;; MLS by-lane variants, mirroring the MLA by-lane patterns above.
1338 (define_insn "*aarch64_mls_elt<mode>"
1339 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1340 (minus:VDQHS
1341 (match_operand:VDQHS 4 "register_operand" "0")
1342 (mult:VDQHS
1343 (vec_duplicate:VDQHS
1344 (vec_select:<VEL>
1345 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1346 (parallel [(match_operand:SI 2 "immediate_operand")])))
1347 (match_operand:VDQHS 3 "register_operand" "w"))))]
1348 "TARGET_SIMD"
1349 {
1350 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1351 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1352 }
1353 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1354 )
1355
1356 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1357 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1358 (minus:VDQHS
1359 (match_operand:VDQHS 4 "register_operand" "0")
1360 (mult:VDQHS
1361 (vec_duplicate:VDQHS
1362 (vec_select:<VEL>
1363 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1364 (parallel [(match_operand:SI 2 "immediate_operand")])))
1365 (match_operand:VDQHS 3 "register_operand" "w"))))]
1366 "TARGET_SIMD"
1367 {
1368 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1369 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1370 }
1371 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1372 )
1373
1374 (define_insn "*aarch64_mls_elt_merge<mode>"
1375 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1376 (minus:VDQHS
1377 (match_operand:VDQHS 1 "register_operand" "0")
1378 (mult:VDQHS (vec_duplicate:VDQHS
1379 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1380 (match_operand:VDQHS 3 "register_operand" "w"))))]
1381 "TARGET_SIMD"
1382 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min (SMAX/SMIN/UMAX/UMIN).
1387 (define_insn "<su><maxmin><mode>3"
1388 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1389 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1390 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1391 "TARGET_SIMD"
1392 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1393 [(set_attr "type" "neon_minmax<q>")]
1394 )
1395
;; V2DI max/min: there is no 64-bit-element SMAX etc., so synthesise
;; via a compare and a vcond select.
1396 (define_expand "<su><maxmin>v2di3"
1397 [(set (match_operand:V2DI 0 "register_operand" "")
1398 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1399 (match_operand:V2DI 2 "register_operand" "")))]
1400 "TARGET_SIMD"
1401 {
1402 enum rtx_code cmp_operator;
1403 rtx cmp_fmt;
1404
   /* Pick the comparison whose true lanes should take operand 1.  */
1405 switch (<CODE>)
1406 {
1407 case UMIN:
1408 cmp_operator = LTU;
1409 break;
1410 case SMIN:
1411 cmp_operator = LT;
1412 break;
1413 case UMAX:
1414 cmp_operator = GTU;
1415 break;
1416 case SMAX:
1417 cmp_operator = GT;
1418 break;
1419 default:
1420 gcc_unreachable ();
1421 }
1422
1423 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1424 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1425 operands[2], cmp_fmt, operands[1], operands[2]));
1426 DONE;
1427 })
1428
1429 ;; Pairwise Integer Max/Min operations.
1430 (define_insn "aarch64_<maxmin_uns>p<mode>"
1431 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1433 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1434 MAXMINV))]
1435 "TARGET_SIMD"
1436 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437 [(set_attr "type" "neon_minmax<q>")]
1438 )
1439
1440 ;; Pairwise FP Max/Min operations.
1441 (define_insn "aarch64_<maxmin_uns>p<mode>"
1442 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1443 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1444 (match_operand:VHSDF 2 "register_operand" "w")]
1445 FMAXMINV))]
1446 "TARGET_SIMD"
1447 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1448 [(set_attr "type" "neon_minmax<q>")]
1449 )
1450
1451 ;; vec_concat gives a new vector with the low elements from operand 1, and
1452 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1453 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1454 ;; What that means, is that the RTL descriptions of the below patterns
1455 ;; need to change depending on endianness.
1456
1457 ;; Move to the low architectural bits of the register.
1458 ;; On little-endian this is { operand, zeroes }
1459 ;; On big-endian this is { zeroes, operand }
1460
;; Little-endian form, non-V2x modes: the 64-bit writes below
;; implicitly zero the upper half of the Q register.
1461 (define_insn "move_lo_quad_internal_<mode>"
1462 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1463 (vec_concat:VQ_NO2E
1464 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1465 (vec_duplicate:<VHALF> (const_int 0))))]
1466 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1467 "@
1468 dup\\t%d0, %1.d[0]
1469 fmov\\t%d0, %1
1470 dup\\t%d0, %1"
1471 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1472 (set_attr "length" "4")
1473 (set_attr "arch" "simd,fp,simd")]
1474 )
1475
;; Little-endian form for two-element modes, where the zero half is a
;; plain (const_int 0) rather than a vec_duplicate.
1476 (define_insn "move_lo_quad_internal_<mode>"
1477 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1478 (vec_concat:VQ_2E
1479 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1480 (const_int 0)))]
1481 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1482 "@
1483 dup\\t%d0, %1.d[0]
1484 fmov\\t%d0, %1
1485 dup\\t%d0, %1"
1486 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1487 (set_attr "length" "4")
1488 (set_attr "arch" "simd,fp,simd")]
1489 )
1490
;; Big-endian counterparts: the zero half comes first in the
;; vec_concat, but the emitted instructions are identical.
1491 (define_insn "move_lo_quad_internal_be_<mode>"
1492 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1493 (vec_concat:VQ_NO2E
1494 (vec_duplicate:<VHALF> (const_int 0))
1495 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1496 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1497 "@
1498 dup\\t%d0, %1.d[0]
1499 fmov\\t%d0, %1
1500 dup\\t%d0, %1"
1501 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1502 (set_attr "length" "4")
1503 (set_attr "arch" "simd,fp,simd")]
1504 )
1505
1506 (define_insn "move_lo_quad_internal_be_<mode>"
1507 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1508 (vec_concat:VQ_2E
1509 (const_int 0)
1510 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1511 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1512 "@
1513 dup\\t%d0, %1.d[0]
1514 fmov\\t%d0, %1
1515 dup\\t%d0, %1"
1516 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1517 (set_attr "length" "4")
1518 (set_attr "arch" "simd,fp,simd")]
1519 )
1520
;; Endianness-dispatching wrapper around the patterns above.
1521 (define_expand "move_lo_quad_<mode>"
1522 [(match_operand:VQ 0 "register_operand")
1523 (match_operand:VQ 1 "register_operand")]
1524 "TARGET_SIMD"
1525 {
1526 if (BYTES_BIG_ENDIAN)
1527 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1528 else
1529 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1530 DONE;
1531 }
1532 )
1533
1534 ;; Move operand1 to the high architectural bits of the register, keeping
1535 ;; the low architectural bits of operand2.
1536 ;; For little-endian this is { operand2, operand1 }
1537 ;; For big-endian this is { operand1, operand2 }
1538
;; Little-endian: keep the existing low half of operand 0 and INS the
;; new value into d[1].  Operand 2 is the lo-half lane-selector parallel.
1539 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1540 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1541 (vec_concat:VQ
1542 (vec_select:<VHALF>
1543 (match_dup 0)
1544 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1545 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1546 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1547 "@
1548 ins\\t%0.d[1], %1.d[0]
1549 ins\\t%0.d[1], %1"
1550 [(set_attr "type" "neon_ins")]
1551 )
1552
;; Big-endian counterpart: RTL operand order in the vec_concat is
;; swapped, the instructions are the same.
1553 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1554 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1555 (vec_concat:VQ
1556 (match_operand:<VHALF> 1 "register_operand" "w,r")
1557 (vec_select:<VHALF>
1558 (match_dup 0)
1559 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1560 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1561 "@
1562 ins\\t%0.d[1], %1.d[0]
1563 ins\\t%0.d[1], %1"
1564 [(set_attr "type" "neon_ins")]
1565 )
1566
;; Build the lo-half selector and dispatch on endianness.
1567 (define_expand "move_hi_quad_<mode>"
1568 [(match_operand:VQ 0 "register_operand" "")
1569 (match_operand:<VHALF> 1 "register_operand" "")]
1570 "TARGET_SIMD"
1571 {
1572 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1573 if (BYTES_BIG_ENDIAN)
1574 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1575 operands[1], p));
1576 else
1577 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1578 operands[1], p));
1579 DONE;
1580 })
1581
1582 ;; Narrowing operations.
1583
1584 ;; For doubles.
;; Truncate each lane of a 128-bit vector to half width (XTN),
;; producing a 64-bit result.
1585 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1586 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1587 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1588 "TARGET_SIMD"
1589 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1590 [(set_attr "type" "neon_shift_imm_narrow_q")]
1591 )
1592
;; Pack two 64-bit vectors: glue them into one 128-bit temporary
;; (lo/hi swapped on big-endian so lane order matches), then XTN.
1593 (define_expand "vec_pack_trunc_<mode>"
1594 [(match_operand:<VNARROWD> 0 "register_operand" "")
1595 (match_operand:VDN 1 "register_operand" "")
1596 (match_operand:VDN 2 "register_operand" "")]
1597 "TARGET_SIMD"
1598 {
1599 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1602
1603 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1606 DONE;
1607 })
1608
1609 ;; For quads.
1610
;; Pack two 128-bit vectors with XTN + XTN2; the earlyclobber ("=&w")
;; stops the output overlapping the not-yet-consumed second input.
1611 (define_insn "vec_pack_trunc_<mode>"
1612 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613 (vec_concat:<VNARROWQ2>
1614 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1616 "TARGET_SIMD"
1617 {
1618 if (BYTES_BIG_ENDIAN)
1619 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1620 else
1621 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1622 }
1623 [(set_attr "type" "multiple")
1624 (set_attr "length" "8")]
1625 )
1626
1627 ;; Widening operations.
1628
;; Sign/zero-extend the low half of a 128-bit vector (SXTL/UXTL).
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632 (match_operand:VQW 1 "register_operand" "w")
1633 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1634 )))]
1635 "TARGET_SIMD"
1636 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637 [(set_attr "type" "neon_shift_imm_long")]
1638 )
1639
;; Sign/zero-extend the high half (SXTL2/UXTL2).
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 1 "register_operand" "w")
1644 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1645 )))]
1646 "TARGET_SIMD"
1647 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648 [(set_attr "type" "neon_shift_imm_long")]
1649 )
1650
;; Standard-name expanders: build the half-selector parallels and
;; delegate to the insns above.
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652 [(match_operand:<VWIDE> 0 "register_operand" "")
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1654 "TARGET_SIMD"
1655 {
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1658 operands[1], p));
1659 DONE;
1660 }
1661 )
1662
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand" "")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1666 "TARGET_SIMD"
1667 {
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1670 operands[1], p));
1671 DONE;
1672 }
1673 )
1674
1675 ;; Widening arithmetic.
1676
;; Widening multiply-accumulate on the low halves (SMLAL/UMLAL):
;; operand 0 = operand 1 + extend(lo(op2)) * extend(lo(op4)).
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1679 (plus:<VWIDE>
1680 (mult:<VWIDE>
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1686 (match_dup 3))))
1687 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1688 "TARGET_SIMD"
1689 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690 [(set_attr "type" "neon_mla_<Vetype>_long")]
1691 )
1692
;; High-half variant (SMLAL2/UMLAL2).
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1695 (plus:<VWIDE>
1696 (mult:<VWIDE>
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1702 (match_dup 3))))
1703 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1704 "TARGET_SIMD"
1705 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706 [(set_attr "type" "neon_mla_<Vetype>_long")]
1707 )
1708
;; Widening multiply-subtract on the low halves (SMLSL/UMLSL).
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1711 (minus:<VWIDE>
1712 (match_operand:<VWIDE> 1 "register_operand" "0")
1713 (mult:<VWIDE>
1714 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715 (match_operand:VQW 2 "register_operand" "w")
1716 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718 (match_operand:VQW 4 "register_operand" "w")
1719 (match_dup 3))))))]
1720 "TARGET_SIMD"
1721 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722 [(set_attr "type" "neon_mla_<Vetype>_long")]
1723 )
1724
;; High-half variant (SMLSL2/UMLSL2).
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1727 (minus:<VWIDE>
1728 (match_operand:<VWIDE> 1 "register_operand" "0")
1729 (mult:<VWIDE>
1730 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731 (match_operand:VQW 2 "register_operand" "w")
1732 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734 (match_operand:VQW 4 "register_operand" "w")
1735 (match_dup 3))))))]
1736 "TARGET_SIMD"
1737 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738 [(set_attr "type" "neon_mla_<Vetype>_long")]
1739 )
1740
;; Whole-register widening MLA on 64-bit inputs.
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1743 (plus:<VWIDE>
1744 (mult:<VWIDE>
1745 (ANY_EXTEND:<VWIDE>
1746 (match_operand:VD_BHSI 1 "register_operand" "w"))
1747 (ANY_EXTEND:<VWIDE>
1748 (match_operand:VD_BHSI 2 "register_operand" "w")))
1749 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1750 "TARGET_SIMD"
1751 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752 [(set_attr "type" "neon_mla_<Vetype>_long")]
1753 )
1754
;; Whole-register widening MLS on 64-bit inputs.
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1757 (minus:<VWIDE>
1758 (match_operand:<VWIDE> 1 "register_operand" "0")
1759 (mult:<VWIDE>
1760 (ANY_EXTEND:<VWIDE>
1761 (match_operand:VD_BHSI 2 "register_operand" "w"))
1762 (ANY_EXTEND:<VWIDE>
1763 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1764 "TARGET_SIMD"
1765 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766 [(set_attr "type" "neon_mla_<Vetype>_long")]
1767 )
1768
;; Widening multiply of the low halves (SMULL/UMULL).
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 1 "register_operand" "w")
1773 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775 (match_operand:VQW 2 "register_operand" "w")
1776 (match_dup 3)))))]
1777 "TARGET_SIMD"
1778 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779 [(set_attr "type" "neon_mul_<Vetype>_long")]
1780 )
1781
;; Standard-name expander: supply the lo-half selector.
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783 [(match_operand:<VWIDE> 0 "register_operand" "")
1784 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1785 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1786 "TARGET_SIMD"
1787 {
1788 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1790 operands[1],
1791 operands[2], p));
1792 DONE;
1793 }
1794 )
1795
;; Widening multiply of the high halves (SMULL2/UMULL2).
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799 (match_operand:VQW 1 "register_operand" "w")
1800 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1803 (match_dup 3)))))]
1804 "TARGET_SIMD"
1805 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806 [(set_attr "type" "neon_mul_<Vetype>_long")]
1807 )
1808
;; Standard-name expander: supply the hi-half selector.
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810 [(match_operand:<VWIDE> 0 "register_operand" "")
1811 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1812 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1813 "TARGET_SIMD"
1814 {
1815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1817 operands[1],
1818 operands[2], p));
1819 DONE;
1820
1821 }
1822 )
1823
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1829 ;;
1830 ;; Floating-point operations can raise an exception. Vectorizing such
1831 ;; operations is safe because of reasons explained below.
1832 ;;
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature. In the
1835 ;; event of a floating-point exception being raised by vectorised
1836 ;; code then:
1837 ;; 1. If trapped floating-point exceptions are available, then a trap
1838 ;; will be taken when any lane raises an enabled exception. A trap
1839 ;; handler may determine which lane raised the exception.
1840 ;; 2. Alternatively a sticky exception flag is set in the
1841 ;; floating-point status register (FPSR). Software may explicitly
1842 ;; test the exception flags, in which case the tests will either
1843 ;; prevent vectorisation, allowing precise identification of the
1844 ;; failing operation, or if tested outside of vectorisable regions
1845 ;; then the specific operation and lane are not of interest.
1846
1847 ;; FP arithmetic operations.
1848
;; Vector FP addition (FADD) over half/single/double element vectors.
1849 (define_insn "add<mode>3"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")))]
1853 "TARGET_SIMD"
1854 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1856 )
1857
;; Vector FP subtraction (FSUB).
1858 (define_insn "sub<mode>3"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861 (match_operand:VHSDF 2 "register_operand" "w")))]
1862 "TARGET_SIMD"
1863 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1865 )
1866
;; Vector FP multiplication (FMUL).
1867 (define_insn "mul<mode>3"
1868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870 (match_operand:VHSDF 2 "register_operand" "w")))]
1871 "TARGET_SIMD"
1872 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1874 )
1875
;; Vector FP division.  First try to emit an approximating Newton-Raphson
;; sequence (aarch64_emit_approx_div); if that declines, fall through to
;; the *div<mode>3 FDIV insn below.
1876 (define_expand "div<mode>3"
1877 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1878 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1879 (match_operand:VHSDF 2 "register_operand" "w")))]
1880 "TARGET_SIMD"
1881 {
1882 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1883 DONE;
1884
1885 operands[1] = force_reg (<MODE>mode, operands[1]);
1886 })
1887
;; Plain hardware FDIV, used when the approximation is not profitable.
1888 (define_insn "*div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891 (match_operand:VHSDF 2 "register_operand" "w")))]
1892 "TARGET_SIMD"
1893 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894 [(set_attr "type" "neon_fp_div_<stype><q>")]
1895 )
1896
;; Vector FP negation (FNEG).
1897 (define_insn "neg<mode>2"
1898 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1900 "TARGET_SIMD"
1901 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1903 )
1904
;; Vector FP absolute value (FABS).
1905 (define_insn "abs<mode>2"
1906 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1908 "TARGET_SIMD"
1909 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1911 )
1912
;; Fused multiply-add: op0 = op1 * op2 + op3.  FMLA accumulates in place,
;; hence the "0" constraint tying operand 3 to the destination.
1913 (define_insn "fma<mode>4"
1914 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916 (match_operand:VHSDF 2 "register_operand" "w")
1917 (match_operand:VHSDF 3 "register_operand" "0")))]
1918 "TARGET_SIMD"
1919 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1921 )
1922
;; FMLA with one multiplicand broadcast from a lane of a same-width vector.
;; The lane number is adjusted for endianness at output time.
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924 [(set (match_operand:VDQF 0 "register_operand" "=w")
1925 (fma:VDQF
1926 (vec_duplicate:VDQF
1927 (vec_select:<VEL>
1928 (match_operand:VDQF 1 "register_operand" "<h_con>")
1929 (parallel [(match_operand:SI 2 "immediate_operand")])))
1930 (match_operand:VDQF 3 "register_operand" "w")
1931 (match_operand:VDQF 4 "register_operand" "0")))]
1932 "TARGET_SIMD"
1933 {
1934 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1936 }
1937 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1938 )
1939
;; As above, but the lane comes from the opposite-width vector mode
;; (e.g. V2SF lane feeding a V4SF operation).
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1942 (fma:VDQSF
1943 (vec_duplicate:VDQSF
1944 (vec_select:<VEL>
1945 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946 (parallel [(match_operand:SI 2 "immediate_operand")])))
1947 (match_operand:VDQSF 3 "register_operand" "w")
1948 (match_operand:VDQSF 4 "register_operand" "0")))]
1949 "TARGET_SIMD"
1950 {
1951 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1953 }
1954 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1955 )
1956
;; FMLA where the multiplicand is a duplicated scalar register; emitted
;; as a by-element FMLA using lane 0.
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958 [(set (match_operand:VMUL 0 "register_operand" "=w")
1959 (fma:VMUL
1960 (vec_duplicate:VMUL
1961 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962 (match_operand:VMUL 2 "register_operand" "w")
1963 (match_operand:VMUL 3 "register_operand" "0")))]
1964 "TARGET_SIMD"
1965 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1967 )
1968
;; Scalar DF fma taking the multiplicand from a V2DF lane.
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970 [(set (match_operand:DF 0 "register_operand" "=w")
1971 (fma:DF
1972 (vec_select:DF
1973 (match_operand:V2DF 1 "register_operand" "w")
1974 (parallel [(match_operand:SI 2 "immediate_operand")]))
1975 (match_operand:DF 3 "register_operand" "w")
1976 (match_operand:DF 4 "register_operand" "0")))]
1977 "TARGET_SIMD"
1978 {
1979 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1981 }
1982 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1983 )
1984
;; Fused multiply-subtract: op0 = -op1 * op2 + op3, i.e. FMLS.
;; Mirrors fma<mode>4 with one multiplicand negated.
1985 (define_insn "fnma<mode>4"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987 (fma:VHSDF
1988 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989 (match_operand:VHSDF 2 "register_operand" "w")
1990 (match_operand:VHSDF 3 "register_operand" "0")))]
1991 "TARGET_SIMD"
1992 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1994 )
1995
;; FMLS with one multiplicand broadcast from a lane; endian-adjusted lane.
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997 [(set (match_operand:VDQF 0 "register_operand" "=w")
1998 (fma:VDQF
1999 (neg:VDQF
2000 (match_operand:VDQF 3 "register_operand" "w"))
2001 (vec_duplicate:VDQF
2002 (vec_select:<VEL>
2003 (match_operand:VDQF 1 "register_operand" "<h_con>")
2004 (parallel [(match_operand:SI 2 "immediate_operand")])))
2005 (match_operand:VDQF 4 "register_operand" "0")))]
2006 "TARGET_SIMD"
2007 {
2008 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2010 }
2011 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2012 )
2013
;; As above, lane taken from the opposite-width vector mode.
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2016 (fma:VDQSF
2017 (neg:VDQSF
2018 (match_operand:VDQSF 3 "register_operand" "w"))
2019 (vec_duplicate:VDQSF
2020 (vec_select:<VEL>
2021 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022 (parallel [(match_operand:SI 2 "immediate_operand")])))
2023 (match_operand:VDQSF 4 "register_operand" "0")))]
2024 "TARGET_SIMD"
2025 {
2026 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2028 }
2029 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2030 )
2031
;; FMLS where the multiplicand is a duplicated scalar; by-element lane 0.
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033 [(set (match_operand:VMUL 0 "register_operand" "=w")
2034 (fma:VMUL
2035 (neg:VMUL
2036 (match_operand:VMUL 2 "register_operand" "w"))
2037 (vec_duplicate:VMUL
2038 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039 (match_operand:VMUL 3 "register_operand" "0")))]
2040 "TARGET_SIMD"
2041 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2043 )
2044
;; Scalar DF fnma taking one operand from a V2DF lane (negation is on
;; the other multiplicand, which is equivalent for FMLS).
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046 [(set (match_operand:DF 0 "register_operand" "=w")
2047 (fma:DF
2048 (vec_select:DF
2049 (match_operand:V2DF 1 "register_operand" "w")
2050 (parallel [(match_operand:SI 2 "immediate_operand")]))
2051 (neg:DF
2052 (match_operand:DF 3 "register_operand" "w"))
2053 (match_operand:DF 4 "register_operand" "0")))]
2054 "TARGET_SIMD"
2055 {
2056 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2058 }
2059 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2060 )
2061
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format; the FRINT iterator supplies the
;; rounding-mode suffix (z, p, m, i, x, a, n).
2064 (define_insn "<frint_pattern><mode>2"
2065 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2067 FRINT))]
2068 "TARGET_SIMD"
2069 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070 [(set_attr "type" "neon_fp_round_<stype><q>")]
2071 )
2072
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP -> integer conversion with an explicit rounding mode folded in.
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078 [(match_operand:VHSDF 1 "register_operand" "w")]
2079 FCVT)))]
2080 "TARGET_SIMD"
2081 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2083 )
2084
2085 ;; HF Scalar variants of related SIMD instructions.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087 [(set (match_operand:HI 0 "register_operand" "=w")
2088 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2089 FCVT)))]
2090 "TARGET_SIMD_F16INST"
2091 "fcvt<frint_suffix><su>\t%h0, %h1"
2092 [(set_attr "type" "neon_fp_to_int_s")]
2093 )
2094
;; Scalar HF -> HI truncating conversion (FCVTZ{S,U}).
2095 (define_insn "<optab>_trunchfhi2"
2096 [(set (match_operand:HI 0 "register_operand" "=w")
2097 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098 "TARGET_SIMD_F16INST"
2099 "fcvtz<su>\t%h0, %h1"
2100 [(set_attr "type" "neon_fp_to_int_s")]
2101 )
2102
;; Scalar HI -> HF conversion ({S,U}CVTF).
2103 (define_insn "<optab>hihf2"
2104 [(set (match_operand:HF 0 "register_operand" "=w")
2105 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106 "TARGET_SIMD_F16INST"
2107 "<su_optab>cvtf\t%h0, %h1"
2108 [(set_attr "type" "neon_int_to_fp_s")]
2109 )
2110
;; Combine (x * 2^n) followed by truncation to integer into a single
;; fixed-point FCVTZ with #fbits, when the multiplier is a power of two
;; within range.
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2114 [(mult:VDQF
2115 (match_operand:VDQF 1 "register_operand" "w")
2116 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2117 UNSPEC_FRINTZ)))]
2118 "TARGET_SIMD
2119 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2121 {
2122 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2123 char buf[64];
2124 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125 output_asm_insn (buf, operands);
2126 return "";
2127 }
2128 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2129 )
2130
;; Standard fix/fixuns patterns; truncation is expressed via UNSPEC_FRINTZ.
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134 [(match_operand:VHSDF 1 "register_operand")]
2135 UNSPEC_FRINTZ)))]
2136 "TARGET_SIMD"
2137 {})
2138
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142 [(match_operand:VHSDF 1 "register_operand")]
2143 UNSPEC_FRINTZ)))]
2144 "TARGET_SIMD"
2145 {})
2146
;; ftrunc: round toward zero, result still in FP format.
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148 [(set (match_operand:VHSDF 0 "register_operand")
2149 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2150 UNSPEC_FRINTZ))]
2151 "TARGET_SIMD"
2152 {})
2153
;; Integer -> FP conversion ({S,U}CVTF) for vector modes.
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2156 (FLOATUORS:VHSDF
2157 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2158 "TARGET_SIMD"
2159 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2161 )
2162
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2166
2167 ;; Float widening operations.
2168
;; FCVTL: widen the LOW half of a vector of HF/SF to SF/DF.  Operand 2 is
;; the low-half lane-selection PARALLEL.
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171 (float_extend:<VWIDE> (vec_select:<VHALF>
2172 (match_operand:VQ_HSF 1 "register_operand" "w")
2173 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2174 )))]
2175 "TARGET_SIMD"
2176 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177 [(set_attr "type" "neon_fp_cvt_widen_s")]
2178 )
2179
2180 ;; Convert between fixed-point and floating-point (vector modes)
2181
;; FP -> fixed-point with #fbits fraction bits (operand 2).
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184 (unspec:<VHSDF:FCVT_TARGET>
2185 [(match_operand:VHSDF 1 "register_operand" "w")
2186 (match_operand:SI 2 "immediate_operand" "i")]
2187 FCVT_F2FIXED))]
2188 "TARGET_SIMD"
2189 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2191 )
2192
;; Fixed-point -> FP with #fbits fraction bits (operand 2).
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195 (unspec:<VDQ_HSDI:FCVT_TARGET>
2196 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197 (match_operand:SI 2 "immediate_operand" "i")]
2198 FCVT_FIXED2F))]
2199 "TARGET_SIMD"
2200 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2202 )
2203
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns their behavior is as required.
2212
;; Standard pattern: FP-widen the low half.  Builds the low-half PARALLEL
;; ('false' = lo) and emits the *_lo_* insn above.
2213 (define_expand "vec_unpacks_lo_<mode>"
2214 [(match_operand:<VWIDE> 0 "register_operand" "")
2215 (match_operand:VQ_HSF 1 "register_operand" "")]
2216 "TARGET_SIMD"
2217 {
2218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2220 operands[1], p));
2221 DONE;
2222 }
2223 )
2224
;; FCVTL2: widen the HIGH half.  Operand 2 selects the high-half lanes.
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227 (float_extend:<VWIDE> (vec_select:<VHALF>
2228 (match_operand:VQ_HSF 1 "register_operand" "w")
2229 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2230 )))]
2231 "TARGET_SIMD"
2232 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233 [(set_attr "type" "neon_fp_cvt_widen_s")]
2234 )
2235
;; Standard pattern: FP-widen the HIGH half.  Builds the high-half
;; lane-selection PARALLEL ('true' = hi) and emits the *_hi_* insn above
;; (FCVTL2).  Note: the generator must be the _hi_ variant — the _lo_
;; insn's operand 2 uses the vect_par_cnst_lo_half predicate, so pairing
;; it with a hi-half PARALLEL would produce an unrecognizable insn.
2236 (define_expand "vec_unpacks_hi_<mode>"
2237 [(match_operand:<VWIDE> 0 "register_operand" "")
2238 (match_operand:VQ_HSF 1 "register_operand" "")]
2239 "TARGET_SIMD"
2240 {
2241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2243 operands[1], p));
2244 DONE;
2245 }
2246 )
;; FCVTL on 64-bit vectors: widen V2SF->V2DF / V4HF->V4SF.
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249 (float_extend:<VWIDE>
2250 (match_operand:VDF 1 "register_operand" "w")))]
2251 "TARGET_SIMD"
2252 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253 [(set_attr "type" "neon_fp_cvt_widen_s")]
2254 )
2255
2256 ;; Float narrowing operations.
2257
;; FCVTN: narrow a full-width vector into a 64-bit result.
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259 [(set (match_operand:VDF 0 "register_operand" "=w")
2260 (float_truncate:VDF
2261 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2262 "TARGET_SIMD"
2263 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2265 )
2266
;; FCVTN2, little-endian form: narrowed result lands in the high half of
;; the destination, low half (operand 1) is preserved via the "0" tie.
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2269 (vec_concat:<VDBL>
2270 (match_operand:VDF 1 "register_operand" "0")
2271 (float_truncate:VDF
2272 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2276 )
2277
;; FCVTN2, big-endian form: vec_concat operand order is swapped to match
;; big-endian lane numbering.
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2280 (vec_concat:<VDBL>
2281 (float_truncate:VDF
2282 (match_operand:<VWIDE> 2 "register_operand" "w"))
2283 (match_operand:VDF 1 "register_operand" "0")))]
2284 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2287 )
2288
;; Dispatch to the _le/_be variant matching the target endianness.
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290 [(match_operand:<VDBL> 0 "register_operand" "=w")
2291 (match_operand:VDF 1 "register_operand" "0")
2292 (match_operand:<VWIDE> 2 "register_operand" "w")]
2293 "TARGET_SIMD"
2294 {
2295 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298 emit_insn (gen (operands[0], operands[1], operands[2]));
2299 DONE;
2300 }
2301 )
2302
;; Pack two V2DF into one V4SF: FCVTN into the low half, FCVTN2 into the
;; high half.  Which source feeds which half depends on endianness (see
;; the vec_unpacks comment earlier in this file).
2303 (define_expand "vec_pack_trunc_v2df"
2304 [(set (match_operand:V4SF 0 "register_operand")
2305 (vec_concat:V4SF
2306 (float_truncate:V2SF
2307 (match_operand:V2DF 1 "register_operand"))
2308 (float_truncate:V2SF
2309 (match_operand:V2DF 2 "register_operand"))
2310 ))]
2311 "TARGET_SIMD"
2312 {
2313 rtx tmp = gen_reg_rtx (V2SFmode);
2314 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2316
2317 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319 tmp, operands[hi]));
2320 DONE;
2321 }
2322 )
2323
;; Pack two scalar DF into one V2SF: assemble a V2DF from the two scalars
;; (endianness-aware), then narrow it with FCVTN.
2324 (define_expand "vec_pack_trunc_df"
2325 [(set (match_operand:V2SF 0 "register_operand")
2326 (vec_concat:V2SF
2327 (float_truncate:SF
2328 (match_operand:DF 1 "register_operand"))
2329 (float_truncate:SF
2330 (match_operand:DF 2 "register_operand"))
2331 ))]
2332 "TARGET_SIMD"
2333 {
2334 rtx tmp = gen_reg_rtx (V2SFmode);
2335 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2337
2338 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2341 DONE;
2342 }
2343 )
2344
2345 ;; FP Max/Min
2346 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2347 ;; expression like:
2348 ;; a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2351 ;; -ffast-math.
2352 ;;
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2358 ;; NaNs.
2359
;; smax/smin standard patterns mapped to FMAXNM/FMINNM (NaN-propagation
;; semantics are acceptable here, see block comment above).
2360 (define_insn "<su><maxmin><mode>3"
2361 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363 (match_operand:VHSDF 2 "register_operand" "w")))]
2364 "TARGET_SIMD"
2365 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2367 )
2368
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")]
2376 FMAXMIN_UNS))]
2377 "TARGET_SIMD"
2378 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2380 )
2381
2382 ;; 'across lanes' add.
2383
;; Integer add reduction to scalar: ADDV into a scratch vector, then
;; extract lane 0 (endian-adjusted).
2384 (define_expand "reduc_plus_scal_<mode>"
2385 [(match_operand:<VEL> 0 "register_operand" "=w")
2386 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2387 UNSPEC_ADDV)]
2388 "TARGET_SIMD"
2389 {
2390 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391 rtx scratch = gen_reg_rtx (<MODE>mode);
2392 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2394 DONE;
2395 }
2396 )
2397
;; Pairwise FP add (FADDP), building block for FP add reductions.
2398 (define_insn "aarch64_faddp<mode>"
2399 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401 (match_operand:VHSDF 2 "register_operand" "w")]
2402 UNSPEC_FADDV))]
2403 "TARGET_SIMD"
2404 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2406 )
2407
;; Across-lanes integer add (ADDV / ADDP depending on mode).
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409 [(set (match_operand:VDQV 0 "register_operand" "=w")
2410 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2411 UNSPEC_ADDV))]
2412 "TARGET_SIMD"
2413 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414 [(set_attr "type" "neon_reduc_add<q>")]
2415 )
2416
;; V2SI has no ADDV; a single pairwise ADDP does the two-lane reduction.
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418 [(set (match_operand:V2SI 0 "register_operand" "=w")
2419 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2420 UNSPEC_ADDV))]
2421 "TARGET_SIMD"
2422 "addp\\t%0.2s, %1.2s, %1.2s"
2423 [(set_attr "type" "neon_reduc_add")]
2424 )
2425
;; Two-lane FP add reduction: scalar FADDP of the pair.
2426 (define_insn "reduc_plus_scal_<mode>"
2427 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2429 UNSPEC_FADDV))]
2430 "TARGET_SIMD"
2431 "faddp\\t%<Vetype>0, %1.<Vtype>"
2432 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2433 )
2434
;; V4SF add reduction: two rounds of pairwise FADDP, then extract lane 0.
2435 (define_expand "reduc_plus_scal_v4sf"
2436 [(set (match_operand:SF 0 "register_operand")
2437 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2438 UNSPEC_FADDV))]
2439 "TARGET_SIMD"
2440 {
2441 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442 rtx scratch = gen_reg_rtx (V4SFmode);
2443 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2446 DONE;
2447 })
2448
;; Count leading sign bits (CLS).
2449 (define_insn "clrsb<mode>2"
2450 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2452 "TARGET_SIMD"
2453 "cls\\t%0.<Vtype>, %1.<Vtype>"
2454 [(set_attr "type" "neon_cls<q>")]
2455 )
2456
;; Count leading zeros (CLZ).
2457 (define_insn "clz<mode>2"
2458 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2460 "TARGET_SIMD"
2461 "clz\\t%0.<Vtype>, %1.<Vtype>"
2462 [(set_attr "type" "neon_cls<q>")]
2463 )
2464
;; Population count (CNT) — byte-element vectors only.
2465 (define_insn "popcount<mode>2"
2466 [(set (match_operand:VB 0 "register_operand" "=w")
2467 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2468 "TARGET_SIMD"
2469 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470 [(set_attr "type" "neon_cnt<q>")]
2471 )
2472
2473 ;; 'across lanes' max and min ops.
2474
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP max/min reduction to scalar: across-lanes op into a scratch vector,
;; then extract lane 0 (endian-adjusted).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478 [(match_operand:<VEL> 0 "register_operand")
2479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2480 FMAXMINV)]
2481 "TARGET_SIMD"
2482 {
2483 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484 rtx scratch = gen_reg_rtx (<MODE>mode);
2485 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2486 operands[1]));
2487 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2488 DONE;
2489 }
2490 )
2491
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494 [(match_operand:<VEL> 0 "register_operand")
2495 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2496 MAXMINV)]
2497 "TARGET_SIMD"
2498 {
2499 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500 rtx scratch = gen_reg_rtx (<MODE>mode);
2501 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2502 operands[1]));
2503 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2504 DONE;
2505 }
2506 )
2507
;; Across-lanes integer max/min ({S,U}{MAX,MIN}V).
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2511 MAXMINV))]
2512 "TARGET_SIMD"
2513 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514 [(set_attr "type" "neon_reduc_minmax<q>")]
2515 )
2516
;; V2SI has no across-lanes form; one pairwise op reduces the two lanes.
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518 [(set (match_operand:V2SI 0 "register_operand" "=w")
2519 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2520 MAXMINV))]
2521 "TARGET_SIMD"
2522 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523 [(set_attr "type" "neon_reduc_minmax")]
2524 )
2525
;; Across-lanes FP max/min (F{MAX,MIN}{,NM}{V,P} per mode).
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2529 FMAXMINV))]
2530 "TARGET_SIMD"
2531 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2533 )
2534
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2536 ;; allocation.
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2538 ;; to select.
2539 ;;
2540 ;; Thus our BSL is of the form:
2541 ;; op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2543 ;;
2544 ;; if (op0 = mask)
2545 ;; bsl mask, op1, op2
2546 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2547 ;; bit op0, op2, mask
2548 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2549 ;; bif op0, op1, mask
2550 ;;
2551 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
2554
;; BSL expressed canonically as ((op2 ^ op3) & mask) ^ op3; the three
;; alternatives let register allocation pick BSL/BIT/BIF depending on
;; which input aliases the destination (see block comment above).
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2557 (xor:VDQ_I
2558 (and:VDQ_I
2559 (xor:VDQ_I
2560 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563 (match_dup:<V_INT_EQUIV> 3)
2564 ))]
2565 "TARGET_SIMD"
2566 "@
2567 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570 [(set_attr "type" "neon_bsl<q>")]
2571 )
2572
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first. The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2578
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2581 (xor:VDQ_I
2582 (and:VDQ_I
2583 (xor:VDQ_I
2584 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587 (match_dup:<V_INT_EQUIV> 2)))]
2588 "TARGET_SIMD"
2589 "@
2590 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593 [(set_attr "type" "neon_bsl<q>")]
2594 )
2595
2596 ;; DImode is special, we want to avoid computing operations which are
2597 ;; more naturally computed in general purpose registers in the vector
2598 ;; registers. If we do that, we need to move all three operands from general
2599 ;; purpose registers to vector registers, then back again. However, we
2600 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2601 ;; optimizations based on the component operations of a BSL.
2602 ;;
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
2605
;; DImode BSL.  The fourth alternative keeps all operands in GP registers
;; and is split back into XOR/AND/XOR so the work never migrates to the
;; vector side (see block comment above).
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2608 (xor:DI
2609 (and:DI
2610 (xor:DI
2611 (match_operand:DI 3 "register_operand" "w,0,w,r")
2612 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2614 (match_dup:DI 3)
2615 ))]
2616 "TARGET_SIMD"
2617 "@
2618 bsl\\t%0.8b, %2.8b, %3.8b
2619 bit\\t%0.8b, %2.8b, %1.8b
2620 bif\\t%0.8b, %3.8b, %1.8b
2621 #"
2622 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2624 {
2625 /* Split back to individual operations. If we're before reload, and
2626 able to create a temporary register, do so. If we're after reload,
2627 we've got an early-clobber destination register, so use that.
2628 Otherwise, we can't create pseudos and we can't yet guarantee that
2629 operands[0] is safe to write, so FAIL to split. */
2630
2631 rtx scratch;
2632 if (reload_completed)
2633 scratch = operands[0];
2634 else if (can_create_pseudo_p ())
2635 scratch = gen_reg_rtx (DImode);
2636 else
2637 FAIL;
2638
2639 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2642 DONE;
2643 }
2644 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645 (set_attr "length" "4,4,4,12")]
2646 )
2647
;; Commuted form of the above (outer XOR shares operand 2 rather than
;; operand 3), for the same recog-canonicalization reason as the vector
;; *_alt pattern.
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2650 (xor:DI
2651 (and:DI
2652 (xor:DI
2653 (match_operand:DI 3 "register_operand" "w,w,0,r")
2654 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2656 (match_dup:DI 2)
2657 ))]
2658 "TARGET_SIMD"
2659 "@
2660 bsl\\t%0.8b, %3.8b, %2.8b
2661 bit\\t%0.8b, %3.8b, %1.8b
2662 bif\\t%0.8b, %2.8b, %1.8b
2663 #"
2664 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2666 {
2667 /* Split back to individual operations. If we're before reload, and
2668 able to create a temporary register, do so. If we're after reload,
2669 we've got an early-clobber destination register, so use that.
2670 Otherwise, we can't create pseudos and we can't yet guarantee that
2671 operands[0] is safe to write, so FAIL to split. */
2672
2673 rtx scratch;
2674 if (reload_completed)
2675 scratch = operands[0];
2676 else if (can_create_pseudo_p ())
2677 scratch = gen_reg_rtx (DImode);
2678 else
2679 FAIL;
2680
2681 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2684 DONE;
2685 }
2686 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687 (set_attr "length" "4,4,4,12")]
2688 )
2689
;; Expand a bitwise select for any vector (or DI/DF) mode: each set bit of
;; the integer mask (operand 1) takes the corresponding bit from operand 2,
;; each clear bit takes it from operand 3.  For FP modes the select is
;; performed in the equivalent integer mode and the result punned back.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      /* Do the select on integer views of the FP operands; a fresh
	 integer-mode temporary stands in for the FP destination.  */
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2715
;; op0 = op3 ? op1 : op2, elementwise, where op3 is an integer mask.
;; The all-ones/all-zeros mask cases degenerate to a move or a NOT;
;; everything else becomes a BSL with the mask as selector.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]))
;
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2744
2745 ;; Patterns comparing two vectors to produce a mask.
2746
;; Integer vector comparison producing an all-ones/all-zeros mask.
;; Signed LT/LE/GE/GT/EQ/NE against zero use the CM<cc>-with-zero forms;
;; unsigned comparisons are canonicalised onto CMGTU/CMGEU by swapping
;; operands where needed, and NE is emitted as NOT (EQ).
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* First pass: only the signed comparisons may keep a literal zero as
     operand 3 (the CM<cc>-with-zero instruction forms); anything else
     must be forced into a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Second pass: emit the comparison itself.  LTU/LEU are realised by
     swapping the operands of GTU/GEU.  */
  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2826
;; Floating-point vector comparison producing an integer mask.  Plain
;; LT/LE/GT/GE/EQ map onto FCM<cc> (with the zero forms where operand 3
;; is literal zero); NE is NOT (EQ); the unordered-or (UN<cc>) codes,
;; LTGT, ORDERED, UNORDERED and UNEQ are synthesised from FCMEQ/FCMGT/
;; FCMGE plus logical ops so as not to raise spurious FP exceptions.
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* Decide whether the FCM<cc>-with-zero form is usable; otherwise force
     operand 3 into a register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Pick the generator function, swapping operands where the comparison
     is realised as its mirror image (a < b  ==>  b > a).  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	/* tmp0/tmp1 are per-operand "is not NaN" masks; tmp2 is the
	   "both ordered" mask.  */
	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	/* Zero out NaN lanes of each operand before the real compare.  */
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	/* Result |= "either operand unordered".  */
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  /* UNEQ = UNORDERED | EQ:  ~ordered | (a == b).  */
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2999
;; Unsigned vector comparison: the signedness is carried entirely by the
;; comparison code in operand 1, so simply defer to vec_cmp.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
3011
;; op0 = (op4 <op3> op5) ? op1 : op2.  Expand as a vec_cmp to build the
;; mask followed by a vcond_mask select.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));

  DONE;
})
3041
;; As vcond<mode><mode>, but the comparison mode (FP) differs from the
;; mode of the data being selected (the "mixed" integer counterpart).
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})
3072
;; Unsigned variant of vcond<mode><mode>: mask built with the integer
;; vec_cmp (the code in operand 3 carries the unsignedness), then select.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
3101
;; Select FP data under an integer comparison of the same-width integer
;; mode: compare in <V_cmp_mixed>, select in VDQF.
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], mask));
  DONE;
})
3131
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3133
3134 ;; Lane extraction with sign extension to general purpose register.
;; Extract a lane and sign-extend it into a general-purpose register
;; in one SMOV instruction.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* RTL lane numbers follow GCC vector-extension order; flip for the
       assembler on big-endian.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3148
;; Extract a lane and zero-extend it into a general-purpose register.
;; UMOV to a W register implicitly zeroes the upper X-register bits,
;; so the same template serves both GPI destination modes.
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3163
3164 ;; Lane extraction of a value, neither sign nor zero extension
3165 ;; is guaranteed so upper bits should be considered undefined.
3166 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:		/* Lane to general-purpose register.  */
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:		/* Lane to SIMD scalar register.  */
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:		/* Lane directly to memory.  */
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
3189
;; Merge two adjacent 64-bit loads into a single 128-bit LDR.  The
;; condition verifies that operand 2's address is exactly operand 1's
;; address plus one <MODE>-sized step; disabled under STRICT_ALIGNMENT
;; because the wider access needs the wider alignment.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
3203
;; Store a concatenation of two 64-bit values with a single STP, from
;; either SIMD or general-purpose registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
3215
3216 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3217 ;; dest vector.
3218
;; Combine a 64-bit value (register, GP register or memory) with zero
;; into a 128-bit vector; the zero half comes for free because writing
;; a D register clears the upper half of the Q register.
(define_insn "@aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)
3232
;; Big-endian counterpart of aarch64_combinez: the zero half appears
;; first in the vec_concat, but the emitted instructions are identical.
(define_insn "@aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)
3246
;; Combine two 64-bit vectors into one 128-bit vector; the helper takes
;; care of endian-dependent lane placement.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)
3258
;; Build the double-width vector by writing the low then the high quad
;; half of the destination separately.
(define_expand "@aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
3271
3272 ;; <su><addsub>l<q>.
3273
;; Widening add/sub of the HIGH halves of two full-width vectors:
;; {S,U}{ADD,SUB}L2.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3286
;; Widening add/sub of the LOW halves of two full-width vectors:
;; {S,U}{ADD,SUB}L.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3299
3300
;; SADDL2 intrinsic: widening signed add of the high halves.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3312
;; UADDL2 intrinsic: widening unsigned add of the high halves.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3324
;; SSUBL2 intrinsic: widening signed subtract of the high halves.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3336
;; USUBL2 intrinsic: widening unsigned subtract of the high halves.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3348
;; Widening add/sub of two whole 64-bit vectors: {S,U}{ADD,SUB}L.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3359
3360 ;; <su><addsub>w<q>.
3361
;; Widening signed sum of a full-width vector into a double-width
;; accumulator: SADDW on the low half, then SADDW2 on the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
3378
;; Widening signed sum of a 64-bit vector: a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
			(match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3389
;; Widening unsigned sum of a full-width vector: UADDW then UADDW2.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)
3406
;; Widening unsigned sum of a 64-bit vector: a single UADDW suffices.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
			(match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3417
;; Wide minus widened-narrow: {S,U}SUBW on a whole 64-bit vector.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)
3427
;; {S,U}SUBW on the low half of a full-width vector.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)
3439
;; {S,U}SUBW2 on the high half of a full-width vector.
(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)
3451
;; Wide plus widened-narrow: {S,U}ADDW on a whole 64-bit vector.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)
3461
;; {S,U}ADDW on the low half of a full-width vector.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)
3474
;; {S,U}ADDW2 on the high half of a full-width vector.
(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)
3487
;; SADDW2 intrinsic: wide accumulator plus sign-extended high half.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3499
;; UADDW2 intrinsic: wide accumulator plus zero-extended high half.
(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3511
3512
;; SSUBW2 intrinsic: wide accumulator minus sign-extended high half.
(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3524
;; USUBW2 intrinsic: wide accumulator minus zero-extended high half.
(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3536
3537 ;; <su><r>h<addsub>.
3538
;; Truncating (floor) halving add, signed or unsigned: SHADD/UHADD.
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 HADD))]
  "TARGET_SIMD"
)
3546
;; Rounding (ceiling) halving add, signed or unsigned: SRHADD/URHADD.
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 RHADD))]
  "TARGET_SIMD"
)
3554
;; Halving add/sub family: {S,U}{,R}H{ADD,SUB}.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
3564
3565 ;; <r><addsub>hn<q>.
3566
;; Narrowing high-half add/sub: {,R}{ADD,SUB}HN.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
			   ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3576
;; Second-part narrowing high-half add/sub: {,R}{ADD,SUB}HN2; operand 1
;; is the existing low half of the destination (tied to operand 0).
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
			    ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3587
3588 ;; pmul.
3589
;; Polynomial (carry-less) byte multiply: PMUL.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
3599
3600 ;; fmulx.
3601
;; FMULX, vector and scalar forms.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
3612
3613 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3614
;; FMULX by a broadcast lane taken from the other-width vector
;; (vmulxq_lane_f32 and vmulx_laneq_f32).
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)
3631
3632 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3633
;; FMULX by a broadcast lane of a same-width vector
;; (vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32).
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
3650
3651 ;; vmulxq_lane
3652
;; FMULX of vector operand 1 by a scalar operand 2 broadcast to every
;; lane (element [0] form of the instruction).
3653 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3654 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3655 (unspec:VHSDF
3656 [(match_operand:VHSDF 1 "register_operand" "w")
3657 (vec_duplicate:VHSDF
3658 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3659 UNSPEC_FMULX))]
3660 "TARGET_SIMD"
;; Note: a stray ';' after the template string (dead text, since ';'
;; starts an md comment) has been removed.
3661 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
3662 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3663 )
3664
3665 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3666 ;; vmulxd_lane_f64 == vmulx_lane_f64
3667 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3668
;; Scalar FMULX of operand 1 by one selected lane of vector operand 2.
3669 (define_insn "*aarch64_vgetfmulx<mode>"
3670 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3671 (unspec:<VEL>
3672 [(match_operand:<VEL> 1 "register_operand" "w")
3673 (vec_select:<VEL>
3674 (match_operand:VDQF 2 "register_operand" "w")
3675 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3676 UNSPEC_FMULX))]
3677 "TARGET_SIMD"
3678 {
;; Canonicalize the lane number for the target's endianness.
3679 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3680 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3681 }
3682 [(set_attr "type" "fmul<Vetype>")]
3683 )
3684 ;; <su>q<addsub>
3685
;; Saturating add/subtract, signed and unsigned (BINQOPS iterator
;; expands to ss_plus/us_plus/ss_minus/us_minus).
3686 (define_insn "aarch64_<su_optab><optab><mode>"
3687 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3688 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3689 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3690 "TARGET_SIMD"
3691 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3692 [(set_attr "type" "neon_<optab><q>")]
3693 )
3694
3695 ;; suqadd and usqadd
3696
;; Accumulating saturating add of mixed signedness; operand 1 is tied
;; to the destination ("0" constraint), so only operand 2 is printed.
3697 (define_insn "aarch64_<sur>qadd<mode>"
3698 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3699 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3700 (match_operand:VSDQ_I 2 "register_operand" "w")]
3701 USSUQADD))]
3702 "TARGET_SIMD"
3703 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3704 [(set_attr "type" "neon_qadd<q>")]
3705 )
3706
3707 ;; sqmovun
3708
;; SQXTUN: signed-to-unsigned saturating extract-narrow.
3709 (define_insn "aarch64_sqmovun<mode>"
3710 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3711 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3712 UNSPEC_SQXTUN))]
3713 "TARGET_SIMD"
3714 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3715 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3716 )
3717
3718 ;; sqmovn and uqmovn
3719
;; SQXTN/UQXTN: saturating extract-narrow, same signedness in and out.
3720 (define_insn "aarch64_<sur>qmovn<mode>"
3721 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3722 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3723 SUQMOVN))]
3724 "TARGET_SIMD"
3725 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3726 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3727 )
3728
3729 ;; <su>q<absneg>
3730
;; SQABS / SQNEG: saturating absolute value and negate (UNQOPS
;; expands to ss_abs/ss_neg).
3731 (define_insn "aarch64_s<optab><mode>"
3732 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3733 (UNQOPS:VSDQ_I
3734 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3735 "TARGET_SIMD"
3736 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3737 [(set_attr "type" "neon_<optab><q>")]
3738 )
3739
3740 ;; sq<r>dmulh.
3741
;; SQDMULH / SQRDMULH: saturating doubling multiply high half,
;; optionally rounding (<r> selects "r").
3742 (define_insn "aarch64_sq<r>dmulh<mode>"
3743 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3744 (unspec:VSDQ_HSI
3745 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3746 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3747 VQDMULH))]
3748 "TARGET_SIMD"
3749 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3750 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3751 )
3752
3753 ;; sq<r>dmulh_lane
3754
;; Vector form with operand 2 taken from one lane of a 64-bit-wide
;; vector (<VCOND>); lane number canonicalized for endianness.
3755 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3756 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3757 (unspec:VDQHS
3758 [(match_operand:VDQHS 1 "register_operand" "w")
3759 (vec_select:<VEL>
3760 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3761 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3762 VQDMULH))]
3763 "TARGET_SIMD"
3764 "*
3765 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3766 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3767 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3768 )
3769
;; As above but the lane comes from a 128-bit ("q") vector (<VCONQ>).
3770 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3771 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3772 (unspec:VDQHS
3773 [(match_operand:VDQHS 1 "register_operand" "w")
3774 (vec_select:<VEL>
3775 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3776 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3777 VQDMULH))]
3778 "TARGET_SIMD"
3779 "*
3780 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3781 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3782 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3783 )
3784
;; Scalar (SD_HSI) by-lane form.
;; NOTE(review): this template prints the lane suffix as %2.<v>[%3]
;; where the vector variants above use %2.<Vetype>[%3]; for HI/SI both
;; attributes appear to yield "h"/"s", but confirm against iterators.md
;; before relying on it.
3785 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3786 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3787 (unspec:SD_HSI
3788 [(match_operand:SD_HSI 1 "register_operand" "w")
3789 (vec_select:<VEL>
3790 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3791 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3792 VQDMULH))]
3793 "TARGET_SIMD"
3794 "*
3795 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3796 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3797 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3798 )
3799
;; Scalar by-lane form, lane from a 128-bit vector.
3800 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3801 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3802 (unspec:SD_HSI
3803 [(match_operand:SD_HSI 1 "register_operand" "w")
3804 (vec_select:<VEL>
3805 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3806 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3807 VQDMULH))]
3808 "TARGET_SIMD"
3809 "*
3810 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3811 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3812 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3813 )
3814
3815 ;; sqrdml[as]h.
3816
;; SQRDMLAH / SQRDMLSH (ARMv8.1-A RDMA extension, hence
;; TARGET_SIMD_RDMA).  Operand 1 is the accumulator, tied to the
;; destination.
3817 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3818 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3819 (unspec:VSDQ_HSI
3820 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3821 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3822 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3823 SQRDMLH_AS))]
3824 "TARGET_SIMD_RDMA"
3825 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3826 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3827 )
3828
3829 ;; sqrdml[as]h_lane.
3830
;; Vector by-lane form; lane from a 64-bit vector (<VCOND>).
3831 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3832 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3833 (unspec:VDQHS
3834 [(match_operand:VDQHS 1 "register_operand" "0")
3835 (match_operand:VDQHS 2 "register_operand" "w")
3836 (vec_select:<VEL>
3837 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3838 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3839 SQRDMLH_AS))]
3840 "TARGET_SIMD_RDMA"
3841 {
3842 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3843 return
3844 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3845 }
3846 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3847 )
3848
;; Scalar by-lane form; lane from a 64-bit vector.
3849 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3850 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3851 (unspec:SD_HSI
3852 [(match_operand:SD_HSI 1 "register_operand" "0")
3853 (match_operand:SD_HSI 2 "register_operand" "w")
3854 (vec_select:<VEL>
3855 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3856 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3857 SQRDMLH_AS))]
3858 "TARGET_SIMD_RDMA"
3859 {
3860 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3861 return
3862 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3863 }
3864 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3865 )
3866
3867 ;; sqrdml[as]h_laneq.
3868
;; Vector by-lane form; lane from a 128-bit vector (<VCONQ>).
3869 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3870 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3871 (unspec:VDQHS
3872 [(match_operand:VDQHS 1 "register_operand" "0")
3873 (match_operand:VDQHS 2 "register_operand" "w")
3874 (vec_select:<VEL>
3875 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3876 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3877 SQRDMLH_AS))]
3878 "TARGET_SIMD_RDMA"
3879 {
3880 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3881 return
3882 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3883 }
3884 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3885 )
3886
;; Scalar by-lane form; lane from a 128-bit vector.
;; NOTE(review): prints %3.<v>[%4] where the _lane scalar variant above
;; uses %3.<Vetype>[%4] — likely equivalent for HI/SI but inconsistent;
;; verify against iterators.md.
3887 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3888 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3889 (unspec:SD_HSI
3890 [(match_operand:SD_HSI 1 "register_operand" "0")
3891 (match_operand:SD_HSI 2 "register_operand" "w")
3892 (vec_select:<VEL>
3893 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3894 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3895 SQRDMLH_AS))]
3896 "TARGET_SIMD_RDMA"
3897 {
3898 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3899 return
3900 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3901 }
3902 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3903 )
3904
3905 ;; vqdml[sa]l
3906
;; SQDMLAL / SQDMLSL: saturating doubling multiply-accumulate long.
;; Modelled explicitly as acc +/- ss_ashift((sext a * sext b), 1);
;; SBINQOPS selects ss_plus/ss_minus.  Operand 1 is the accumulator,
;; tied to the destination.
3907 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3908 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3909 (SBINQOPS:<VWIDE>
3910 (match_operand:<VWIDE> 1 "register_operand" "0")
3911 (ss_ashift:<VWIDE>
3912 (mult:<VWIDE>
3913 (sign_extend:<VWIDE>
3914 (match_operand:VSD_HSI 2 "register_operand" "w"))
3915 (sign_extend:<VWIDE>
3916 (match_operand:VSD_HSI 3 "register_operand" "w")))
3917 (const_int 1))))]
3918 "TARGET_SIMD"
3919 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3920 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3921 )
3922
3923 ;; vqdml[sa]l_lane
3924
;; Vector by-lane form; multiplier lane from a 64-bit vector (<VCOND>).
3925 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3926 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3927 (SBINQOPS:<VWIDE>
3928 (match_operand:<VWIDE> 1 "register_operand" "0")
3929 (ss_ashift:<VWIDE>
3930 (mult:<VWIDE>
3931 (sign_extend:<VWIDE>
3932 (match_operand:VD_HSI 2 "register_operand" "w"))
3933 (sign_extend:<VWIDE>
3934 (vec_duplicate:VD_HSI
3935 (vec_select:<VEL>
3936 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3937 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3938 ))
3939 (const_int 1))))]
3940 "TARGET_SIMD"
3941 {
3942 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3943 return
3944 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3945 }
3946 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3947 )
3948
;; As above but the lane comes from a 128-bit vector (<VCONQ>).
3949 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3950 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3951 (SBINQOPS:<VWIDE>
3952 (match_operand:<VWIDE> 1 "register_operand" "0")
3953 (ss_ashift:<VWIDE>
3954 (mult:<VWIDE>
3955 (sign_extend:<VWIDE>
3956 (match_operand:VD_HSI 2 "register_operand" "w"))
3957 (sign_extend:<VWIDE>
3958 (vec_duplicate:VD_HSI
3959 (vec_select:<VEL>
3960 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3961 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3962 ))
3963 (const_int 1))))]
3964 "TARGET_SIMD"
3965 {
3966 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3967 return
3968 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3969 }
3970 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3971 )
3972
;; Scalar (SD_HSI) by-lane form; no vec_duplicate needed.
3973 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3974 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3975 (SBINQOPS:<VWIDE>
3976 (match_operand:<VWIDE> 1 "register_operand" "0")
3977 (ss_ashift:<VWIDE>
3978 (mult:<VWIDE>
3979 (sign_extend:<VWIDE>
3980 (match_operand:SD_HSI 2 "register_operand" "w"))
3981 (sign_extend:<VWIDE>
3982 (vec_select:<VEL>
3983 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3984 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3985 )
3986 (const_int 1))))]
3987 "TARGET_SIMD"
3988 {
3989 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3990 return
3991 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3992 }
3993 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3994 )
3995
;; Scalar by-lane form, lane from a 128-bit vector.
3996 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3997 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3998 (SBINQOPS:<VWIDE>
3999 (match_operand:<VWIDE> 1 "register_operand" "0")
4000 (ss_ashift:<VWIDE>
4001 (mult:<VWIDE>
4002 (sign_extend:<VWIDE>
4003 (match_operand:SD_HSI 2 "register_operand" "w"))
4004 (sign_extend:<VWIDE>
4005 (vec_select:<VEL>
4006 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4007 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4008 )
4009 (const_int 1))))]
4010 "TARGET_SIMD"
4011 {
4012 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4013 return
4014 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4015 }
4016 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4017 )
4018
4019 ;; vqdml[sa]l_n
4020
;; Multiplier is a scalar broadcast to all lanes (element [0] form).
4021 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4022 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4023 (SBINQOPS:<VWIDE>
4024 (match_operand:<VWIDE> 1 "register_operand" "0")
4025 (ss_ashift:<VWIDE>
4026 (mult:<VWIDE>
4027 (sign_extend:<VWIDE>
4028 (match_operand:VD_HSI 2 "register_operand" "w"))
4029 (sign_extend:<VWIDE>
4030 (vec_duplicate:VD_HSI
4031 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4032 (const_int 1))))]
4033 "TARGET_SIMD"
4034 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4035 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4036 )
4037
4038 ;; sqdml[as]l2
4039
;; SQDMLAL2 / SQDMLSL2: as sqdml[as]l but operating on the high halves
;; of 128-bit inputs; operand 4 is the hi-half lane selector supplied
;; by the expanders below.
4040 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4041 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (SBINQOPS:<VWIDE>
4043 (match_operand:<VWIDE> 1 "register_operand" "0")
4044 (ss_ashift:<VWIDE>
4045 (mult:<VWIDE>
4046 (sign_extend:<VWIDE>
4047 (vec_select:<VHALF>
4048 (match_operand:VQ_HSI 2 "register_operand" "w")
4049 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4050 (sign_extend:<VWIDE>
4051 (vec_select:<VHALF>
4052 (match_operand:VQ_HSI 3 "register_operand" "w")
4053 (match_dup 4))))
4054 (const_int 1))))]
4055 "TARGET_SIMD"
4056 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4057 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4058 )
4059
;; Expander: builds the hi-half parallel and forwards to _internal.
4060 (define_expand "aarch64_sqdmlal2<mode>"
4061 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (match_operand:<VWIDE> 1 "register_operand" "w")
4063 (match_operand:VQ_HSI 2 "register_operand" "w")
4064 (match_operand:VQ_HSI 3 "register_operand" "w")]
4065 "TARGET_SIMD"
4066 {
4067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4068 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4069 operands[2], operands[3], p));
4070 DONE;
4071 })
4072
;; Expander for the subtracting (sqdmlsl2) variant.
4073 (define_expand "aarch64_sqdmlsl2<mode>"
4074 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4075 (match_operand:<VWIDE> 1 "register_operand" "w")
4076 (match_operand:VQ_HSI 2 "register_operand" "w")
4077 (match_operand:VQ_HSI 3 "register_operand" "w")]
4078 "TARGET_SIMD"
4079 {
4080 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4082 operands[2], operands[3], p));
4083 DONE;
4084 })
4085
4086 ;; vqdml[sa]l2_lane
4087
;; Hi-half by-lane form; multiplier lane from a 64-bit vector.
4088 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4089 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4090 (SBINQOPS:<VWIDE>
4091 (match_operand:<VWIDE> 1 "register_operand" "0")
4092 (ss_ashift:<VWIDE>
4093 (mult:<VWIDE>
4094 (sign_extend:<VWIDE>
4095 (vec_select:<VHALF>
4096 (match_operand:VQ_HSI 2 "register_operand" "w")
4097 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4098 (sign_extend:<VWIDE>
4099 (vec_duplicate:<VHALF>
4100 (vec_select:<VEL>
4101 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4102 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4103 ))))
4104 (const_int 1))))]
4105 "TARGET_SIMD"
4106 {
4107 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4108 return
4109 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4110 }
4111 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4112 )
4113
;; Hi-half by-lane form; multiplier lane from a 128-bit vector.
4114 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116 (SBINQOPS:<VWIDE>
4117 (match_operand:<VWIDE> 1 "register_operand" "0")
4118 (ss_ashift:<VWIDE>
4119 (mult:<VWIDE>
4120 (sign_extend:<VWIDE>
4121 (vec_select:<VHALF>
4122 (match_operand:VQ_HSI 2 "register_operand" "w")
4123 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4124 (sign_extend:<VWIDE>
4125 (vec_duplicate:<VHALF>
4126 (vec_select:<VEL>
4127 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4128 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4129 ))))
4130 (const_int 1))))]
4131 "TARGET_SIMD"
4132 {
4133 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4134 return
4135 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4136 }
4137 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4138 )
4139
;; Expanders for the sqdml[as]l2 lane/laneq/_n builtins: each builds the
;; hi-half selector parallel and forwards to the matching _internal insn.
4140 (define_expand "aarch64_sqdmlal2_lane<mode>"
4141 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (match_operand:<VWIDE> 1 "register_operand" "w")
4143 (match_operand:VQ_HSI 2 "register_operand" "w")
4144 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4145 (match_operand:SI 4 "immediate_operand" "i")]
4146 "TARGET_SIMD"
4147 {
4148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4149 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4150 operands[2], operands[3],
4151 operands[4], p));
4152 DONE;
4153 })
4154
4155 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4156 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4157 (match_operand:<VWIDE> 1 "register_operand" "w")
4158 (match_operand:VQ_HSI 2 "register_operand" "w")
4159 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4160 (match_operand:SI 4 "immediate_operand" "i")]
4161 "TARGET_SIMD"
4162 {
4163 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4164 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4165 operands[2], operands[3],
4166 operands[4], p));
4167 DONE;
4168 })
4169
4170 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4171 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4172 (match_operand:<VWIDE> 1 "register_operand" "w")
4173 (match_operand:VQ_HSI 2 "register_operand" "w")
4174 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4175 (match_operand:SI 4 "immediate_operand" "i")]
4176 "TARGET_SIMD"
4177 {
4178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4179 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4180 operands[2], operands[3],
4181 operands[4], p));
4182 DONE;
4183 })
4184
4185 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4186 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4187 (match_operand:<VWIDE> 1 "register_operand" "w")
4188 (match_operand:VQ_HSI 2 "register_operand" "w")
4189 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4190 (match_operand:SI 4 "immediate_operand" "i")]
4191 "TARGET_SIMD"
4192 {
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4195 operands[2], operands[3],
4196 operands[4], p));
4197 DONE;
4198 })
4199
;; Hi-half form with a scalar multiplier broadcast to all lanes.
4200 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4201 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4202 (SBINQOPS:<VWIDE>
4203 (match_operand:<VWIDE> 1 "register_operand" "0")
4204 (ss_ashift:<VWIDE>
4205 (mult:<VWIDE>
4206 (sign_extend:<VWIDE>
4207 (vec_select:<VHALF>
4208 (match_operand:VQ_HSI 2 "register_operand" "w")
4209 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4210 (sign_extend:<VWIDE>
4211 (vec_duplicate:<VHALF>
4212 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4213 (const_int 1))))]
4214 "TARGET_SIMD"
4215 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4216 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4217 )
4218
4219 (define_expand "aarch64_sqdmlal2_n<mode>"
4220 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4221 (match_operand:<VWIDE> 1 "register_operand" "w")
4222 (match_operand:VQ_HSI 2 "register_operand" "w")
4223 (match_operand:<VEL> 3 "register_operand" "w")]
4224 "TARGET_SIMD"
4225 {
4226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4227 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4228 operands[2], operands[3],
4229 p));
4230 DONE;
4231 })
4232
4233 (define_expand "aarch64_sqdmlsl2_n<mode>"
4234 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4235 (match_operand:<VWIDE> 1 "register_operand" "w")
4236 (match_operand:VQ_HSI 2 "register_operand" "w")
4237 (match_operand:<VEL> 3 "register_operand" "w")]
4238 "TARGET_SIMD"
4239 {
4240 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4241 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4242 operands[2], operands[3],
4243 p));
4244 DONE;
4245 })
4246
4247 ;; vqdmull
4248
;; SQDMULL: saturating doubling multiply long, modelled as
;; ss_ashift((sext a * sext b), 1).
4249 (define_insn "aarch64_sqdmull<mode>"
4250 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4251 (ss_ashift:<VWIDE>
4252 (mult:<VWIDE>
4253 (sign_extend:<VWIDE>
4254 (match_operand:VSD_HSI 1 "register_operand" "w"))
4255 (sign_extend:<VWIDE>
4256 (match_operand:VSD_HSI 2 "register_operand" "w")))
4257 (const_int 1)))]
4258 "TARGET_SIMD"
4259 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4260 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4261 )
4262
4263 ;; vqdmull_lane
4264
;; Vector by-lane form; multiplier lane from a 64-bit vector (<VCOND>).
4265 (define_insn "aarch64_sqdmull_lane<mode>"
4266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4267 (ss_ashift:<VWIDE>
4268 (mult:<VWIDE>
4269 (sign_extend:<VWIDE>
4270 (match_operand:VD_HSI 1 "register_operand" "w"))
4271 (sign_extend:<VWIDE>
4272 (vec_duplicate:VD_HSI
4273 (vec_select:<VEL>
4274 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4275 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4276 ))
4277 (const_int 1)))]
4278 "TARGET_SIMD"
4279 {
4280 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4281 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4282 }
4283 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4284 )
4285
;; As above but the lane comes from a 128-bit vector (<VCONQ>).
4286 (define_insn "aarch64_sqdmull_laneq<mode>"
4287 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4288 (ss_ashift:<VWIDE>
4289 (mult:<VWIDE>
4290 (sign_extend:<VWIDE>
4291 (match_operand:VD_HSI 1 "register_operand" "w"))
4292 (sign_extend:<VWIDE>
4293 (vec_duplicate:VD_HSI
4294 (vec_select:<VEL>
4295 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4296 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4297 ))
4298 (const_int 1)))]
4299 "TARGET_SIMD"
4300 {
4301 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4302 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4303 }
4304 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4305 )
4306
;; Scalar (SD_HSI) by-lane forms; no vec_duplicate needed.
4307 (define_insn "aarch64_sqdmull_lane<mode>"
4308 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4309 (ss_ashift:<VWIDE>
4310 (mult:<VWIDE>
4311 (sign_extend:<VWIDE>
4312 (match_operand:SD_HSI 1 "register_operand" "w"))
4313 (sign_extend:<VWIDE>
4314 (vec_select:<VEL>
4315 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4316 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4317 ))
4318 (const_int 1)))]
4319 "TARGET_SIMD"
4320 {
4321 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4322 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4323 }
4324 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4325 )
4326
4327 (define_insn "aarch64_sqdmull_laneq<mode>"
4328 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4329 (ss_ashift:<VWIDE>
4330 (mult:<VWIDE>
4331 (sign_extend:<VWIDE>
4332 (match_operand:SD_HSI 1 "register_operand" "w"))
4333 (sign_extend:<VWIDE>
4334 (vec_select:<VEL>
4335 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4336 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4337 ))
4338 (const_int 1)))]
4339 "TARGET_SIMD"
4340 {
4341 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4342 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4343 }
4344 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4345 )
4346
4347 ;; vqdmull_n
4348
;; Multiplier is a scalar broadcast to all lanes (element [0] form).
4349 (define_insn "aarch64_sqdmull_n<mode>"
4350 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4351 (ss_ashift:<VWIDE>
4352 (mult:<VWIDE>
4353 (sign_extend:<VWIDE>
4354 (match_operand:VD_HSI 1 "register_operand" "w"))
4355 (sign_extend:<VWIDE>
4356 (vec_duplicate:VD_HSI
4357 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4358 )
4359 (const_int 1)))]
4360 "TARGET_SIMD"
4361 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4362 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4363 )
4364
4365 ;; vqdmull2
4366
4367
4368
;; SQDMULL2: sqdmull on the high halves of 128-bit inputs; operand 3
;; is the hi-half lane selector supplied by the expander below.
4369 (define_insn "aarch64_sqdmull2<mode>_internal"
4370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4371 (ss_ashift:<VWIDE>
4372 (mult:<VWIDE>
4373 (sign_extend:<VWIDE>
4374 (vec_select:<VHALF>
4375 (match_operand:VQ_HSI 1 "register_operand" "w")
4376 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4377 (sign_extend:<VWIDE>
4378 (vec_select:<VHALF>
4379 (match_operand:VQ_HSI 2 "register_operand" "w")
4380 (match_dup 3)))
4381 )
4382 (const_int 1)))]
4383 "TARGET_SIMD"
4384 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4385 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4386 )
4387
;; Expander: builds the hi-half parallel and forwards to _internal.
4388 (define_expand "aarch64_sqdmull2<mode>"
4389 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4390 (match_operand:VQ_HSI 1 "register_operand" "w")
4391 (match_operand:VQ_HSI 2 "register_operand" "w")]
4392 "TARGET_SIMD"
4393 {
4394 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4395 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4396 operands[2], p));
4397 DONE;
4398 })
4399
4400 ;; vqdmull2_lane
4401
;; Hi-half by-lane form; multiplier lane from a 64-bit vector.
4402 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4404 (ss_ashift:<VWIDE>
4405 (mult:<VWIDE>
4406 (sign_extend:<VWIDE>
4407 (vec_select:<VHALF>
4408 (match_operand:VQ_HSI 1 "register_operand" "w")
4409 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4410 (sign_extend:<VWIDE>
4411 (vec_duplicate:<VHALF>
4412 (vec_select:<VEL>
4413 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4414 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4415 ))
4416 (const_int 1)))]
4417 "TARGET_SIMD"
4418 {
4419 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4420 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4421 }
4422 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4423 )
4424
;; Hi-half by-lane form; multiplier lane from a 128-bit vector.
4425 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4427 (ss_ashift:<VWIDE>
4428 (mult:<VWIDE>
4429 (sign_extend:<VWIDE>
4430 (vec_select:<VHALF>
4431 (match_operand:VQ_HSI 1 "register_operand" "w")
4432 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4433 (sign_extend:<VWIDE>
4434 (vec_duplicate:<VHALF>
4435 (vec_select:<VEL>
4436 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4437 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4438 ))
4439 (const_int 1)))]
4440 "TARGET_SIMD"
4441 {
4442 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4443 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4444 }
4445 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4446 )
4447
4448 (define_expand "aarch64_sqdmull2_lane<mode>"
4449 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4450 (match_operand:VQ_HSI 1 "register_operand" "w")
4451 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4452 (match_operand:SI 3 "immediate_operand" "i")]
4453 "TARGET_SIMD"
4454 {
4455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4456 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4457 operands[2], operands[3],
4458 p));
4459 DONE;
4460 })
4461
4462 (define_expand "aarch64_sqdmull2_laneq<mode>"
4463 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4464 (match_operand:VQ_HSI 1 "register_operand" "w")
4465 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4466 (match_operand:SI 3 "immediate_operand" "i")]
4467 "TARGET_SIMD"
4468 {
4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4471 operands[2], operands[3],
4472 p));
4473 DONE;
4474 })
4475
4476 ;; vqdmull2_n
4477
;; Hi-half form with a scalar multiplier broadcast to all lanes.
4478 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4479 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4480 (ss_ashift:<VWIDE>
4481 (mult:<VWIDE>
4482 (sign_extend:<VWIDE>
4483 (vec_select:<VHALF>
4484 (match_operand:VQ_HSI 1 "register_operand" "w")
4485 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4486 (sign_extend:<VWIDE>
4487 (vec_duplicate:<VHALF>
4488 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4489 )
4490 (const_int 1)))]
4491 "TARGET_SIMD"
4492 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4493 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4494 )
4495
4496 (define_expand "aarch64_sqdmull2_n<mode>"
4497 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4498 (match_operand:VQ_HSI 1 "register_operand" "w")
4499 (match_operand:<VEL> 2 "register_operand" "w")]
4500 "TARGET_SIMD"
4501 {
4502 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4503 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4504 operands[2], p));
4505 DONE;
4506 })
4507
4508 ;; vshl
4509
;; Register-controlled shift, signed/unsigned and optionally rounding
;; (<sur> selects s/u/sr/ur variants via the VSHL iterator).
4510 (define_insn "aarch64_<sur>shl<mode>"
4511 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4512 (unspec:VSDQ_I_DI
4513 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4514 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4515 VSHL))]
4516 "TARGET_SIMD"
;; Note: a stray ';' after the template string (dead text, since ';'
;; starts an md comment) has been removed.
4517 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4518 [(set_attr "type" "neon_shift_reg<q>")]
4519 )
4520
4521
4522 ;; vqshl
4523
;; Register-controlled saturating shift, optionally rounding
;; (<sur>q<r>shl via the VQSHL iterator).
4524 (define_insn "aarch64_<sur>q<r>shl<mode>"
4525 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4526 (unspec:VSDQ_I
4527 [(match_operand:VSDQ_I 1 "register_operand" "w")
4528 (match_operand:VSDQ_I 2 "register_operand" "w")]
4529 VQSHL))]
4530 "TARGET_SIMD"
;; Note: a stray ';' after the template string (dead text, since ';'
;; starts an md comment) has been removed.
4531 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4532 [(set_attr "type" "neon_sat_shift_reg<q>")]
4533 )
4534
4535 ;; vshll_n
4536
;; Widening shift-left by immediate.  The mnemonic is chosen at output
;; time: a shift equal to the element width only exists as plain SHLL,
;; while smaller shifts use the [su]shll form selected by <sur>.
4537 (define_insn "aarch64_<sur>shll_n<mode>"
4538 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4539 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4540 (match_operand:SI 2
4541 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4542 VSHLL))]
4543 "TARGET_SIMD"
4544 {
 /* Shift count == element size: SHLL is the only encoding.  */
4545 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4546 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4547 else
4548 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4549 }
4550 [(set_attr "type" "neon_shift_imm_long")]
4551 )
4552
4553 ;; vshll_high_n
4554
;; As above, but widening the high half of a Q register (SHLL2 forms).
;; NOTE(review): operand 2 uses the loose "immediate_operand" predicate
;; here, unlike the bitsize-checked predicate in the non-high pattern —
;; presumably the intrinsic expander range-checks it; verify.
4555 (define_insn "aarch64_<sur>shll2_n<mode>"
4556 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4557 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4558 (match_operand:SI 2 "immediate_operand" "i")]
4559 VSHLL))]
4560 "TARGET_SIMD"
4561 {
4562 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4563 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4564 else
4565 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4566 }
4567 [(set_attr "type" "neon_shift_imm_long")]
4568 )
4569
4570 ;; vrshr_n
4571
;; Shift right by immediate, plain or rounding, chosen by the VRSHR_N
;; unspec iterator via <sur>.
4572 (define_insn "aarch64_<sur>shr_n<mode>"
4573 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4574 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4575 (match_operand:SI 2
4576 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4577 VRSHR_N))]
4578 "TARGET_SIMD"
4579 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4580 [(set_attr "type" "neon_sat_shift_imm<q>")]
4581 )
4582
4583 ;; v(r)sra_n
4584
;; Shift right and accumulate: operand 1 is tied to the destination
;; ("0" constraint) and supplies the accumulator value.
4585 (define_insn "aarch64_<sur>sra_n<mode>"
4586 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4588 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4589 (match_operand:SI 3
4590 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4591 VSRA))]
4592 "TARGET_SIMD"
4593 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4594 [(set_attr "type" "neon_shift_acc<q>")]
4595 )
4596
4597 ;; vs<lr>i_n
4598
;; Shift and insert (SLI/SRI): destination bits outside the shifted
;; field are preserved, hence operand 1 tied to operand 0.
4599 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4600 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4601 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4602 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4603 (match_operand:SI 3
4604 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4605 VSLRI))]
4606 "TARGET_SIMD"
4607 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4608 [(set_attr "type" "neon_shift_imm<q>")]
4609 )
4610
4611 ;; vqshl(u)
4612
;; Saturating shift left by immediate (signed, unsigned, or
;; signed-to-unsigned "u" variant from the VQSHL_N iterator).
4613 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4614 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4615 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4616 (match_operand:SI 2
4617 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4618 VQSHL_N))]
4619 "TARGET_SIMD"
4620 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4621 [(set_attr "type" "neon_sat_shift_imm<q>")]
4622 )
4623
4624
4625 ;; vq(r)shr(u)n_n
4626
;; Saturating (rounding) shift right and narrow to the half-width mode
;; <VNARROWQ>.
4627 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4628 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4629 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4630 (match_operand:SI 2
4631 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4632 VQSHRN_N))]
4633 "TARGET_SIMD"
4634 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4635 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4636 )
4637
4638
4639 ;; cm(eq|ge|gt|lt|le)
4640 ;; Note, we have constraints for Dz and Z as different expanders
4641 ;; have different ideas of what should be passed to this pattern.
4642
;; Vector integer compare producing an all-ones/all-zeros mask (the
;; neg of the 0/1 comparison RTL).  Alternative 2 matches compare
;; against zero via the ZDz constraint and uses the #0 encoding.
4643 (define_insn "aarch64_cm<optab><mode>"
4644 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4645 (neg:<V_INT_EQUIV>
4646 (COMPARISONS:<V_INT_EQUIV>
4647 (match_operand:VDQ_I 1 "register_operand" "w,w")
4648 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4649 )))]
4650 "TARGET_SIMD"
4651 "@
4652 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4653 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4654 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4655 )
4656
;; Scalar DI compare.  Kept as "#" until after reload so the register
;; allocator can choose between the SIMD form (w regs) and a GP-register
;; compare+cset sequence; the latter clobbers CC, hence the clobber here.
4657 (define_insn_and_split "aarch64_cm<optab>di"
4658 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4659 (neg:DI
4660 (COMPARISONS:DI
4661 (match_operand:DI 1 "register_operand" "w,w,r")
4662 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4663 )))
4664 (clobber (reg:CC CC_REGNUM))]
4665 "TARGET_SIMD"
4666 "#"
4667 "&& reload_completed"
4668 [(set (match_operand:DI 0 "register_operand")
4669 (neg:DI
4670 (COMPARISONS:DI
4671 (match_operand:DI 1 "register_operand")
4672 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4673 )))]
4674 {
4675 /* If we are in the general purpose register file,
4676 we split to a sequence of comparison and store. */
4677 if (GP_REGNUM_P (REGNO (operands[0]))
4678 && GP_REGNUM_P (REGNO (operands[1])))
4679 {
4680 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4681 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4682 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4683 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4684 DONE;
4685 }
4686 /* Otherwise, we expand to a similar pattern which does not
4687 clobber CC_REGNUM. */
4688 }
4689 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4690 )
4691
;; Post-reload SIMD-register-only twin of the pattern above (no CC
;; clobber); this is what the split above falls through to.
4692 (define_insn "*aarch64_cm<optab>di"
4693 [(set (match_operand:DI 0 "register_operand" "=w,w")
4694 (neg:DI
4695 (COMPARISONS:DI
4696 (match_operand:DI 1 "register_operand" "w,w")
4697 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4698 )))]
4699 "TARGET_SIMD && reload_completed"
4700 "@
4701 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4702 cm<optab>\t%d0, %d1, #0"
4703 [(set_attr "type" "neon_compare, neon_compare_zero")]
4704 )
4705
4706 ;; cm(hs|hi)
4707
;; Unsigned vector compares (CMHS/CMHI); no compare-against-zero
;; alternative exists for these encodings.
4708 (define_insn "aarch64_cm<optab><mode>"
4709 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4710 (neg:<V_INT_EQUIV>
4711 (UCOMPARISONS:<V_INT_EQUIV>
4712 (match_operand:VDQ_I 1 "register_operand" "w")
4713 (match_operand:VDQ_I 2 "register_operand" "w")
4714 )))]
4715 "TARGET_SIMD"
4716 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4717 [(set_attr "type" "neon_compare<q>")]
4718 )
4719
;; Unsigned scalar DI compare, mirroring the signed insn_and_split
;; above: deferred to post-reload so either a SIMD compare or a
;; CC-clobbering GP compare+cset can be chosen.
4720 (define_insn_and_split "aarch64_cm<optab>di"
4721 [(set (match_operand:DI 0 "register_operand" "=w,r")
4722 (neg:DI
4723 (UCOMPARISONS:DI
4724 (match_operand:DI 1 "register_operand" "w,r")
4725 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4726 )))
4727 (clobber (reg:CC CC_REGNUM))]
4728 "TARGET_SIMD"
4729 "#"
4730 "&& reload_completed"
4731 [(set (match_operand:DI 0 "register_operand")
4732 (neg:DI
4733 (UCOMPARISONS:DI
4734 (match_operand:DI 1 "register_operand")
4735 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4736 )))]
4737 {
4738 /* If we are in the general purpose register file,
4739 we split to a sequence of comparison and store. */
4740 if (GP_REGNUM_P (REGNO (operands[0]))
4741 && GP_REGNUM_P (REGNO (operands[1])))
4742 {
 /* Unsigned compares use plain CCmode rather than SELECT_CC_MODE.  */
4743 machine_mode mode = CCmode;
4744 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4745 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4746 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4747 DONE;
4748 }
4749 /* Otherwise, we expand to a similar pattern which does not
4750 clobber CC_REGNUM. */
4751 }
4752 [(set_attr "type" "neon_compare,multiple")]
4753 )
4754
;; Post-reload SIMD-register-only twin, target of the split above.
4755 (define_insn "*aarch64_cm<optab>di"
4756 [(set (match_operand:DI 0 "register_operand" "=w")
4757 (neg:DI
4758 (UCOMPARISONS:DI
4759 (match_operand:DI 1 "register_operand" "w")
4760 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4761 )))]
4762 "TARGET_SIMD && reload_completed"
4763 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4764 [(set_attr "type" "neon_compare")]
4765 )
4766
4767 ;; cmtst
4768
4769 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4770 ;; we don't have any insns using ne, and aarch64_vcond outputs
4771 ;; not (neg (eq (and x y) 0))
4772 ;; which is rewritten by simplify_rtx as
4773 ;; plus (eq (and x y) 0) -1.
4774
;; CMTST: matches the canonical form plus (eq (and x y) 0) -1 that
;; simplify_rtx produces (see the comment block above this pattern).
4775 (define_insn "aarch64_cmtst<mode>"
4776 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4777 (plus:<V_INT_EQUIV>
4778 (eq:<V_INT_EQUIV>
4779 (and:VDQ_I
4780 (match_operand:VDQ_I 1 "register_operand" "w")
4781 (match_operand:VDQ_I 2 "register_operand" "w"))
4782 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4783 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4784 ]
4785 "TARGET_SIMD"
4786 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787 [(set_attr "type" "neon_tst<q>")]
4788 )
4789
;; Scalar DI cmtst, split after reload into either the SIMD form or a
;; GP-register tst+cset sequence (hence the CC clobber).
4790 (define_insn_and_split "aarch64_cmtstdi"
4791 [(set (match_operand:DI 0 "register_operand" "=w,r")
4792 (neg:DI
4793 (ne:DI
4794 (and:DI
4795 (match_operand:DI 1 "register_operand" "w,r")
4796 (match_operand:DI 2 "register_operand" "w,r"))
4797 (const_int 0))))
4798 (clobber (reg:CC CC_REGNUM))]
4799 "TARGET_SIMD"
4800 "#"
4801 "&& reload_completed"
4802 [(set (match_operand:DI 0 "register_operand")
4803 (neg:DI
4804 (ne:DI
4805 (and:DI
4806 (match_operand:DI 1 "register_operand")
4807 (match_operand:DI 2 "register_operand"))
4808 (const_int 0))))]
4809 {
4810 /* If we are in the general purpose register file,
4811 we split to a sequence of comparison and store. */
4812 if (GP_REGNUM_P (REGNO (operands[0]))
4813 && GP_REGNUM_P (REGNO (operands[1])))
4814 {
4815 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4816 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4817 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4818 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4819 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4820 DONE;
4821 }
4822 /* Otherwise, we expand to a similar pattern which does not
4823 clobber CC_REGNUM. */
4824 }
4825 [(set_attr "type" "neon_tst,multiple")]
4826 )
4827
;; SIMD-register-only twin of aarch64_cmtstdi, target of its split.
4828 (define_insn "*aarch64_cmtstdi"
4829 [(set (match_operand:DI 0 "register_operand" "=w")
4830 (neg:DI
4831 (ne:DI
4832 (and:DI
4833 (match_operand:DI 1 "register_operand" "w")
4834 (match_operand:DI 2 "register_operand" "w"))
4835 (const_int 0))))]
4836 "TARGET_SIMD"
4837 "cmtst\t%d0, %d1, %d2"
4838 [(set_attr "type" "neon_tst")]
4839 )
4840
4841 ;; fcm(eq|ge|gt|le|lt)
4842
;; Floating-point compare producing a mask in the integer-equivalent
;; mode; alternative 2 (YDz constraint) is compare against 0.0.
4843 (define_insn "aarch64_cm<optab><mode>"
4844 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4845 (neg:<V_INT_EQUIV>
4846 (COMPARISONS:<V_INT_EQUIV>
4847 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4848 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4849 )))]
4850 "TARGET_SIMD"
4851 "@
4852 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4853 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4854 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4855 )
4856
4857 ;; fac(ge|gt)
4858 ;; Note we can also handle what would be fac(le|lt) by
4859 ;; generating fac(ge|gt).
4860
;; Absolute compare: both inputs wrapped in (abs ...), emitted as
;; facge/facgt with operand order fixed up via <cmp_1>/<cmp_2>.
4861 (define_insn "aarch64_fac<optab><mode>"
4862 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4863 (neg:<V_INT_EQUIV>
4864 (FAC_COMPARISONS:<V_INT_EQUIV>
4865 (abs:VHSDF_HSDF
4866 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4867 (abs:VHSDF_HSDF
4868 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4869 )))]
4870 "TARGET_SIMD"
4871 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4872 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4873 )
4874
4875 ;; addp
4876
;; Pairwise add on 64-bit integer vectors.
4877 (define_insn "aarch64_addp<mode>"
4878 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4879 (unspec:VD_BHSI
4880 [(match_operand:VD_BHSI 1 "register_operand" "w")
4881 (match_operand:VD_BHSI 2 "register_operand" "w")]
4882 UNSPEC_ADDP))]
4883 "TARGET_SIMD"
4884 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4885 [(set_attr "type" "neon_reduc_add<q>")]
4886 )
4887
;; Scalar pairwise add: reduces the two DI lanes of a V2DI input into
;; a single D register.
4888 (define_insn "aarch64_addpdi"
4889 [(set (match_operand:DI 0 "register_operand" "=w")
4890 (unspec:DI
4891 [(match_operand:V2DI 1 "register_operand" "w")]
4892 UNSPEC_ADDP))]
4893 "TARGET_SIMD"
4894 "addp\t%d0, %1.2d"
4895 [(set_attr "type" "neon_reduc_add")]
4896 )
4897
4898 ;; sqrt
4899
;; Expander for vector sqrt: tries the approximate-sqrt sequence first
;; (subject to -mlow-precision-sqrt etc. inside the helper); otherwise
;; falls through to the FSQRT insn pattern below.
4900 (define_expand "sqrt<mode>2"
4901 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4902 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4903 "TARGET_SIMD"
4904 {
4905 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4906 DONE;
4907 })
4908
;; The actual FSQRT instruction.
4909 (define_insn "*sqrt<mode>2"
4910 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4911 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4912 "TARGET_SIMD"
4913 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4914 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4915 )
4916
4917 ;; Patterns for vector struct loads and stores.
4918
;; Two-register structure load: OImode holds the D/Q register pair, the
;; inner VQ unspec only records the element mode for assembly output.
4919 (define_insn "aarch64_simd_ld2<mode>"
4920 [(set (match_operand:OI 0 "register_operand" "=w")
4921 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4922 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4923 UNSPEC_LD2))]
4924 "TARGET_SIMD"
4925 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4926 [(set_attr "type" "neon_load2_2reg<q>")]
4927 )
4928
;; LD2R: load one 2-element structure and replicate to all lanes.
;; BLKmode memory because the access size differs from OImode.
4929 (define_insn "aarch64_simd_ld2r<mode>"
4930 [(set (match_operand:OI 0 "register_operand" "=w")
4931 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4932 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4933 UNSPEC_LD2_DUP))]
4934 "TARGET_SIMD"
4935 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4936 [(set_attr "type" "neon_load2_all_lanes<q>")]
4937 )
4938
;; LD2 to a single lane; operand 2 is the merged-in old register pair
;; and operand 3 the lane, flipped for big-endian at output time.
4939 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4940 [(set (match_operand:OI 0 "register_operand" "=w")
4941 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4942 (match_operand:OI 2 "register_operand" "0")
4943 (match_operand:SI 3 "immediate_operand" "i")
4944 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4945 UNSPEC_LD2_LANE))]
4946 "TARGET_SIMD"
4947 {
4948 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4949 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4950 }
4951 [(set_attr "type" "neon_load2_one_lane")]
4952 )
4953
;; Standard-name expander: on big-endian, LD2's register layout differs
;; from GCC's vector extension order, so load into a temp and permute.
4954 (define_expand "vec_load_lanesoi<mode>"
4955 [(set (match_operand:OI 0 "register_operand" "=w")
4956 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4957 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958 UNSPEC_LD2))]
4959 "TARGET_SIMD"
4960 {
4961 if (BYTES_BIG_ENDIAN)
4962 {
4963 rtx tmp = gen_reg_rtx (OImode);
4964 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4965 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4966 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4967 }
4968 else
4969 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4970 DONE;
4971 })
4972
;; Two-register structure store, mirror of aarch64_simd_ld2.
4973 (define_insn "aarch64_simd_st2<mode>"
4974 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4975 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4976 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4977 UNSPEC_ST2))]
4978 "TARGET_SIMD"
4979 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4980 [(set_attr "type" "neon_store2_2reg<q>")]
4981 )
4982
4983 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4984 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4985 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4986 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4987 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4988 (match_operand:SI 2 "immediate_operand" "i")]
4989 UNSPEC_ST2_LANE))]
4990 "TARGET_SIMD"
4991 {
4992 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4993 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4994 }
4995 [(set_attr "type" "neon_store2_one_lane<q>")]
4996 )
4997
;; Standard-name store expander; permute-then-store on big-endian,
;; symmetric with vec_load_lanesoi above.
4998 (define_expand "vec_store_lanesoi<mode>"
4999 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5000 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5001 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002 UNSPEC_ST2))]
5003 "TARGET_SIMD"
5004 {
5005 if (BYTES_BIG_ENDIAN)
5006 {
5007 rtx tmp = gen_reg_rtx (OImode);
5008 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5009 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5010 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5011 }
5012 else
5013 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5014 DONE;
5015 })
5016
;; Three-register structure patterns (CImode register list); these
;; mirror the ld2/st2 family above, lane count aside.
5017 (define_insn "aarch64_simd_ld3<mode>"
5018 [(set (match_operand:CI 0 "register_operand" "=w")
5019 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5020 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5021 UNSPEC_LD3))]
5022 "TARGET_SIMD"
5023 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5024 [(set_attr "type" "neon_load3_3reg<q>")]
5025 )
5026
;; LD3R: load one 3-element structure, replicate to all lanes.
5027 (define_insn "aarch64_simd_ld3r<mode>"
5028 [(set (match_operand:CI 0 "register_operand" "=w")
5029 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5030 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5031 UNSPEC_LD3_DUP))]
5032 "TARGET_SIMD"
5033 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5034 [(set_attr "type" "neon_load3_all_lanes<q>")]
5035 )
5036
;; LD3 to one lane; lane index endian-adjusted at output time.
5037 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5038 [(set (match_operand:CI 0 "register_operand" "=w")
5039 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5040 (match_operand:CI 2 "register_operand" "0")
5041 (match_operand:SI 3 "immediate_operand" "i")
5042 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043 UNSPEC_LD3_LANE))]
5044 "TARGET_SIMD"
5045 {
5046 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5047 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5048 }
5049 [(set_attr "type" "neon_load3_one_lane")]
5050 )
5051
;; Standard-name expander; permute after load on big-endian.
5052 (define_expand "vec_load_lanesci<mode>"
5053 [(set (match_operand:CI 0 "register_operand" "=w")
5054 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056 UNSPEC_LD3))]
5057 "TARGET_SIMD"
5058 {
5059 if (BYTES_BIG_ENDIAN)
5060 {
5061 rtx tmp = gen_reg_rtx (CImode);
5062 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5063 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5064 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5065 }
5066 else
5067 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5068 DONE;
5069 })
5070
;; Three-register structure store.
5071 (define_insn "aarch64_simd_st3<mode>"
5072 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5073 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5074 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075 UNSPEC_ST3))]
5076 "TARGET_SIMD"
5077 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5078 [(set_attr "type" "neon_store3_3reg<q>")]
5079 )
5080
5081 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5082 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5083 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5084 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5085 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5086 (match_operand:SI 2 "immediate_operand" "i")]
5087 UNSPEC_ST3_LANE))]
5088 "TARGET_SIMD"
5089 {
5090 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5091 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5092 }
5093 [(set_attr "type" "neon_store3_one_lane<q>")]
5094 )
5095
;; Standard-name store expander; permute before store on big-endian.
5096 (define_expand "vec_store_lanesci<mode>"
5097 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5098 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5099 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5100 UNSPEC_ST3))]
5101 "TARGET_SIMD"
5102 {
5103 if (BYTES_BIG_ENDIAN)
5104 {
5105 rtx tmp = gen_reg_rtx (CImode);
5106 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5107 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5108 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5109 }
5110 else
5111 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5112 DONE;
5113 })
5114
;; Four-register structure patterns (XImode register list); identical
;; in shape to the ld2/ld3 families above.
5115 (define_insn "aarch64_simd_ld4<mode>"
5116 [(set (match_operand:XI 0 "register_operand" "=w")
5117 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5118 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5119 UNSPEC_LD4))]
5120 "TARGET_SIMD"
5121 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5122 [(set_attr "type" "neon_load4_4reg<q>")]
5123 )
5124
;; LD4R: load one 4-element structure, replicate to all lanes.
5125 (define_insn "aarch64_simd_ld4r<mode>"
5126 [(set (match_operand:XI 0 "register_operand" "=w")
5127 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5128 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5129 UNSPEC_LD4_DUP))]
5130 "TARGET_SIMD"
5131 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5132 [(set_attr "type" "neon_load4_all_lanes<q>")]
5133 )
5134
;; LD4 to one lane; lane index endian-adjusted at output time.
5135 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5136 [(set (match_operand:XI 0 "register_operand" "=w")
5137 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5138 (match_operand:XI 2 "register_operand" "0")
5139 (match_operand:SI 3 "immediate_operand" "i")
5140 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5141 UNSPEC_LD4_LANE))]
5142 "TARGET_SIMD"
5143 {
5144 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5145 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5146 }
5147 [(set_attr "type" "neon_load4_one_lane")]
5148 )
5149
;; Standard-name expander; permute after load on big-endian.
5150 (define_expand "vec_load_lanesxi<mode>"
5151 [(set (match_operand:XI 0 "register_operand" "=w")
5152 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5153 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154 UNSPEC_LD4))]
5155 "TARGET_SIMD"
5156 {
5157 if (BYTES_BIG_ENDIAN)
5158 {
5159 rtx tmp = gen_reg_rtx (XImode);
5160 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5161 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5162 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5163 }
5164 else
5165 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5166 DONE;
5167 })
5168
;; Four-register structure store.
5169 (define_insn "aarch64_simd_st4<mode>"
5170 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5171 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5172 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5173 UNSPEC_ST4))]
5174 "TARGET_SIMD"
5175 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5176 [(set_attr "type" "neon_store4_4reg<q>")]
5177 )
5178
5179 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5180 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5181 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5182 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5183 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5184 (match_operand:SI 2 "immediate_operand" "i")]
5185 UNSPEC_ST4_LANE))]
5186 "TARGET_SIMD"
5187 {
5188 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5189 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5190 }
5191 [(set_attr "type" "neon_store4_one_lane<q>")]
5192 )
5193
;; Standard-name store expander; permute before store on big-endian.
5194 (define_expand "vec_store_lanesxi<mode>"
5195 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5196 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5197 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198 UNSPEC_ST4))]
5199 "TARGET_SIMD"
5200 {
5201 if (BYTES_BIG_ENDIAN)
5202 {
5203 rtx tmp = gen_reg_rtx (XImode);
5204 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5205 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5206 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5207 }
5208 else
5209 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5210 DONE;
5211 })
5212
;; Permute every vector in an OI/CI/XI register list through the TBL
;; mask in operand 2 (used by the big-endian lanes expanders above).
;; Split after reload into one TBL per constituent Q register; the
;; earlyclobber "=&w" keeps the output list from overlapping the input.
5213 (define_insn_and_split "aarch64_rev_reglist<mode>"
5214 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5215 (unspec:VSTRUCT
5216 [(match_operand:VSTRUCT 1 "register_operand" "w")
5217 (match_operand:V16QI 2 "register_operand" "w")]
5218 UNSPEC_REV_REGLIST))]
5219 "TARGET_SIMD"
5220 "#"
5221 "&& reload_completed"
5222 [(const_int 0)]
5223 {
5224 int i;
5225 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5226 for (i = 0; i < nregs; i++)
5227 {
5228 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5229 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5230 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5231 }
5232 DONE;
5233 }
5234 [(set_attr "type" "neon_tbl1_q")
5235 (set_attr "length" "<insn_count>")]
)
5237
5238 ;; Reload patterns for AdvSIMD register list operands.
5239
;; Move expander for structure modes: before reload, force a
;; mem-to-mem (or constant) source into a register so the *aarch64_mov
;; patterns below only ever see one memory operand.
5240 (define_expand "mov<mode>"
5241 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5242 (match_operand:VSTRUCT 1 "general_operand" ""))]
5243 "TARGET_SIMD"
5244 {
5245 if (can_create_pseudo_p ())
5246 {
5247 if (GET_CODE (operands[0]) != REG)
5248 operands[1] = force_reg (<MODE>mode, operands[1]);
5249 }
5250 })
5251
5252
;; LD1 of three consecutive registers from the address in operand 1
;; (intrinsic expander; wraps the raw pointer in a CImode MEM).
5253 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5254 [(match_operand:CI 0 "register_operand" "=w")
5255 (match_operand:DI 1 "register_operand" "r")
5256 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5257 "TARGET_SIMD"
5258 {
5259 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5260 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5261 DONE;
5262 })
5263
;; The matching three-register LD1 instruction.
5264 (define_insn "aarch64_ld1_x3_<mode>"
5265 [(set (match_operand:CI 0 "register_operand" "=w")
5266 (unspec:CI
5267 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5268 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5269 "TARGET_SIMD"
5270 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5271 [(set_attr "type" "neon_load1_3reg<q>")]
5272 )
5273
;; ST1 of two consecutive registers to the address in operand 0.
5274 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5275 [(match_operand:DI 0 "register_operand" "")
5276 (match_operand:OI 1 "register_operand" "")
5277 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5278 "TARGET_SIMD"
5279 {
5280 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5281 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5282 DONE;
5283 })
5284
;; The matching two-register ST1 instruction.
5285 (define_insn "aarch64_st1_x2_<mode>"
5286 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5287 (unspec:OI
5288 [(match_operand:OI 1 "register_operand" "w")
5289 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5290 "TARGET_SIMD"
5291 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5292 [(set_attr "type" "neon_store1_2reg<q>")]
5293 )
5294
;; ST1 of three consecutive registers to the address in operand 0.
5295 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5296 [(match_operand:DI 0 "register_operand" "")
5297 (match_operand:CI 1 "register_operand" "")
5298 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299 "TARGET_SIMD"
5300 {
5301 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5302 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5303 DONE;
5304 })
5305
;; The matching three-register ST1 instruction.
5306 (define_insn "aarch64_st1_x3_<mode>"
5307 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5308 (unspec:CI
5309 [(match_operand:CI 1 "register_operand" "w")
5310 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5311 "TARGET_SIMD"
5312 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5313 [(set_attr "type" "neon_store1_3reg<q>")]
5314 )
5315
;; Little-endian structure move: reg-reg moves stay as "#" and are
;; broken up by the OI/CI/XI splits below; memory forms use ST1/LD1.
5316 (define_insn "*aarch64_mov<mode>"
5317 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5318 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5319 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5320 && (register_operand (operands[0], <MODE>mode)
5321 || register_operand (operands[1], <MODE>mode))"
5322 "@
5323 #
5324 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5325 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5326 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5327 neon_load<nregs>_<nregs>reg_q")
5328 (set_attr "length" "<insn_count>,4,4")]
5329 )
5330
;; Big-endian element-ordered single-register load (LD1), kept as an
;; unspec so it is not confused with an ordinary endian-sensitive move.
5331 (define_insn "aarch64_be_ld1<mode>"
5332 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5333 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5334 "aarch64_simd_struct_operand" "Utv")]
5335 UNSPEC_LD1))]
5336 "TARGET_SIMD"
5337 "ld1\\t{%0<Vmtype>}, %1"
5338 [(set_attr "type" "neon_load1_1reg<q>")]
5339 )
5340
;; Big-endian single-register store (ST1), mirror of the above.
5341 (define_insn "aarch64_be_st1<mode>"
5342 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5343 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5344 UNSPEC_ST1))]
5345 "TARGET_SIMD"
5346 "st1\\t{%1<Vmtype>}, %0"
5347 [(set_attr "type" "neon_store1_1reg<q>")]
5348 )
5349
;; Big-endian OImode move: memory forms map directly onto STP/LDP of
;; Q registers; reg-reg stays "#" for the split below.
5350 (define_insn "*aarch64_be_movoi"
5351 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5352 (match_operand:OI 1 "general_operand" " w,w,m"))]
5353 "TARGET_SIMD && BYTES_BIG_ENDIAN
5354 && (register_operand (operands[0], OImode)
5355 || register_operand (operands[1], OImode))"
5356 "@
5357 #
5358 stp\\t%q1, %R1, %0
5359 ldp\\t%q0, %R0, %1"
5360 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5361 (set_attr "length" "8,4,4")]
5362 )
5363
;; Big-endian CImode move: always "#", decomposed by the CI split
;; below (lengths: 3 moves reg-reg, or an OI move + a TI move via mem).
5364 (define_insn "*aarch64_be_movci"
5365 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5366 (match_operand:CI 1 "general_operand" " w,w,o"))]
5367 "TARGET_SIMD && BYTES_BIG_ENDIAN
5368 && (register_operand (operands[0], CImode)
5369 || register_operand (operands[1], CImode))"
5370 "#"
5371 [(set_attr "type" "multiple")
5372 (set_attr "length" "12,4,4")]
5373 )
5374
;; Big-endian XImode move: always "#", decomposed by the XI split below.
5375 (define_insn "*aarch64_be_movxi"
5376 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5377 (match_operand:XI 1 "general_operand" " w,w,o"))]
5378 "TARGET_SIMD && BYTES_BIG_ENDIAN
5379 && (register_operand (operands[0], XImode)
5380 || register_operand (operands[1], XImode))"
5381 "#"
5382 [(set_attr "type" "multiple")
5383 (set_attr "length" "16,4,4")]
5384 )
5385
;; Post-reload split of an OI reg-reg move into 2 TImode moves.
5386 (define_split
5387 [(set (match_operand:OI 0 "register_operand")
5388 (match_operand:OI 1 "register_operand"))]
5389 "TARGET_SIMD && reload_completed"
5390 [(const_int 0)]
5391 {
5392 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5393 DONE;
5394 })
5395
;; Post-reload split of a CImode move: reg-reg becomes 3 TImode moves;
;; on big-endian a reg<->mem move becomes an OI move for the first 32
;; bytes plus a TI move (via V16QI to avoid endian lane swapping) for
;; the remaining 16.
5396 (define_split
5397 [(set (match_operand:CI 0 "nonimmediate_operand")
5398 (match_operand:CI 1 "general_operand"))]
5399 "TARGET_SIMD && reload_completed"
5400 [(const_int 0)]
5401 {
5402 if (register_operand (operands[0], CImode)
5403 && register_operand (operands[1], CImode))
5404 {
5405 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5406 DONE;
5407 }
5408 else if (BYTES_BIG_ENDIAN)
5409 {
5410 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5411 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5412 emit_move_insn (gen_lowpart (V16QImode,
5413 simplify_gen_subreg (TImode, operands[0],
5414 CImode, 32)),
5415 gen_lowpart (V16QImode,
5416 simplify_gen_subreg (TImode, operands[1],
5417 CImode, 32)));
5418 DONE;
5419 }
5420 else
5421 FAIL;
5422 })
5423
;; Post-reload split of an XImode move: reg-reg becomes 4 TImode moves;
;; on big-endian a reg<->mem move becomes two OImode moves.
5424 (define_split
5425 [(set (match_operand:XI 0 "nonimmediate_operand")
5426 (match_operand:XI 1 "general_operand"))]
5427 "TARGET_SIMD && reload_completed"
5428 [(const_int 0)]
5429 {
5430 if (register_operand (operands[0], XImode)
5431 && register_operand (operands[1], XImode))
5432 {
5433 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5434 DONE;
5435 }
5436 else if (BYTES_BIG_ENDIAN)
5437 {
5438 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5439 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5440 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5441 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5442 DONE;
5443 }
5444 else
5445 FAIL;
5446 })
5447
;; Intrinsic expander for LD<n>R: build a BLKmode MEM sized to the
;; <nregs> structure elements actually read, then emit the ld<n>r insn.
5448 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5449 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5450 (match_operand:DI 1 "register_operand" "w")
5451 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5452 "TARGET_SIMD"
5453 {
5454 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
 /* The access reads one element per register, not whole vectors.  */
5455 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5456 * <VSTRUCT:nregs>);
5457
5458 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5459 mem));
5460 DONE;
5461 })
5462
;; D-register structure loads.  For true vector D modes (VD) the real
;; LD2/LD3/LD4 is used; for 64-bit scalar modes (DX: DI/DF) there is no
;; structure form, so a multi-register LD1 with .1d layout is emitted.
5463 (define_insn "aarch64_ld2<mode>_dreg"
5464 [(set (match_operand:OI 0 "register_operand" "=w")
5465 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5466 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5467 UNSPEC_LD2_DREG))]
5468 "TARGET_SIMD"
5469 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5470 [(set_attr "type" "neon_load2_2reg<q>")]
5471 )
5472
;; Scalar-mode (DX) variant: LD1 of two .1d registers.
5473 (define_insn "aarch64_ld2<mode>_dreg"
5474 [(set (match_operand:OI 0 "register_operand" "=w")
5475 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5476 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5477 UNSPEC_LD2_DREG))]
5478 "TARGET_SIMD"
5479 "ld1\\t{%S0.1d - %T0.1d}, %1"
5480 [(set_attr "type" "neon_load1_2reg<q>")]
5481 )
5482
;; Vector-mode LD3 into three D registers.
5483 (define_insn "aarch64_ld3<mode>_dreg"
5484 [(set (match_operand:CI 0 "register_operand" "=w")
5485 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5486 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5487 UNSPEC_LD3_DREG))]
5488 "TARGET_SIMD"
5489 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5490 [(set_attr "type" "neon_load3_3reg<q>")]
5491 )
5492
;; Scalar-mode (DX) variant: LD1 of three .1d registers.
5493 (define_insn "aarch64_ld3<mode>_dreg"
5494 [(set (match_operand:CI 0 "register_operand" "=w")
5495 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5496 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5497 UNSPEC_LD3_DREG))]
5498 "TARGET_SIMD"
5499 "ld1\\t{%S0.1d - %U0.1d}, %1"
5500 [(set_attr "type" "neon_load1_3reg<q>")]
5501 )
5502
;; Vector-mode LD4 into four D registers.
5503 (define_insn "aarch64_ld4<mode>_dreg"
5504 [(set (match_operand:XI 0 "register_operand" "=w")
5505 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5506 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5507 UNSPEC_LD4_DREG))]
5508 "TARGET_SIMD"
5509 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5510 [(set_attr "type" "neon_load4_4reg<q>")]
5511 )
5512
;; Scalar-mode (DX) variant: LD1 of four .1d registers.
5513 (define_insn "aarch64_ld4<mode>_dreg"
5514 [(set (match_operand:XI 0 "register_operand" "=w")
5515 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5516 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5517 UNSPEC_LD4_DREG))]
5518 "TARGET_SIMD"
5519 "ld1\\t{%S0.1d - %V0.1d}, %1"
5520 [(set_attr "type" "neon_load1_4reg<q>")]
5521 )
5522
5523 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5524 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5525 (match_operand:DI 1 "register_operand" "r")
5526 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5527 "TARGET_SIMD"
5528 {
5529 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5530 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5531
5532 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5533 DONE;
5534 })
5535
5536 (define_expand "aarch64_ld1<VALL_F16:mode>"
5537 [(match_operand:VALL_F16 0 "register_operand")
5538 (match_operand:DI 1 "register_operand")]
5539 "TARGET_SIMD"
5540 {
5541 machine_mode mode = <VALL_F16:MODE>mode;
5542 rtx mem = gen_rtx_MEM (mode, operands[1]);
5543
5544 if (BYTES_BIG_ENDIAN)
5545 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5546 else
5547 emit_move_insn (operands[0], mem);
5548 DONE;
5549 })
5550
5551 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5552 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5553 (match_operand:DI 1 "register_operand" "r")
5554 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5555 "TARGET_SIMD"
5556 {
5557 machine_mode mode = <VSTRUCT:MODE>mode;
5558 rtx mem = gen_rtx_MEM (mode, operands[1]);
5559
5560 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5561 DONE;
5562 })
5563
5564 (define_expand "aarch64_ld1x2<VQ:mode>"
5565 [(match_operand:OI 0 "register_operand" "=w")
5566 (match_operand:DI 1 "register_operand" "r")
5567 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568 "TARGET_SIMD"
5569 {
5570 machine_mode mode = OImode;
5571 rtx mem = gen_rtx_MEM (mode, operands[1]);
5572
5573 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5574 DONE;
5575 })
5576
5577 (define_expand "aarch64_ld1x2<VDC:mode>"
5578 [(match_operand:OI 0 "register_operand" "=w")
5579 (match_operand:DI 1 "register_operand" "r")
5580 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 "TARGET_SIMD"
5582 {
5583 machine_mode mode = OImode;
5584 rtx mem = gen_rtx_MEM (mode, operands[1]);
5585
5586 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5587 DONE;
5588 })
5589
5590
5591 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5592 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5593 (match_operand:DI 1 "register_operand" "w")
5594 (match_operand:VSTRUCT 2 "register_operand" "0")
5595 (match_operand:SI 3 "immediate_operand" "i")
5596 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5597 "TARGET_SIMD"
5598 {
5599 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5600 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5601 * <VSTRUCT:nregs>);
5602
5603 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5604 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5605 operands[0], mem, operands[2], operands[3]));
5606 DONE;
5607 })
5608
5609 ;; Expanders for builtins to extract vector registers from large
5610 ;; opaque integer modes.
5611
5612 ;; D-register list.
5613
5614 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5615 [(match_operand:VDC 0 "register_operand" "=w")
5616 (match_operand:VSTRUCT 1 "register_operand" "w")
5617 (match_operand:SI 2 "immediate_operand" "i")]
5618 "TARGET_SIMD"
5619 {
5620 int part = INTVAL (operands[2]);
5621 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5622 int offset = part * 16;
5623
5624 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5625 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5626 DONE;
5627 })
5628
5629 ;; Q-register list.
5630
5631 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5632 [(match_operand:VQ 0 "register_operand" "=w")
5633 (match_operand:VSTRUCT 1 "register_operand" "w")
5634 (match_operand:SI 2 "immediate_operand" "i")]
5635 "TARGET_SIMD"
5636 {
5637 int part = INTVAL (operands[2]);
5638 int offset = part * 16;
5639
5640 emit_move_insn (operands[0],
5641 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5642 DONE;
5643 })
5644
5645 ;; Permuted-store expanders for neon intrinsics.
5646
5647 ;; Permute instructions
5648
5649 ;; vec_perm support
5650
5651 (define_expand "vec_perm<mode>"
5652 [(match_operand:VB 0 "register_operand")
5653 (match_operand:VB 1 "register_operand")
5654 (match_operand:VB 2 "register_operand")
5655 (match_operand:VB 3 "register_operand")]
5656 "TARGET_SIMD"
5657 {
5658 aarch64_expand_vec_perm (operands[0], operands[1],
5659 operands[2], operands[3], <nunits>);
5660 DONE;
5661 })
5662
5663 (define_insn "aarch64_tbl1<mode>"
5664 [(set (match_operand:VB 0 "register_operand" "=w")
5665 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5666 (match_operand:VB 2 "register_operand" "w")]
5667 UNSPEC_TBL))]
5668 "TARGET_SIMD"
5669 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5670 [(set_attr "type" "neon_tbl1<q>")]
5671 )
5672
5673 ;; Two source registers.
5674
5675 (define_insn "aarch64_tbl2v16qi"
5676 [(set (match_operand:V16QI 0 "register_operand" "=w")
5677 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5678 (match_operand:V16QI 2 "register_operand" "w")]
5679 UNSPEC_TBL))]
5680 "TARGET_SIMD"
5681 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5682 [(set_attr "type" "neon_tbl2_q")]
5683 )
5684
5685 (define_insn "aarch64_tbl3<mode>"
5686 [(set (match_operand:VB 0 "register_operand" "=w")
5687 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5688 (match_operand:VB 2 "register_operand" "w")]
5689 UNSPEC_TBL))]
5690 "TARGET_SIMD"
5691 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5692 [(set_attr "type" "neon_tbl3")]
5693 )
5694
5695 (define_insn "aarch64_tbx4<mode>"
5696 [(set (match_operand:VB 0 "register_operand" "=w")
5697 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5698 (match_operand:OI 2 "register_operand" "w")
5699 (match_operand:VB 3 "register_operand" "w")]
5700 UNSPEC_TBX))]
5701 "TARGET_SIMD"
5702 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5703 [(set_attr "type" "neon_tbl4")]
5704 )
5705
5706 ;; Three source registers.
5707
5708 (define_insn "aarch64_qtbl3<mode>"
5709 [(set (match_operand:VB 0 "register_operand" "=w")
5710 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5711 (match_operand:VB 2 "register_operand" "w")]
5712 UNSPEC_TBL))]
5713 "TARGET_SIMD"
5714 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5715 [(set_attr "type" "neon_tbl3")]
5716 )
5717
5718 (define_insn "aarch64_qtbx3<mode>"
5719 [(set (match_operand:VB 0 "register_operand" "=w")
5720 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5721 (match_operand:CI 2 "register_operand" "w")
5722 (match_operand:VB 3 "register_operand" "w")]
5723 UNSPEC_TBX))]
5724 "TARGET_SIMD"
5725 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5726 [(set_attr "type" "neon_tbl3")]
5727 )
5728
5729 ;; Four source registers.
5730
5731 (define_insn "aarch64_qtbl4<mode>"
5732 [(set (match_operand:VB 0 "register_operand" "=w")
5733 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5734 (match_operand:VB 2 "register_operand" "w")]
5735 UNSPEC_TBL))]
5736 "TARGET_SIMD"
5737 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5738 [(set_attr "type" "neon_tbl4")]
5739 )
5740
5741 (define_insn "aarch64_qtbx4<mode>"
5742 [(set (match_operand:VB 0 "register_operand" "=w")
5743 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5744 (match_operand:XI 2 "register_operand" "w")
5745 (match_operand:VB 3 "register_operand" "w")]
5746 UNSPEC_TBX))]
5747 "TARGET_SIMD"
5748 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5749 [(set_attr "type" "neon_tbl4")]
5750 )
5751
5752 (define_insn_and_split "aarch64_combinev16qi"
5753 [(set (match_operand:OI 0 "register_operand" "=w")
5754 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5755 (match_operand:V16QI 2 "register_operand" "w")]
5756 UNSPEC_CONCAT))]
5757 "TARGET_SIMD"
5758 "#"
5759 "&& reload_completed"
5760 [(const_int 0)]
5761 {
5762 aarch64_split_combinev16qi (operands);
5763 DONE;
5764 }
5765 [(set_attr "type" "multiple")]
5766 )
5767
5768 ;; This instruction's pattern is generated directly by
5769 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5770 ;; need corresponding changes there.
5771 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5772 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5773 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5774 (match_operand:VALL_F16 2 "register_operand" "w")]
5775 PERMUTE))]
5776 "TARGET_SIMD"
5777 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5778 [(set_attr "type" "neon_permute<q>")]
5779 )
5780
5781 ;; This instruction's pattern is generated directly by
5782 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5783 ;; need corresponding changes there. Note that the immediate (third)
5784 ;; operand is a lane index not a byte index.
5785 (define_insn "aarch64_ext<mode>"
5786 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5787 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5788 (match_operand:VALL_F16 2 "register_operand" "w")
5789 (match_operand:SI 3 "immediate_operand" "i")]
5790 UNSPEC_EXT))]
5791 "TARGET_SIMD"
5792 {
5793 operands[3] = GEN_INT (INTVAL (operands[3])
5794 * GET_MODE_UNIT_SIZE (<MODE>mode));
5795 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5796 }
5797 [(set_attr "type" "neon_ext<q>")]
5798 )
5799
5800 ;; This instruction's pattern is generated directly by
5801 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5802 ;; need corresponding changes there.
5803 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5804 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5805 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5806 REVERSE))]
5807 "TARGET_SIMD"
5808 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5809 [(set_attr "type" "neon_rev<q>")]
5810 )
5811
5812 (define_insn "aarch64_st2<mode>_dreg"
5813 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5814 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5815 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5816 UNSPEC_ST2))]
5817 "TARGET_SIMD"
5818 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5819 [(set_attr "type" "neon_store2_2reg")]
5820 )
5821
5822 (define_insn "aarch64_st2<mode>_dreg"
5823 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5824 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5825 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5826 UNSPEC_ST2))]
5827 "TARGET_SIMD"
5828 "st1\\t{%S1.1d - %T1.1d}, %0"
5829 [(set_attr "type" "neon_store1_2reg")]
5830 )
5831
5832 (define_insn "aarch64_st3<mode>_dreg"
5833 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5834 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5835 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5836 UNSPEC_ST3))]
5837 "TARGET_SIMD"
5838 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5839 [(set_attr "type" "neon_store3_3reg")]
5840 )
5841
5842 (define_insn "aarch64_st3<mode>_dreg"
5843 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5844 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5845 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5846 UNSPEC_ST3))]
5847 "TARGET_SIMD"
5848 "st1\\t{%S1.1d - %U1.1d}, %0"
5849 [(set_attr "type" "neon_store1_3reg")]
5850 )
5851
5852 (define_insn "aarch64_st4<mode>_dreg"
5853 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5854 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5855 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5856 UNSPEC_ST4))]
5857 "TARGET_SIMD"
5858 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5859 [(set_attr "type" "neon_store4_4reg")]
5860 )
5861
5862 (define_insn "aarch64_st4<mode>_dreg"
5863 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5864 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5865 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5866 UNSPEC_ST4))]
5867 "TARGET_SIMD"
5868 "st1\\t{%S1.1d - %V1.1d}, %0"
5869 [(set_attr "type" "neon_store1_4reg")]
5870 )
5871
5872 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5873 [(match_operand:DI 0 "register_operand" "r")
5874 (match_operand:VSTRUCT 1 "register_operand" "w")
5875 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5876 "TARGET_SIMD"
5877 {
5878 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5879 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5880
5881 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5882 DONE;
5883 })
5884
5885 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5886 [(match_operand:DI 0 "register_operand" "r")
5887 (match_operand:VSTRUCT 1 "register_operand" "w")
5888 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5889 "TARGET_SIMD"
5890 {
5891 machine_mode mode = <VSTRUCT:MODE>mode;
5892 rtx mem = gen_rtx_MEM (mode, operands[0]);
5893
5894 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5895 DONE;
5896 })
5897
5898 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5899 [(match_operand:DI 0 "register_operand" "r")
5900 (match_operand:VSTRUCT 1 "register_operand" "w")
5901 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5902 (match_operand:SI 2 "immediate_operand")]
5903 "TARGET_SIMD"
5904 {
5905 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5906 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5907 * <VSTRUCT:nregs>);
5908
5909 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5910 mem, operands[1], operands[2]));
5911 DONE;
5912 })
5913
5914 (define_expand "aarch64_st1<VALL_F16:mode>"
5915 [(match_operand:DI 0 "register_operand")
5916 (match_operand:VALL_F16 1 "register_operand")]
5917 "TARGET_SIMD"
5918 {
5919 machine_mode mode = <VALL_F16:MODE>mode;
5920 rtx mem = gen_rtx_MEM (mode, operands[0]);
5921
5922 if (BYTES_BIG_ENDIAN)
5923 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5924 else
5925 emit_move_insn (mem, operands[1]);
5926 DONE;
5927 })
5928
5929 ;; Expander for builtins to insert vector registers into large
5930 ;; opaque integer modes.
5931
5932 ;; Q-register list. We don't need a D-reg inserter as we zero
5933 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5934
5935 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5936 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5937 (match_operand:VSTRUCT 1 "register_operand" "0")
5938 (match_operand:VQ 2 "register_operand" "w")
5939 (match_operand:SI 3 "immediate_operand" "i")]
5940 "TARGET_SIMD"
5941 {
5942 int part = INTVAL (operands[3]);
5943 int offset = part * 16;
5944
5945 emit_move_insn (operands[0], operands[1]);
5946 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5947 operands[2]);
5948 DONE;
5949 })
5950
5951 ;; Standard pattern name vec_init<mode><Vel>.
5952
5953 (define_expand "vec_init<mode><Vel>"
5954 [(match_operand:VALL_F16 0 "register_operand" "")
5955 (match_operand 1 "" "")]
5956 "TARGET_SIMD"
5957 {
5958 aarch64_expand_vector_init (operands[0], operands[1]);
5959 DONE;
5960 })
5961
5962 (define_expand "vec_init<mode><Vhalf>"
5963 [(match_operand:VQ_NO2E 0 "register_operand" "")
5964 (match_operand 1 "" "")]
5965 "TARGET_SIMD"
5966 {
5967 aarch64_expand_vector_init (operands[0], operands[1]);
5968 DONE;
5969 })
5970
5971 (define_insn "*aarch64_simd_ld1r<mode>"
5972 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5973 (vec_duplicate:VALL_F16
5974 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5975 "TARGET_SIMD"
5976 "ld1r\\t{%0.<Vtype>}, %1"
5977 [(set_attr "type" "neon_load1_all_lanes")]
5978 )
5979
5980 (define_insn "aarch64_simd_ld1<mode>_x2"
5981 [(set (match_operand:OI 0 "register_operand" "=w")
5982 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5983 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5984 UNSPEC_LD1))]
5985 "TARGET_SIMD"
5986 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5987 [(set_attr "type" "neon_load1_2reg<q>")]
5988 )
5989
5990 (define_insn "aarch64_simd_ld1<mode>_x2"
5991 [(set (match_operand:OI 0 "register_operand" "=w")
5992 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5993 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5994 UNSPEC_LD1))]
5995 "TARGET_SIMD"
5996 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5997 [(set_attr "type" "neon_load1_2reg<q>")]
5998 )
5999
6000
6001 (define_insn "@aarch64_frecpe<mode>"
6002 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6003 (unspec:VHSDF_HSDF
6004 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6005 UNSPEC_FRECPE))]
6006 "TARGET_SIMD"
6007 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6008 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6009 )
6010
6011 (define_insn "aarch64_frecpx<mode>"
6012 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6013 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6014 UNSPEC_FRECPX))]
6015 "TARGET_SIMD"
6016 "frecpx\t%<s>0, %<s>1"
6017 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6018 )
6019
6020 (define_insn "@aarch64_frecps<mode>"
6021 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6022 (unspec:VHSDF_HSDF
6023 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6024 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6025 UNSPEC_FRECPS))]
6026 "TARGET_SIMD"
6027 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6028 [(set_attr "type" "neon_fp_recps_<stype><q>")]
6029 )
6030
6031 (define_insn "aarch64_urecpe<mode>"
6032 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6033 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6034 UNSPEC_URECPE))]
6035 "TARGET_SIMD"
6036 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6037 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6038
6039 ;; Standard pattern name vec_extract<mode><Vel>.
6040
6041 (define_expand "vec_extract<mode><Vel>"
6042 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6043 (match_operand:VALL_F16 1 "register_operand" "")
6044 (match_operand:SI 2 "immediate_operand" "")]
6045 "TARGET_SIMD"
6046 {
6047 emit_insn
6048 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6049 DONE;
6050 })
6051
6052 ;; aes
6053
6054 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6055 [(set (match_operand:V16QI 0 "register_operand" "=w")
6056 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6057 (match_operand:V16QI 2 "register_operand" "w")]
6058 CRYPTO_AES))]
6059 "TARGET_SIMD && TARGET_AES"
6060 "aes<aes_op>\\t%0.16b, %2.16b"
6061 [(set_attr "type" "crypto_aese")]
6062 )
6063
6064 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6065 [(set (match_operand:V16QI 0 "register_operand" "=w")
6066 (unspec:V16QI [(xor:V16QI
6067 (match_operand:V16QI 1 "register_operand" "%0")
6068 (match_operand:V16QI 2 "register_operand" "w"))
6069 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6070 CRYPTO_AES))]
6071 "TARGET_SIMD && TARGET_AES"
6072 "aes<aes_op>\\t%0.16b, %2.16b"
6073 [(set_attr "type" "crypto_aese")]
6074 )
6075
6076 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6077 [(set (match_operand:V16QI 0 "register_operand" "=w")
6078 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6079 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6080 (match_operand:V16QI 2 "register_operand" "w"))]
6081 CRYPTO_AES))]
6082 "TARGET_SIMD && TARGET_AES"
6083 "aes<aes_op>\\t%0.16b, %2.16b"
6084 [(set_attr "type" "crypto_aese")]
6085 )
6086
6087 ;; When AES/AESMC fusion is enabled we want the register allocation to
6088 ;; look like:
6089 ;; AESE Vn, _
6090 ;; AESMC Vn, Vn
6091 ;; So prefer to tie operand 1 to operand 0 when fusing.
6092
6093 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6094 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6095 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6096 CRYPTO_AESMC))]
6097 "TARGET_SIMD && TARGET_AES"
6098 "aes<aesmc_op>\\t%0.16b, %1.16b"
6099 [(set_attr "type" "crypto_aesmc")
6100 (set_attr_alternative "enabled"
6101 [(if_then_else (match_test
6102 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6103 (const_string "yes" )
6104 (const_string "no"))
6105 (const_string "yes")])]
6106 )
6107
6108 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6109 ;; and enforce the register dependency without scheduling or register
6110 ;; allocation messing up the order or introducing moves inbetween.
6111 ;; Mash the two together during combine.
6112
6113 (define_insn "*aarch64_crypto_aese_fused"
6114 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6115 (unspec:V16QI
6116 [(unspec:V16QI
6117 [(match_operand:V16QI 1 "register_operand" "0")
6118 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
6119 ] UNSPEC_AESMC))]
6120 "TARGET_SIMD && TARGET_AES
6121 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6122 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6123 [(set_attr "type" "crypto_aese")
6124 (set_attr "length" "8")]
6125 )
6126
6127 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6128 ;; and enforce the register dependency without scheduling or register
6129 ;; allocation messing up the order or introducing moves inbetween.
6130 ;; Mash the two together during combine.
6131
6132 (define_insn "*aarch64_crypto_aesd_fused"
6133 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6134 (unspec:V16QI
6135 [(unspec:V16QI
6136 [(match_operand:V16QI 1 "register_operand" "0")
6137 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6138 ] UNSPEC_AESIMC))]
6139 "TARGET_SIMD && TARGET_AES
6140 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6141 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6142 [(set_attr "type" "crypto_aese")
6143 (set_attr "length" "8")]
6144 )
6145
6146 ;; sha1
6147
6148 (define_insn "aarch64_crypto_sha1hsi"
6149 [(set (match_operand:SI 0 "register_operand" "=w")
6150 (unspec:SI [(match_operand:SI 1
6151 "register_operand" "w")]
6152 UNSPEC_SHA1H))]
6153 "TARGET_SIMD && TARGET_SHA2"
6154 "sha1h\\t%s0, %s1"
6155 [(set_attr "type" "crypto_sha1_fast")]
6156 )
6157
6158 (define_insn "aarch64_crypto_sha1hv4si"
6159 [(set (match_operand:SI 0 "register_operand" "=w")
6160 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6161 (parallel [(const_int 0)]))]
6162 UNSPEC_SHA1H))]
6163 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6164 "sha1h\\t%s0, %s1"
6165 [(set_attr "type" "crypto_sha1_fast")]
6166 )
6167
6168 (define_insn "aarch64_be_crypto_sha1hv4si"
6169 [(set (match_operand:SI 0 "register_operand" "=w")
6170 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6171 (parallel [(const_int 3)]))]
6172 UNSPEC_SHA1H))]
6173 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6174 "sha1h\\t%s0, %s1"
6175 [(set_attr "type" "crypto_sha1_fast")]
6176 )
6177
6178 (define_insn "aarch64_crypto_sha1su1v4si"
6179 [(set (match_operand:V4SI 0 "register_operand" "=w")
6180 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6181 (match_operand:V4SI 2 "register_operand" "w")]
6182 UNSPEC_SHA1SU1))]
6183 "TARGET_SIMD && TARGET_SHA2"
6184 "sha1su1\\t%0.4s, %2.4s"
6185 [(set_attr "type" "crypto_sha1_fast")]
6186 )
6187
6188 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6189 [(set (match_operand:V4SI 0 "register_operand" "=w")
6190 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6191 (match_operand:SI 2 "register_operand" "w")
6192 (match_operand:V4SI 3 "register_operand" "w")]
6193 CRYPTO_SHA1))]
6194 "TARGET_SIMD && TARGET_SHA2"
6195 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6196 [(set_attr "type" "crypto_sha1_slow")]
6197 )
6198
6199 (define_insn "aarch64_crypto_sha1su0v4si"
6200 [(set (match_operand:V4SI 0 "register_operand" "=w")
6201 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6202 (match_operand:V4SI 2 "register_operand" "w")
6203 (match_operand:V4SI 3 "register_operand" "w")]
6204 UNSPEC_SHA1SU0))]
6205 "TARGET_SIMD && TARGET_SHA2"
6206 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6207 [(set_attr "type" "crypto_sha1_xor")]
6208 )
6209
6210 ;; sha256
6211
6212 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6213 [(set (match_operand:V4SI 0 "register_operand" "=w")
6214 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6215 (match_operand:V4SI 2 "register_operand" "w")
6216 (match_operand:V4SI 3 "register_operand" "w")]
6217 CRYPTO_SHA256))]
6218 "TARGET_SIMD && TARGET_SHA2"
6219 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6220 [(set_attr "type" "crypto_sha256_slow")]
6221 )
6222
6223 (define_insn "aarch64_crypto_sha256su0v4si"
6224 [(set (match_operand:V4SI 0 "register_operand" "=w")
6225 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6226 (match_operand:V4SI 2 "register_operand" "w")]
6227 UNSPEC_SHA256SU0))]
6228 "TARGET_SIMD && TARGET_SHA2"
6229 "sha256su0\\t%0.4s, %2.4s"
6230 [(set_attr "type" "crypto_sha256_fast")]
6231 )
6232
6233 (define_insn "aarch64_crypto_sha256su1v4si"
6234 [(set (match_operand:V4SI 0 "register_operand" "=w")
6235 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6236 (match_operand:V4SI 2 "register_operand" "w")
6237 (match_operand:V4SI 3 "register_operand" "w")]
6238 UNSPEC_SHA256SU1))]
6239 "TARGET_SIMD && TARGET_SHA2"
6240 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6241 [(set_attr "type" "crypto_sha256_slow")]
6242 )
6243
6244 ;; sha512
6245
6246 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6247 [(set (match_operand:V2DI 0 "register_operand" "=w")
6248 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6249 (match_operand:V2DI 2 "register_operand" "w")
6250 (match_operand:V2DI 3 "register_operand" "w")]
6251 CRYPTO_SHA512))]
6252 "TARGET_SIMD && TARGET_SHA3"
6253 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6254 [(set_attr "type" "crypto_sha512")]
6255 )
6256
6257 (define_insn "aarch64_crypto_sha512su0qv2di"
6258 [(set (match_operand:V2DI 0 "register_operand" "=w")
6259 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6260 (match_operand:V2DI 2 "register_operand" "w")]
6261 UNSPEC_SHA512SU0))]
6262 "TARGET_SIMD && TARGET_SHA3"
6263 "sha512su0\\t%0.2d, %2.2d"
6264 [(set_attr "type" "crypto_sha512")]
6265 )
6266
6267 (define_insn "aarch64_crypto_sha512su1qv2di"
6268 [(set (match_operand:V2DI 0 "register_operand" "=w")
6269 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6270 (match_operand:V2DI 2 "register_operand" "w")
6271 (match_operand:V2DI 3 "register_operand" "w")]
6272 UNSPEC_SHA512SU1))]
6273 "TARGET_SIMD && TARGET_SHA3"
6274 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6275 [(set_attr "type" "crypto_sha512")]
6276 )
6277
6278 ;; sha3
6279
6280 (define_insn "eor3q<mode>4"
6281 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6282 (xor:VQ_I
6283 (xor:VQ_I
6284 (match_operand:VQ_I 2 "register_operand" "w")
6285 (match_operand:VQ_I 3 "register_operand" "w"))
6286 (match_operand:VQ_I 1 "register_operand" "w")))]
6287 "TARGET_SIMD && TARGET_SHA3"
6288 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6289 [(set_attr "type" "crypto_sha3")]
6290 )
6291
6292 (define_insn "aarch64_rax1qv2di"
6293 [(set (match_operand:V2DI 0 "register_operand" "=w")
6294 (xor:V2DI
6295 (rotate:V2DI
6296 (match_operand:V2DI 2 "register_operand" "w")
6297 (const_int 1))
6298 (match_operand:V2DI 1 "register_operand" "w")))]
6299 "TARGET_SIMD && TARGET_SHA3"
6300 "rax1\\t%0.2d, %1.2d, %2.2d"
6301 [(set_attr "type" "crypto_sha3")]
6302 )
6303
6304 (define_insn "aarch64_xarqv2di"
6305 [(set (match_operand:V2DI 0 "register_operand" "=w")
6306 (rotatert:V2DI
6307 (xor:V2DI
6308 (match_operand:V2DI 1 "register_operand" "%w")
6309 (match_operand:V2DI 2 "register_operand" "w"))
6310 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6311 "TARGET_SIMD && TARGET_SHA3"
6312 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6313 [(set_attr "type" "crypto_sha3")]
6314 )
6315
6316 (define_insn "bcaxq<mode>4"
6317 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6318 (xor:VQ_I
6319 (and:VQ_I
6320 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6321 (match_operand:VQ_I 2 "register_operand" "w"))
6322 (match_operand:VQ_I 1 "register_operand" "w")))]
6323 "TARGET_SIMD && TARGET_SHA3"
6324 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6325 [(set_attr "type" "crypto_sha3")]
6326 )
6327
6328 ;; SM3
6329
6330 (define_insn "aarch64_sm3ss1qv4si"
6331 [(set (match_operand:V4SI 0 "register_operand" "=w")
6332 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6333 (match_operand:V4SI 2 "register_operand" "w")
6334 (match_operand:V4SI 3 "register_operand" "w")]
6335 UNSPEC_SM3SS1))]
6336 "TARGET_SIMD && TARGET_SM4"
6337 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6338 [(set_attr "type" "crypto_sm3")]
6339 )
6340
6341
6342 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6343 [(set (match_operand:V4SI 0 "register_operand" "=w")
6344 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6345 (match_operand:V4SI 2 "register_operand" "w")
6346 (match_operand:V4SI 3 "register_operand" "w")
6347 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6348 CRYPTO_SM3TT))]
6349 "TARGET_SIMD && TARGET_SM4"
6350 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6351 [(set_attr "type" "crypto_sm3")]
6352 )
6353
6354 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6355 [(set (match_operand:V4SI 0 "register_operand" "=w")
6356 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6357 (match_operand:V4SI 2 "register_operand" "w")
6358 (match_operand:V4SI 3 "register_operand" "w")]
6359 CRYPTO_SM3PART))]
6360 "TARGET_SIMD && TARGET_SM4"
6361 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6362 [(set_attr "type" "crypto_sm3")]
6363 )
6364
6365 ;; SM4
6366
6367 (define_insn "aarch64_sm4eqv4si"
6368 [(set (match_operand:V4SI 0 "register_operand" "=w")
6369 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6370 (match_operand:V4SI 2 "register_operand" "w")]
6371 UNSPEC_SM4E))]
6372 "TARGET_SIMD && TARGET_SM4"
6373 "sm4e\\t%0.4s, %2.4s"
6374 [(set_attr "type" "crypto_sm4")]
6375 )
6376
6377 (define_insn "aarch64_sm4ekeyqv4si"
6378 [(set (match_operand:V4SI 0 "register_operand" "=w")
6379 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6380 (match_operand:V4SI 2 "register_operand" "w")]
6381 UNSPEC_SM4EKEY))]
6382 "TARGET_SIMD && TARGET_SM4"
6383 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6384 [(set_attr "type" "crypto_sm4")]
6385 )
6386
6387 ;; fp16fml
6388
;; Expander for the fp16fml low-half widening multiply-accumulate
;; intrinsics (fmlal/fmlsl, selected by <f16mac1> from VFMLA16_LOW).
;; It builds two lane-selection PARALLELs covering the LOW half of the
;; half-float input vectors (third argument 'false' to
;; aarch64_simd_vect_par_cnst_half) and forwards everything to the
;; matching aarch64_simd_fml*_low insn pattern below, which does the
;; real vec_select/float_extend/fma matching.
;; NOTE(review): p1 and p2 are constructed by two identical calls; p2
;; could presumably just reuse p1 — confirm the generated insn really
;; needs two distinct rtx objects before simplifying.
6389 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6390 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6391 (unspec:VDQSF
6392 [(match_operand:VDQSF 1 "register_operand" "0")
6393 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6394 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6395 VFMLA16_LOW))]
6396 "TARGET_F16FML"
6397 {
6398 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6399 <nunits> * 2, false);
6400 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6401 <nunits> * 2, false);
6402
6403 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6404 operands[1],
6405 operands[2],
6406 operands[3],
6407 p1, p2));
6408 DONE;
6409
6410 })
6411
;; High-half counterpart of the expander above: identical shape, but the
;; lane-selection PARALLELs pick the HIGH half of the inputs (third
;; argument 'true'), and the VFMLA16_HIGH iterator routes to the
;; aarch64_simd_fml*_high insn patterns.
6412 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6413 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6414 (unspec:VDQSF
6415 [(match_operand:VDQSF 1 "register_operand" "0")
6416 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6417 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6418 VFMLA16_HIGH))]
6419 "TARGET_F16FML"
6420 {
6421 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6422 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6423
6424 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6425 operands[1],
6426 operands[2],
6427 operands[3],
6428 p1, p2));
6429 DONE;
6430 })
6431
;; The four full-width fp16fml insn patterns (fmlal/fmlsl x low/high).
;; Common structure: select one half of each V[48]HF input
;; (vect_par_cnst_lo_half for the low forms, _hi_half for the high
;; forms), widen it with float_extend, and fold into an fma whose
;; addend is the accumulator (operand 1, tied to the destination via
;; the "0" constraint).  The fmlsl variants additionally negate the
;; selected half of operand 2, which is how the "subtract" semantics
;; are expressed in RTL.  The high-half forms emit the "2"-suffixed
;; mnemonics (fmlal2/fmlsl2).  Operands 4 and 5 are the PARALLELs built
;; by the expanders above, matched here by predicate only.
6432 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6433 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6434 (fma:VDQSF
6435 (float_extend:VDQSF
6436 (vec_select:<VFMLA_SEL_W>
6437 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6438 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6439 (float_extend:VDQSF
6440 (vec_select:<VFMLA_SEL_W>
6441 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6442 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6443 (match_operand:VDQSF 1 "register_operand" "0")))]
6444 "TARGET_F16FML"
6445 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6446 [(set_attr "type" "neon_fp_mul_s")]
6447 )
6448
;; fmlsl low: same as fmlal low but with the first multiplicand negated.
6449 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6450 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6451 (fma:VDQSF
6452 (float_extend:VDQSF
6453 (neg:<VFMLA_SEL_W>
6454 (vec_select:<VFMLA_SEL_W>
6455 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6456 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6457 (float_extend:VDQSF
6458 (vec_select:<VFMLA_SEL_W>
6459 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6460 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6461 (match_operand:VDQSF 1 "register_operand" "0")))]
6462 "TARGET_F16FML"
6463 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6464 [(set_attr "type" "neon_fp_mul_s")]
6465 )
6466
;; fmlal high: selects the upper halves and emits fmlal2.
6467 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6468 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6469 (fma:VDQSF
6470 (float_extend:VDQSF
6471 (vec_select:<VFMLA_SEL_W>
6472 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6473 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6474 (float_extend:VDQSF
6475 (vec_select:<VFMLA_SEL_W>
6476 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6477 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6478 (match_operand:VDQSF 1 "register_operand" "0")))]
6479 "TARGET_F16FML"
6480 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6481 [(set_attr "type" "neon_fp_mul_s")]
6482 )
6483
;; fmlsl high: upper halves, negated first multiplicand, emits fmlsl2.
6484 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6485 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6486 (fma:VDQSF
6487 (float_extend:VDQSF
6488 (neg:<VFMLA_SEL_W>
6489 (vec_select:<VFMLA_SEL_W>
6490 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6491 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6492 (float_extend:VDQSF
6493 (vec_select:<VFMLA_SEL_W>
6494 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6495 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6496 (match_operand:VDQSF 1 "register_operand" "0")))]
6497 "TARGET_F16FML"
6498 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6499 [(set_attr "type" "neon_fp_mul_s")]
6500 )
6501
;; By-lane expanders for the 64-bit (V2SF result) forms: the lane index
;; (operand 4, a 2-bit immediate since the V4HF source has 4 lanes) is
;; converted with aarch64_endian_lane_rtx — presumably to account for
;; big-endian lane numbering; confirm against its definition in
;; aarch64.c.  p1 selects the low (resp. high) half of operand 2, and
;; the selected single lane of operand 3 is what the insn patterns
;; below broadcast via vec_duplicate.
6502 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6503 [(set (match_operand:V2SF 0 "register_operand" "")
6504 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6505 (match_operand:V4HF 2 "register_operand" "")
6506 (match_operand:V4HF 3 "register_operand" "")
6507 (match_operand:SI 4 "aarch64_imm2" "")]
6508 VFMLA16_LOW))]
6509 "TARGET_F16FML"
6510 {
6511 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6512 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6513
6514 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6515 operands[1],
6516 operands[2],
6517 operands[3],
6518 p1, lane));
6519 DONE;
6520 }
6521 )
6522
;; High-half variant: identical except for the 'true' half selector and
;; the VFMLA16_HIGH routing.
6523 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6524 [(set (match_operand:V2SF 0 "register_operand" "")
6525 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6526 (match_operand:V4HF 2 "register_operand" "")
6527 (match_operand:V4HF 3 "register_operand" "")
6528 (match_operand:SI 4 "aarch64_imm2" "")]
6529 VFMLA16_HIGH))]
6530 "TARGET_F16FML"
6531 {
6532 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6533 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6534
6535 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6536 operands[1],
6537 operands[2],
6538 operands[3],
6539 p1, lane));
6540 DONE;
6541 })
6542
;; By-lane insn patterns, V2SF result, V4HF lane source.  One H lane of
;; operand 3 is picked (operand 5, 2-bit immediate) and broadcast with
;; vec_duplicate, while a half of operand 2 is chosen by the PARALLEL in
;; operand 4.  Operand 3 uses the "x" constraint, which restricts the
;; indexed register — presumably to the low 16 V-registers required by
;; the by-element instruction encoding; confirm against the constraint
;; definitions in the aarch64 backend.  fmlsl forms negate the selected
;; half; high forms use _hi_half and the "2"-suffixed mnemonic.
6543 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6544 [(set (match_operand:V2SF 0 "register_operand" "=w")
6545 (fma:V2SF
6546 (float_extend:V2SF
6547 (vec_select:V2HF
6548 (match_operand:V4HF 2 "register_operand" "w")
6549 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6550 (float_extend:V2SF
6551 (vec_duplicate:V2HF
6552 (vec_select:HF
6553 (match_operand:V4HF 3 "register_operand" "x")
6554 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6555 (match_operand:V2SF 1 "register_operand" "0")))]
6556 "TARGET_F16FML"
6557 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6558 [(set_attr "type" "neon_fp_mul_s")]
6559 )
6560
6561 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6562 [(set (match_operand:V2SF 0 "register_operand" "=w")
6563 (fma:V2SF
6564 (float_extend:V2SF
6565 (neg:V2HF
6566 (vec_select:V2HF
6567 (match_operand:V4HF 2 "register_operand" "w")
6568 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6569 (float_extend:V2SF
6570 (vec_duplicate:V2HF
6571 (vec_select:HF
6572 (match_operand:V4HF 3 "register_operand" "x")
6573 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6574 (match_operand:V2SF 1 "register_operand" "0")))]
6575 "TARGET_F16FML"
6576 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6577 [(set_attr "type" "neon_fp_mul_s")]
6578 )
6579
6580 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6581 [(set (match_operand:V2SF 0 "register_operand" "=w")
6582 (fma:V2SF
6583 (float_extend:V2SF
6584 (vec_select:V2HF
6585 (match_operand:V4HF 2 "register_operand" "w")
6586 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6587 (float_extend:V2SF
6588 (vec_duplicate:V2HF
6589 (vec_select:HF
6590 (match_operand:V4HF 3 "register_operand" "x")
6591 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6592 (match_operand:V2SF 1 "register_operand" "0")))]
6593 "TARGET_F16FML"
6594 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6595 [(set_attr "type" "neon_fp_mul_s")]
6596 )
6597
6598 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6599 [(set (match_operand:V2SF 0 "register_operand" "=w")
6600 (fma:V2SF
6601 (float_extend:V2SF
6602 (neg:V2HF
6603 (vec_select:V2HF
6604 (match_operand:V4HF 2 "register_operand" "w")
6605 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6606 (float_extend:V2SF
6607 (vec_duplicate:V2HF
6608 (vec_select:HF
6609 (match_operand:V4HF 3 "register_operand" "x")
6610 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6611 (match_operand:V2SF 1 "register_operand" "0")))]
6612 "TARGET_F16FML"
6613 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6614 [(set_attr "type" "neon_fp_mul_s")]
6615 )
6616
;; 128-bit "laneq" expanders: V4SF result, both sources V8HF, so the
;; lane index (operand 4) is a 3-bit immediate (aarch64_lane_imm3) and
;; lane endian-adjustment uses V8HFmode.  Otherwise identical in shape
;; to the v2sf lane expanders above.
6617 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6618 [(set (match_operand:V4SF 0 "register_operand" "")
6619 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6620 (match_operand:V8HF 2 "register_operand" "")
6621 (match_operand:V8HF 3 "register_operand" "")
6622 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6623 VFMLA16_LOW))]
6624 "TARGET_F16FML"
6625 {
6626 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6627 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6628
6629 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6630 operands[1],
6631 operands[2],
6632 operands[3],
6633 p1, lane));
6634 DONE;
6635 })
6636
;; High-half variant ('true' selector, VFMLA16_HIGH routing).
6637 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6638 [(set (match_operand:V4SF 0 "register_operand" "")
6639 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6640 (match_operand:V8HF 2 "register_operand" "")
6641 (match_operand:V8HF 3 "register_operand" "")
6642 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6643 VFMLA16_HIGH))]
6644 "TARGET_F16FML"
6645 {
6646 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6647 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6648
6649 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6650 operands[1],
6651 operands[2],
6652 operands[3],
6653 p1, lane));
6654 DONE;
6655 })
6656
;; "laneq" insn patterns, V4SF result, V8HF lane source.  Lane index is
;; a 3-bit immediate ("Ui7" constraint, i.e. 0-7 for the 8 H lanes of a
;; 128-bit register).  Operand 3 again carries the "x" register
;; constraint (see note on the v2sf lane patterns).  Structure mirrors
;; the other insn quartets: lo/hi half select on operand 2, broadcast
;; lane of operand 3, neg for the fmlsl forms, fmlal2/fmlsl2 for high.
6657 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6658 [(set (match_operand:V4SF 0 "register_operand" "=w")
6659 (fma:V4SF
6660 (float_extend:V4SF
6661 (vec_select:V4HF
6662 (match_operand:V8HF 2 "register_operand" "w")
6663 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6664 (float_extend:V4SF
6665 (vec_duplicate:V4HF
6666 (vec_select:HF
6667 (match_operand:V8HF 3 "register_operand" "x")
6668 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6669 (match_operand:V4SF 1 "register_operand" "0")))]
6670 "TARGET_F16FML"
6671 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6672 [(set_attr "type" "neon_fp_mul_s")]
6673 )
6674
6675 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6676 [(set (match_operand:V4SF 0 "register_operand" "=w")
6677 (fma:V4SF
6678 (float_extend:V4SF
6679 (neg:V4HF
6680 (vec_select:V4HF
6681 (match_operand:V8HF 2 "register_operand" "w")
6682 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6683 (float_extend:V4SF
6684 (vec_duplicate:V4HF
6685 (vec_select:HF
6686 (match_operand:V8HF 3 "register_operand" "x")
6687 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6688 (match_operand:V4SF 1 "register_operand" "0")))]
6689 "TARGET_F16FML"
6690 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6691 [(set_attr "type" "neon_fp_mul_s")]
6692 )
6693
6694 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6695 [(set (match_operand:V4SF 0 "register_operand" "=w")
6696 (fma:V4SF
6697 (float_extend:V4SF
6698 (vec_select:V4HF
6699 (match_operand:V8HF 2 "register_operand" "w")
6700 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6701 (float_extend:V4SF
6702 (vec_duplicate:V4HF
6703 (vec_select:HF
6704 (match_operand:V8HF 3 "register_operand" "x")
6705 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6706 (match_operand:V4SF 1 "register_operand" "0")))]
6707 "TARGET_F16FML"
6708 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6709 [(set_attr "type" "neon_fp_mul_s")]
6710 )
6711
6712 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6713 [(set (match_operand:V4SF 0 "register_operand" "=w")
6714 (fma:V4SF
6715 (float_extend:V4SF
6716 (neg:V4HF
6717 (vec_select:V4HF
6718 (match_operand:V8HF 2 "register_operand" "w")
6719 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6720 (float_extend:V4SF
6721 (vec_duplicate:V4HF
6722 (vec_select:HF
6723 (match_operand:V8HF 3 "register_operand" "x")
6724 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6725 (match_operand:V4SF 1 "register_operand" "0")))]
6726 "TARGET_F16FML"
6727 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6728 [(set_attr "type" "neon_fp_mul_s")]
6729 )
6730
;; Mixed-width "laneq" expanders: V2SF result with a V4HF multiplicand
;; (operand 2) but a V8HF lane source (operand 3).  Hence the half
;; selector is built in V4HFmode while the lane endian-adjustment uses
;; V8HFmode with a 3-bit index — the asymmetry is intentional.
6731 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6732 [(set (match_operand:V2SF 0 "register_operand" "")
6733 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6734 (match_operand:V4HF 2 "register_operand" "")
6735 (match_operand:V8HF 3 "register_operand" "")
6736 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6737 VFMLA16_LOW))]
6738 "TARGET_F16FML"
6739 {
6740 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6741 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6742
6743 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6744 operands[1],
6745 operands[2],
6746 operands[3],
6747 p1, lane));
6748 DONE;
6749
6750 })
6751
;; High-half variant ('true' selector, VFMLA16_HIGH routing).
6752 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6753 [(set (match_operand:V2SF 0 "register_operand" "")
6754 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6755 (match_operand:V4HF 2 "register_operand" "")
6756 (match_operand:V8HF 3 "register_operand" "")
6757 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6758 VFMLA16_HIGH))]
6759 "TARGET_F16FML"
6760 {
6761 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6762 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6763
6764 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6765 operands[1],
6766 operands[2],
6767 operands[3],
6768 p1, lane));
6769 DONE;
6770
6771 })
6772
;; Mixed-width "laneq" insn patterns: V2SF result, V4HF half-selected
;; multiplicand (operand 2), lane taken from a full V8HF register
;; (operand 3, "x" constraint, 3-bit "Ui7" index).  Same four-way
;; lo/hi x fmlal/fmlsl structure as the other quartets.
6773 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6774 [(set (match_operand:V2SF 0 "register_operand" "=w")
6775 (fma:V2SF
6776 (float_extend:V2SF
6777 (vec_select:V2HF
6778 (match_operand:V4HF 2 "register_operand" "w")
6779 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6780 (float_extend:V2SF
6781 (vec_duplicate:V2HF
6782 (vec_select:HF
6783 (match_operand:V8HF 3 "register_operand" "x")
6784 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6785 (match_operand:V2SF 1 "register_operand" "0")))]
6786 "TARGET_F16FML"
6787 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6788 [(set_attr "type" "neon_fp_mul_s")]
6789 )
6790
6791 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6792 [(set (match_operand:V2SF 0 "register_operand" "=w")
6793 (fma:V2SF
6794 (float_extend:V2SF
6795 (neg:V2HF
6796 (vec_select:V2HF
6797 (match_operand:V4HF 2 "register_operand" "w")
6798 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6799 (float_extend:V2SF
6800 (vec_duplicate:V2HF
6801 (vec_select:HF
6802 (match_operand:V8HF 3 "register_operand" "x")
6803 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6804 (match_operand:V2SF 1 "register_operand" "0")))]
6805 "TARGET_F16FML"
6806 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6807 [(set_attr "type" "neon_fp_mul_s")]
6808 )
6809
6810 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6811 [(set (match_operand:V2SF 0 "register_operand" "=w")
6812 (fma:V2SF
6813 (float_extend:V2SF
6814 (vec_select:V2HF
6815 (match_operand:V4HF 2 "register_operand" "w")
6816 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6817 (float_extend:V2SF
6818 (vec_duplicate:V2HF
6819 (vec_select:HF
6820 (match_operand:V8HF 3 "register_operand" "x")
6821 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6822 (match_operand:V2SF 1 "register_operand" "0")))]
6823 "TARGET_F16FML"
6824 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6825 [(set_attr "type" "neon_fp_mul_s")]
6826 )
6827
6828 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6829 [(set (match_operand:V2SF 0 "register_operand" "=w")
6830 (fma:V2SF
6831 (float_extend:V2SF
6832 (neg:V2HF
6833 (vec_select:V2HF
6834 (match_operand:V4HF 2 "register_operand" "w")
6835 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6836 (float_extend:V2SF
6837 (vec_duplicate:V2HF
6838 (vec_select:HF
6839 (match_operand:V8HF 3 "register_operand" "x")
6840 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6841 (match_operand:V2SF 1 "register_operand" "0")))]
6842 "TARGET_F16FML"
6843 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6844 [(set_attr "type" "neon_fp_mul_s")]
6845 )
6846
;; Mixed-width "lane" expanders: V4SF result with a V8HF multiplicand
;; (half selector built in V8HFmode) but a 64-bit V4HF lane source, so
;; the lane index is back to a 2-bit immediate and endian adjustment
;; uses V4HFmode — the mirror image of the laneq_v2sf pair above.
6847 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6848 [(set (match_operand:V4SF 0 "register_operand" "")
6849 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6850 (match_operand:V8HF 2 "register_operand" "")
6851 (match_operand:V4HF 3 "register_operand" "")
6852 (match_operand:SI 4 "aarch64_imm2" "")]
6853 VFMLA16_LOW))]
6854 "TARGET_F16FML"
6855 {
6856 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6857 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6858
6859 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6860 operands[1],
6861 operands[2],
6862 operands[3],
6863 p1, lane));
6864 DONE;
6865 })
6866
;; High-half variant ('true' selector, VFMLA16_HIGH routing).
6867 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6868 [(set (match_operand:V4SF 0 "register_operand" "")
6869 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6870 (match_operand:V8HF 2 "register_operand" "")
6871 (match_operand:V4HF 3 "register_operand" "")
6872 (match_operand:SI 4 "aarch64_imm2" "")]
6873 VFMLA16_HIGH))]
6874 "TARGET_F16FML"
6875 {
6876 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6877 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6878
6879 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6880 operands[1],
6881 operands[2],
6882 operands[3],
6883 p1, lane));
6884 DONE;
6885 })
6886
;; Mixed-width "lane" insn patterns: V4SF result, V8HF half-selected
;; multiplicand (operand 2), lane taken from a V4HF register (operand 3,
;; "x" constraint, 2-bit "Ui2" index).  Same four-way lo/hi x
;; fmlal/fmlsl structure as the other insn quartets in this section.
6887 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6888 [(set (match_operand:V4SF 0 "register_operand" "=w")
6889 (fma:V4SF
6890 (float_extend:V4SF
6891 (vec_select:V4HF
6892 (match_operand:V8HF 2 "register_operand" "w")
6893 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6894 (float_extend:V4SF
6895 (vec_duplicate:V4HF
6896 (vec_select:HF
6897 (match_operand:V4HF 3 "register_operand" "x")
6898 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6899 (match_operand:V4SF 1 "register_operand" "0")))]
6900 "TARGET_F16FML"
6901 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6902 [(set_attr "type" "neon_fp_mul_s")]
6903 )
6904
6905 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6906 [(set (match_operand:V4SF 0 "register_operand" "=w")
6907 (fma:V4SF
6908 (float_extend:V4SF
6909 (neg:V4HF
6910 (vec_select:V4HF
6911 (match_operand:V8HF 2 "register_operand" "w")
6912 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6913 (float_extend:V4SF
6914 (vec_duplicate:V4HF
6915 (vec_select:HF
6916 (match_operand:V4HF 3 "register_operand" "x")
6917 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6918 (match_operand:V4SF 1 "register_operand" "0")))]
6919 "TARGET_F16FML"
6920 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6921 [(set_attr "type" "neon_fp_mul_s")]
6922 )
6923
6924 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6925 [(set (match_operand:V4SF 0 "register_operand" "=w")
6926 (fma:V4SF
6927 (float_extend:V4SF
6928 (vec_select:V4HF
6929 (match_operand:V8HF 2 "register_operand" "w")
6930 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6931 (float_extend:V4SF
6932 (vec_duplicate:V4HF
6933 (vec_select:HF
6934 (match_operand:V4HF 3 "register_operand" "x")
6935 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6936 (match_operand:V4SF 1 "register_operand" "0")))]
6937 "TARGET_F16FML"
6938 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6939 [(set_attr "type" "neon_fp_mul_s")]
6940 )
6941
6942 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6943 [(set (match_operand:V4SF 0 "register_operand" "=w")
6944 (fma:V4SF
6945 (float_extend:V4SF
6946 (neg:V4HF
6947 (vec_select:V4HF
6948 (match_operand:V8HF 2 "register_operand" "w")
6949 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6950 (float_extend:V4SF
6951 (vec_duplicate:V4HF
6952 (vec_select:HF
6953 (match_operand:V4HF 3 "register_operand" "x")
6954 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6955 (match_operand:V4SF 1 "register_operand" "0")))]
6956 "TARGET_F16FML"
6957 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6958 [(set_attr "type" "neon_fp_mul_s")]
6959 )
6960
6961 ;; pmull
6962
;; Polynomial multiply of two 64-bit values producing a 128-bit (TI)
;; result: pmull Vd.1q, Vn.1d, Vm.1d.  Gated on TARGET_AES because the
;; 64x64->128 pmull form belongs to the crypto extension.
6963 (define_insn "aarch64_crypto_pmulldi"
6964 [(set (match_operand:TI 0 "register_operand" "=w")
6965 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6966 (match_operand:DI 2 "register_operand" "w")]
6967 UNSPEC_PMULL))]
6968 "TARGET_SIMD && TARGET_AES"
6969 "pmull\\t%0.1q, %1.1d, %2.1d"
6970 [(set_attr "type" "crypto_pmull")]
6971 )
6972
;; High-half polynomial multiply: pmull2 multiplies the upper 64-bit
;; lanes of the two V2DI inputs, yielding a 128-bit (TI) result.
;; UNSPEC_PMULL2 distinguishes it from the low-half pattern above.
6973 (define_insn "aarch64_crypto_pmullv2di"
6974 [(set (match_operand:TI 0 "register_operand" "=w")
6975 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6976 (match_operand:V2DI 2 "register_operand" "w")]
6977 UNSPEC_PMULL2))]
6978 "TARGET_SIMD && TARGET_AES"
6979 "pmull2\\t%0.1q, %1.2d, %2.2d"
6980 [(set_attr "type" "crypto_pmull")]
6981 )