;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
                "=w, m,  m,  w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
                "m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
        return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
                "=w, Umq, m,  w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
                "m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
        return "ldr\t%q0, %1";
    case 1:
        return "stp\txzr, xzr, %0";
    case 2:
        return "str\t%q1, %0";
    case 3:
        return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
        return "#";
    case 7:
        return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
        gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                        (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

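;; The expansion above relies on the identity ctz (x) == clz (bit_reverse (x)):
;; REV reverses the bytes of each element, RBIT reverses the bits within each
;; byte, and the two together reverse all bits of the element before CLZ counts
;; them.  A scalar C sketch of the same identity (an illustration only, not
;; code from this port; x must be non-zero):
;;
;;   unsigned int
;;   ctz32 (unsigned int x)
;;   {
;;     unsigned int rev = 0;
;;     for (int i = 0; i < 32; i++)     /* bit_reverse (x) */
;;       rev |= ((x >> i) & 1u) << (31 - i);
;;     return __builtin_clz (rev);      /* == __builtin_ctz (x) */
;;   }
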
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

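;; The expansion above computes xorsign (a, b) = a ^ (b & sign_mask) entirely
;; in the integer domain: the AND isolates the sign bit of b and the XOR flips
;; the sign of a whenever that bit is set.  A scalar C sketch of the same bit
;; manipulation (an illustration only; it assumes IEEE single precision with
;; the sign in bit 31):
;;
;;   float
;;   xorsignf (float a, float b)
;;   {
;;     unsigned int ai, bi;
;;     __builtin_memcpy (&ai, &a, sizeof (ai));
;;     __builtin_memcpy (&bi, &b, sizeof (bi));
;;     ai ^= bi & 0x80000000u;          /* flip sign of a if b is negative */
;;     __builtin_memcpy (&a, &ai, sizeof (a));
;;     return a;
;;   }
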
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                            (match_operand:<VSI2QI> 3 "register_operand" "w")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                 DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

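;; A C loop of the shape described above (an illustration only): with 8-bit
;; inputs and a 32-bit accumulator the vectorizer can use this expand and emit
;; SDOT/UDOT when the Dot Product extension is enabled, e.g. with
;; -march=armv8.2-a+dotprod.
;;
;;   int
;;   dot_product (signed char *a, signed char *b, int len)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < len; i++)
;;       r += a[i] * b[i];
;;     return r;
;;   }
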
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                            (match_operand:V8QI 3 "register_operand" "<h_con>")
                            (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                            (match_operand:V16QI 3 "register_operand" "<h_con>")
                            (match_operand:SI 4 "immediate_operand" "i")]
                DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)

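;; BSL with a sign-bit mask implements the bitwise selection
;; copysign (a, b) = (mask & b) | (~mask & a).  A scalar C sketch of that
;; selection (an illustration only; it assumes IEEE single precision):
;;
;;   float
;;   copysignf_bits (float a, float b)
;;   {
;;     unsigned int ai, bi, mask = 0x80000000u;
;;     __builtin_memcpy (&ai, &a, sizeof (ai));
;;     __builtin_memcpy (&bi, &b, sizeof (bi));
;;     ai = (mask & bi) | (~mask & ai); /* sign from b, rest from a */
;;     __builtin_memcpy (&a, &ai, sizeof (a));
;;     return a;
;;   }
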
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
        (vec_select:<VEL>
          (match_operand:VMUL 1 "register_operand" "<h_con>")
          (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
         (vec_select:<VEL>
           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
           (parallel [(match_operand:SI 2 "immediate_operand")])))
       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
530 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
         UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
         (match_operand:V2DF 1 "register_operand" "w")
         (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
         UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")]
        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:<VDBLW> 2 "register_operand" "0")]
        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                              operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                             operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                             operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

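;; A C loop of the shape that maps onto this expansion (an illustration only;
;; the vectorizer looks this pattern up through the usad/ssad optabs):
;;
;;   int
;;   sad (unsigned char *a, unsigned char *b, int n)
;;   {
;;     int sum = 0;
;;     for (int i = 0; i < n; i++)
;;       sum += __builtin_abs (a[i] - b[i]);
;;     return sum;
;;   }
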
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
                (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
            (match_operand:VALL_F16 3 "register_operand" "0,0,0")
            (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
        return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
        gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
              (vec_select:<VEL>
                (match_operand:VALL_F16 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
            (vec_duplicate:VALL_F16_NO_V2Q
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")]
                     UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")]
                     UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

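;; SSHL/USHL shift each lane left for positive counts and right for negative
;; ones, so a variable right shift is expanded as a left shift by the negated
;; count.  An intrinsics sketch of the same idea (an illustration only, using
;; the public arm_neon.h API):
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   vashr_var (int32x4_t x, int32x4_t n)
;;   {
;;     return vshlq_s32 (x, vnegq_s32 (n));  /* x >> n in each lane */
;;   }
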
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

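;; Why the remapping above is exact: an arithmetic shift right fills from the
;; sign bit, so a notional shift by 64 and a shift by 63 both produce 0 for
;; non-negative values and -1 for negative ones.  A scalar C sketch (an
;; illustration only; x >> 64 itself would be undefined in C):
;;
;;   long long
;;   asr_63 (long long x)
;;   {
;;     return x >> 63;	/* 0 if x >= 0, -1 if x < 0 */
;;   }
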
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
                        (match_operand:VDQ_BHSI 2 "register_operand" "w")
                        (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
             (vec_select:<VEL>
               (match_operand:VDQHS 1 "register_operand" "<h_con>")
               (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
             (vec_select:<VEL>
               (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
               (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                       (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
             (vec_select:<VEL>
               (match_operand:VDQHS 1 "register_operand" "<h_con>")
               (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
             (vec_select:<VEL>
               (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
               (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                        MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                      (match_operand:VHSDF 2 "register_operand" "w")]
                     FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the patterns below
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                    operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
         (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

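;; A C loop of the shape that uses these patterns (an illustration only; the
;; vectorizer emits vec_pack_trunc for narrowing conversions such as
;; int -> short):
;;
;;   void
;;   narrow (short *d, int *s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (short) s[i];
;;   }
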
1486 ;; Widening operations.
1487
1488 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1490 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491 (match_operand:VQW 1 "register_operand" "w")
1492 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1493 )))]
1494 "TARGET_SIMD"
1495 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1496 [(set_attr "type" "neon_shift_imm_long")]
1497 )
1498
1499 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1500 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1501 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1502 (match_operand:VQW 1 "register_operand" "w")
1503 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1504 )))]
1505 "TARGET_SIMD"
1506 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1507 [(set_attr "type" "neon_shift_imm_long")]
1508 )
1509
1510 (define_expand "vec_unpack<su>_hi_<mode>"
1511 [(match_operand:<VWIDE> 0 "register_operand" "")
1512 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1513 "TARGET_SIMD"
1514 {
1515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1516 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1517 operands[1], p));
1518 DONE;
1519 }
1520 )
1521
1522 (define_expand "vec_unpack<su>_lo_<mode>"
1523 [(match_operand:<VWIDE> 0 "register_operand" "")
1524 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1525 "TARGET_SIMD"
1526 {
1527 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1528 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1529 operands[1], p));
1530 DONE;
1531 }
1532 )
1533
1534 ;; Widening arithmetic.
1535
1536 (define_insn "*aarch64_<su>mlal_lo<mode>"
1537 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1538 (plus:<VWIDE>
1539 (mult:<VWIDE>
1540 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1541 (match_operand:VQW 2 "register_operand" "w")
1542 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1543 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1544 (match_operand:VQW 4 "register_operand" "w")
1545 (match_dup 3))))
1546 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1547 "TARGET_SIMD"
1548 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1549 [(set_attr "type" "neon_mla_<Vetype>_long")]
1550 )
1551
1552 (define_insn "*aarch64_<su>mlal_hi<mode>"
1553 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1554 (plus:<VWIDE>
1555 (mult:<VWIDE>
1556 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1557 (match_operand:VQW 2 "register_operand" "w")
1558 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1559 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1560 (match_operand:VQW 4 "register_operand" "w")
1561 (match_dup 3))))
1562 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1563 "TARGET_SIMD"
1564 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1565 [(set_attr "type" "neon_mla_<Vetype>_long")]
1566 )
1567
1568 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1570 (minus:<VWIDE>
1571 (match_operand:<VWIDE> 1 "register_operand" "0")
1572 (mult:<VWIDE>
1573 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1574 (match_operand:VQW 2 "register_operand" "w")
1575 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1576 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1577 (match_operand:VQW 4 "register_operand" "w")
1578 (match_dup 3))))))]
1579 "TARGET_SIMD"
1580 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1581 [(set_attr "type" "neon_mla_<Vetype>_long")]
1582 )
1583
1584 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1585 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1586 (minus:<VWIDE>
1587 (match_operand:<VWIDE> 1 "register_operand" "0")
1588 (mult:<VWIDE>
1589 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1590 (match_operand:VQW 2 "register_operand" "w")
1591 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1592 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1593 (match_operand:VQW 4 "register_operand" "w")
1594 (match_dup 3))))))]
1595 "TARGET_SIMD"
1596 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1597 [(set_attr "type" "neon_mla_<Vetype>_long")]
1598 )
1599
1600 (define_insn "*aarch64_<su>mlal<mode>"
1601 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1602 (plus:<VWIDE>
1603 (mult:<VWIDE>
1604 (ANY_EXTEND:<VWIDE>
1605 (match_operand:VD_BHSI 1 "register_operand" "w"))
1606 (ANY_EXTEND:<VWIDE>
1607 (match_operand:VD_BHSI 2 "register_operand" "w")))
1608 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1609 "TARGET_SIMD"
1610 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1611 [(set_attr "type" "neon_mla_<Vetype>_long")]
1612 )
1613
1614 (define_insn "*aarch64_<su>mlsl<mode>"
1615 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1616 (minus:<VWIDE>
1617 (match_operand:<VWIDE> 1 "register_operand" "0")
1618 (mult:<VWIDE>
1619 (ANY_EXTEND:<VWIDE>
1620 (match_operand:VD_BHSI 2 "register_operand" "w"))
1621 (ANY_EXTEND:<VWIDE>
1622 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1623 "TARGET_SIMD"
1624 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
1628 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1631 (match_operand:VQW 1 "register_operand" "w")
1632 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1633 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1634 (match_operand:VQW 2 "register_operand" "w")
1635 (match_dup 3)))))]
1636 "TARGET_SIMD"
1637 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1638 [(set_attr "type" "neon_mul_<Vetype>_long")]
1639 )
1640
1641 (define_expand "vec_widen_<su>mult_lo_<mode>"
1642 [(match_operand:<VWIDE> 0 "register_operand" "")
1643 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1644 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1645 "TARGET_SIMD"
1646 {
1647 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1648 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1649 operands[1],
1650 operands[2], p));
1651 DONE;
1652 }
1653 )
1654
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 1 "register_operand" "w")
1659 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661 (match_operand:VQW 2 "register_operand" "w")
1662 (match_dup 3)))))]
1663 "TARGET_SIMD"
1664 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_mul_<Vetype>_long")]
1666 )
1667
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669 [(match_operand:<VWIDE> 0 "register_operand" "")
1670 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1672 "TARGET_SIMD"
1673 {
1674 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1676 operands[1],
1677 operands[2], p));
1678 DONE;
1680 }
1681 )
1682
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1688 ;;
1689 ;; Floating-point operations can raise an exception.  Vectorizing such
1690 ;; operations is safe for the reasons explained below.
1691 ;;
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling; however, this is an optional feature.  In the
1694 ;; event of a floating-point exception being raised by vectorised
1695 ;; code then:
1696 ;; 1. If trapped floating-point exceptions are available, then a trap
1697 ;; will be taken when any lane raises an enabled exception. A trap
1698 ;; handler may determine which lane raised the exception.
1699 ;; 2. Alternatively a sticky exception flag is set in the
1700 ;; floating-point status register (FPSR). Software may explicitly
1701 ;; test the exception flags; such tests will either prevent
1702 ;; vectorisation, allowing precise identification of the failing
1703 ;; operation, or occur outside of vectorisable regions, in which case
1704 ;; the specific operation and lane are not of interest.
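;;
;; For example (an illustrative sketch, not taken from a testcase), a
;; plain single-precision loop:
;;   void f (float *restrict c, float *a, float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       c[i] = a[i] + b[i];
;;   }
;; can be vectorized to "fadd v0.4s, v1.4s, v2.4s" through the add<mode>3
;; pattern below, without requiring -ffast-math.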
1705
1706 ;; FP arithmetic operations.
1707
1708 (define_insn "add<mode>3"
1709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711 (match_operand:VHSDF 2 "register_operand" "w")))]
1712 "TARGET_SIMD"
1713 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1715 )
1716
1717 (define_insn "sub<mode>3"
1718 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720 (match_operand:VHSDF 2 "register_operand" "w")))]
1721 "TARGET_SIMD"
1722 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 )
1725
1726 (define_insn "mul<mode>3"
1727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729 (match_operand:VHSDF 2 "register_operand" "w")))]
1730 "TARGET_SIMD"
1731 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1733 )
1734
1735 (define_expand "div<mode>3"
1736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738 (match_operand:VHSDF 2 "register_operand" "w")))]
1739 "TARGET_SIMD"
1740 {
1741 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1742 DONE;
1743
1744 operands[1] = force_reg (<MODE>mode, operands[1]);
1745 })
1746
1747 (define_insn "*div<mode>3"
1748 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750 (match_operand:VHSDF 2 "register_operand" "w")))]
1751 "TARGET_SIMD"
1752 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753 [(set_attr "type" "neon_fp_div_<stype><q>")]
1754 )
1755
1756 (define_insn "neg<mode>2"
1757 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1759 "TARGET_SIMD"
1760 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1762 )
1763
1764 (define_insn "abs<mode>2"
1765 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767 "TARGET_SIMD"
1768 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1770 )
1771
1772 (define_insn "fma<mode>4"
1773 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775 (match_operand:VHSDF 2 "register_operand" "w")
1776 (match_operand:VHSDF 3 "register_operand" "0")))]
1777 "TARGET_SIMD"
1778 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1780 )
1781
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783 [(set (match_operand:VDQF 0 "register_operand" "=w")
1784 (fma:VDQF
1785 (vec_duplicate:VDQF
1786 (vec_select:<VEL>
1787 (match_operand:VDQF 1 "register_operand" "<h_con>")
1788 (parallel [(match_operand:SI 2 "immediate_operand")])))
1789 (match_operand:VDQF 3 "register_operand" "w")
1790 (match_operand:VDQF 4 "register_operand" "0")))]
1791 "TARGET_SIMD"
1792 {
1793 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1795 }
1796 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1797 )
1798
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1801 (fma:VDQSF
1802 (vec_duplicate:VDQSF
1803 (vec_select:<VEL>
1804 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805 (parallel [(match_operand:SI 2 "immediate_operand")])))
1806 (match_operand:VDQSF 3 "register_operand" "w")
1807 (match_operand:VDQSF 4 "register_operand" "0")))]
1808 "TARGET_SIMD"
1809 {
1810 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1812 }
1813 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1814 )
1815
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817 [(set (match_operand:VMUL 0 "register_operand" "=w")
1818 (fma:VMUL
1819 (vec_duplicate:VMUL
1820 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821 (match_operand:VMUL 2 "register_operand" "w")
1822 (match_operand:VMUL 3 "register_operand" "0")))]
1823 "TARGET_SIMD"
1824 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1826 )
1827
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829 [(set (match_operand:DF 0 "register_operand" "=w")
1830 (fma:DF
1831 (vec_select:DF
1832 (match_operand:V2DF 1 "register_operand" "w")
1833 (parallel [(match_operand:SI 2 "immediate_operand")]))
1834 (match_operand:DF 3 "register_operand" "w")
1835 (match_operand:DF 4 "register_operand" "0")))]
1836 "TARGET_SIMD"
1837 {
1838 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1840 }
1841 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1842 )
1843
1844 (define_insn "fnma<mode>4"
1845 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1846 (fma:VHSDF
1847 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848 (match_operand:VHSDF 2 "register_operand" "w")
1849 (match_operand:VHSDF 3 "register_operand" "0")))]
1850 "TARGET_SIMD"
1851 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1853 )
1854
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856 [(set (match_operand:VDQF 0 "register_operand" "=w")
1857 (fma:VDQF
1858 (neg:VDQF
1859 (match_operand:VDQF 3 "register_operand" "w"))
1860 (vec_duplicate:VDQF
1861 (vec_select:<VEL>
1862 (match_operand:VDQF 1 "register_operand" "<h_con>")
1863 (parallel [(match_operand:SI 2 "immediate_operand")])))
1864 (match_operand:VDQF 4 "register_operand" "0")))]
1865 "TARGET_SIMD"
1866 {
1867 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1869 }
1870 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1871 )
1872
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1875 (fma:VDQSF
1876 (neg:VDQSF
1877 (match_operand:VDQSF 3 "register_operand" "w"))
1878 (vec_duplicate:VDQSF
1879 (vec_select:<VEL>
1880 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881 (parallel [(match_operand:SI 2 "immediate_operand")])))
1882 (match_operand:VDQSF 4 "register_operand" "0")))]
1883 "TARGET_SIMD"
1884 {
1885 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1887 }
1888 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1889 )
1890
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892 [(set (match_operand:VMUL 0 "register_operand" "=w")
1893 (fma:VMUL
1894 (neg:VMUL
1895 (match_operand:VMUL 2 "register_operand" "w"))
1896 (vec_duplicate:VMUL
1897 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898 (match_operand:VMUL 3 "register_operand" "0")))]
1899 "TARGET_SIMD"
1900 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1902 )
1903
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905 [(set (match_operand:DF 0 "register_operand" "=w")
1906 (fma:DF
1907 (vec_select:DF
1908 (match_operand:V2DF 1 "register_operand" "w")
1909 (parallel [(match_operand:SI 2 "immediate_operand")]))
1910 (neg:DF
1911 (match_operand:DF 3 "register_operand" "w"))
1912 (match_operand:DF 4 "register_operand" "0")))]
1913 "TARGET_SIMD"
1914 {
1915 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1917 }
1918 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1919 )
1920
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
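;; For example (illustrative), a vectorized truncf () call expands through
;; the btrunc pattern to "frintz v0.4s, v1.4s".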
1923 (define_insn "<frint_pattern><mode>2"
1924 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1926 FRINT))]
1927 "TARGET_SIMD"
1928 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929 [(set_attr "type" "neon_fp_round_<stype><q>")]
1930 )
1931
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937 [(match_operand:VHSDF 1 "register_operand" "w")]
1938 FCVT)))]
1939 "TARGET_SIMD"
1940 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1942 )
1943
1944 ;; HF Scalar variants of related SIMD instructions.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946 [(set (match_operand:HI 0 "register_operand" "=w")
1947 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1948 FCVT)))]
1949 "TARGET_SIMD_F16INST"
1950 "fcvt<frint_suffix><su>\t%h0, %h1"
1951 [(set_attr "type" "neon_fp_to_int_s")]
1952 )
1953
1954 (define_insn "<optab>_trunchfhi2"
1955 [(set (match_operand:HI 0 "register_operand" "=w")
1956 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957 "TARGET_SIMD_F16INST"
1958 "fcvtz<su>\t%h0, %h1"
1959 [(set_attr "type" "neon_fp_to_int_s")]
1960 )
1961
1962 (define_insn "<optab>hihf2"
1963 [(set (match_operand:HF 0 "register_operand" "=w")
1964 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965 "TARGET_SIMD_F16INST"
1966 "<su_optab>cvtf\t%h0, %h1"
1967 [(set_attr "type" "neon_int_to_fp_s")]
1968 )
1969
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1973 [(mult:VDQF
1974 (match_operand:VDQF 1 "register_operand" "w")
1975 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1976 UNSPEC_FRINTZ)))]
1977 "TARGET_SIMD
1978 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1980 {
1981 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1982 char buf[64];
1983 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984 output_asm_insn (buf, operands);
1985 return "";
1986 }
1987 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1988 )
1989
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993 [(match_operand:VHSDF 1 "register_operand")]
1994 UNSPEC_FRINTZ)))]
1995 "TARGET_SIMD"
1996 {})
1997
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001 [(match_operand:VHSDF 1 "register_operand")]
2002 UNSPEC_FRINTZ)))]
2003 "TARGET_SIMD"
2004 {})
2005
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007 [(set (match_operand:VHSDF 0 "register_operand")
2008 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2009 UNSPEC_FRINTZ))]
2010 "TARGET_SIMD"
2011 {})
2012
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2015 (FLOATUORS:VHSDF
2016 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2017 "TARGET_SIMD"
2018 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2020 )
2021
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2025
2026 ;; Float widening operations.
2027
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030 (float_extend:<VWIDE> (vec_select:<VHALF>
2031 (match_operand:VQ_HSF 1 "register_operand" "w")
2032 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2033 )))]
2034 "TARGET_SIMD"
2035 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036 [(set_attr "type" "neon_fp_cvt_widen_s")]
2037 )
2038
2039 ;; Convert between fixed-point and floating-point (vector modes)
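;; For example (illustrative), the vcvtq_n_s32_f32 (x, 16) intrinsic, which
;; converts to fixed-point with 16 fraction bits, maps to
;; "fcvtzs v0.4s, v1.4s, #16" via the first pattern below.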
2040
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043 (unspec:<VHSDF:FCVT_TARGET>
2044 [(match_operand:VHSDF 1 "register_operand" "w")
2045 (match_operand:SI 2 "immediate_operand" "i")]
2046 FCVT_F2FIXED))]
2047 "TARGET_SIMD"
2048 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2050 )
2051
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054 (unspec:<VDQ_HSDI:FCVT_TARGET>
2055 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056 (match_operand:SI 2 "immediate_operand" "i")]
2057 FCVT_FIXED2F))]
2058 "TARGET_SIMD"
2059 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2061 )
2062
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns, their behavior is as required.
2071
2072 (define_expand "vec_unpacks_lo_<mode>"
2073 [(match_operand:<VWIDE> 0 "register_operand" "")
2074 (match_operand:VQ_HSF 1 "register_operand" "")]
2075 "TARGET_SIMD"
2076 {
2077 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2079 operands[1], p));
2080 DONE;
2081 }
2082 )
2083
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086 (float_extend:<VWIDE> (vec_select:<VHALF>
2087 (match_operand:VQ_HSF 1 "register_operand" "w")
2088 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2089 )))]
2090 "TARGET_SIMD"
2091 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092 [(set_attr "type" "neon_fp_cvt_widen_s")]
2093 )
2094
2095 (define_expand "vec_unpacks_hi_<mode>"
2096 [(match_operand:<VWIDE> 0 "register_operand" "")
2097 (match_operand:VQ_HSF 1 "register_operand" "")]
2098 "TARGET_SIMD"
2099 {
2100 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2102 operands[1], p));
2103 DONE;
2104 }
2105 )

2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108 (float_extend:<VWIDE>
2109 (match_operand:VDF 1 "register_operand" "w")))]
2110 "TARGET_SIMD"
2111 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112 [(set_attr "type" "neon_fp_cvt_widen_s")]
2113 )
2114
2115 ;; Float narrowing operations.
2116
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118 [(set (match_operand:VDF 0 "register_operand" "=w")
2119 (float_truncate:VDF
2120 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2121 "TARGET_SIMD"
2122 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2124 )
2125
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2128 (vec_concat:<VDBL>
2129 (match_operand:VDF 1 "register_operand" "0")
2130 (float_truncate:VDF
2131 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2135 )
2136
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139 (vec_concat:<VDBL>
2140 (float_truncate:VDF
2141 (match_operand:<VWIDE> 2 "register_operand" "w"))
2142 (match_operand:VDF 1 "register_operand" "0")))]
2143 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2146 )
2147
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149 [(match_operand:<VDBL> 0 "register_operand" "=w")
2150 (match_operand:VDF 1 "register_operand" "0")
2151 (match_operand:<VWIDE> 2 "register_operand" "w")]
2152 "TARGET_SIMD"
2153 {
2154 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157 emit_insn (gen (operands[0], operands[1], operands[2]));
2158 DONE;
2159 }
2160 )
2161
2162 (define_expand "vec_pack_trunc_v2df"
2163 [(set (match_operand:V4SF 0 "register_operand")
2164 (vec_concat:V4SF
2165 (float_truncate:V2SF
2166 (match_operand:V2DF 1 "register_operand"))
2167 (float_truncate:V2SF
2168 (match_operand:V2DF 2 "register_operand"))
2169 ))]
2170 "TARGET_SIMD"
2171 {
2172 rtx tmp = gen_reg_rtx (V2SFmode);
2173 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2175
2176 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178 tmp, operands[hi]));
2179 DONE;
2180 }
2181 )
2182
2183 (define_expand "vec_pack_trunc_df"
2184 [(set (match_operand:V2SF 0 "register_operand")
2185 (vec_concat:V2SF
2186 (float_truncate:SF
2187 (match_operand:DF 1 "register_operand"))
2188 (float_truncate:SF
2189 (match_operand:DF 2 "register_operand"))
2190 ))]
2191 "TARGET_SIMD"
2192 {
2193 rtx tmp = gen_reg_rtx (V2SFmode);
2194 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2196
2197 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2200 DONE;
2201 }
2202 )
2203
2204 ;; FP Max/Min
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2206 ;; expression like:
2207 ;; a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2210 ;;
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2216 ;; NaNs.
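;;
;; For example (illustrative), with -ffast-math a vectorizable loop body:
;;   m[i] = (b[i] < c[i]) ? b[i] : c[i];
;; is matched as MIN_EXPR, mapped to the RTL 'smin', and emitted as
;; "fminnm v0.4s, v1.4s, v2.4s" by the pattern below.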
2217
2218 (define_insn "<su><maxmin><mode>3"
2219 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221 (match_operand:VHSDF 2 "register_operand" "w")))]
2222 "TARGET_SIMD"
2223 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2225 )
2226
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 and fmin<mode>3 standard
2229 ;; pattern names, which implement the IEEE fmax ()/fmin () functions.
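;; For example (illustrative), a vectorized call to the C fmaxf () function
;; can be emitted as "fmaxnm v0.4s, v1.4s, v2.4s", since FMAXNM provides the
;; NaN-handling behaviour that IEEE fmax () requires.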
2230 (define_insn "<maxmin_uns><mode>3"
2231 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233 (match_operand:VHSDF 2 "register_operand" "w")]
2234 FMAXMIN_UNS))]
2235 "TARGET_SIMD"
2236 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2238 )
2239
2240 ;; 'across lanes' add.
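;; For example (illustrative), a sum reduction over int32_t elements reduces
;; to "addv s0, v1.4s" followed by a lane-0 extract, via the expander below.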
2241
2242 (define_expand "reduc_plus_scal_<mode>"
2243 [(match_operand:<VEL> 0 "register_operand" "=w")
2244 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2245 UNSPEC_ADDV)]
2246 "TARGET_SIMD"
2247 {
2248 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249 rtx scratch = gen_reg_rtx (<MODE>mode);
2250 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2252 DONE;
2253 }
2254 )
2255
2256 (define_insn "aarch64_faddp<mode>"
2257 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259 (match_operand:VHSDF 2 "register_operand" "w")]
2260 UNSPEC_FADDV))]
2261 "TARGET_SIMD"
2262 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2264 )
2265
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267 [(set (match_operand:VDQV 0 "register_operand" "=w")
2268 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2269 UNSPEC_ADDV))]
2270 "TARGET_SIMD"
2271 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272 [(set_attr "type" "neon_reduc_add<q>")]
2273 )
2274
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276 [(set (match_operand:V2SI 0 "register_operand" "=w")
2277 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2278 UNSPEC_ADDV))]
2279 "TARGET_SIMD"
2280 "addp\\t%0.2s, %1.2s, %1.2s"
2281 [(set_attr "type" "neon_reduc_add")]
2282 )
2283
2284 (define_insn "reduc_plus_scal_<mode>"
2285 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2287 UNSPEC_FADDV))]
2288 "TARGET_SIMD"
2289 "faddp\\t%<Vetype>0, %1.<Vtype>"
2290 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2291 )
2292
2293 (define_expand "reduc_plus_scal_v4sf"
2294 [(set (match_operand:SF 0 "register_operand")
2295 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2296 UNSPEC_FADDV))]
2297 "TARGET_SIMD"
2298 {
2299 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300 rtx scratch = gen_reg_rtx (V4SFmode);
2301 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2304 DONE;
2305 })
2306
2307 (define_insn "clrsb<mode>2"
2308 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2310 "TARGET_SIMD"
2311 "cls\\t%0.<Vtype>, %1.<Vtype>"
2312 [(set_attr "type" "neon_cls<q>")]
2313 )
2314
2315 (define_insn "clz<mode>2"
2316 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318 "TARGET_SIMD"
2319 "clz\\t%0.<Vtype>, %1.<Vtype>"
2320 [(set_attr "type" "neon_cls<q>")]
2321 )
2322
2323 (define_insn "popcount<mode>2"
2324 [(set (match_operand:VB 0 "register_operand" "=w")
2325 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2326 "TARGET_SIMD"
2327 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328 [(set_attr "type" "neon_cnt<q>")]
2329 )
2330
2331 ;; 'across lanes' max and min ops.
2332
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
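;; For example (illustrative), with -ffast-math a max-reduction loop:
;;   float m = a[0];
;;   for (int i = 1; i < n; i++)
;;     m = (a[i] > m) ? a[i] : m;
;; can be folded to IFN_REDUC_MAX and expanded here to an across-lanes
;; fmaxnmv/fmaxv plus an extract of lane 0.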
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336 [(match_operand:<VEL> 0 "register_operand")
2337 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2338 FMAXMINV)]
2339 "TARGET_SIMD"
2340 {
2341 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342 rtx scratch = gen_reg_rtx (<MODE>mode);
2343 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2344 operands[1]));
2345 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2346 DONE;
2347 }
2348 )
2349
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand")
2353 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2354 MAXMINV)]
2355 "TARGET_SIMD"
2356 {
2357 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360 operands[1]));
2361 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362 DONE;
2363 }
2364 )
2365
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2369 MAXMINV))]
2370 "TARGET_SIMD"
2371 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372 [(set_attr "type" "neon_reduc_minmax<q>")]
2373 )
2374
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376 [(set (match_operand:V2SI 0 "register_operand" "=w")
2377 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2378 MAXMINV))]
2379 "TARGET_SIMD"
2380 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381 [(set_attr "type" "neon_reduc_minmax")]
2382 )
2383
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2387 FMAXMINV))]
2388 "TARGET_SIMD"
2389 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2391 )
2392
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2394 ;; allocation.
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2396 ;; to select.
2397 ;;
2398 ;; Thus our BSL is of the form:
2399 ;; op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2401 ;;
2402 ;; if (op0 = mask)
2403 ;; bsl op0, op2, op3
2404 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2405 ;; bit op0, op2, mask
2406 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2407 ;; bif op0, op3, mask
2408 ;;
2409 ;; The aarch64_simd_bsl<mode> expander generates this pattern.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; matched by *aarch64_simd_bsl<mode>_alt.
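;;
;; The RTL matched below relies on the bit-select identity (a C sketch):
;;   uint64_t bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     return ((a ^ b) & mask) ^ b;  /* == (a & mask) | (b & ~mask) */
;;   }
;; which allows combine to recognise open-coded selects as a single BSL.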
2412
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2415 (xor:VDQ_I
2416 (and:VDQ_I
2417 (xor:VDQ_I
2418 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421 (match_dup:<V_INT_EQUIV> 3)
2422 ))]
2423 "TARGET_SIMD"
2424 "@
2425 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428 [(set_attr "type" "neon_bsl<q>")]
2429 )
2430
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first. The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
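;;
;; That is, the _internal pattern matches ((x ^ y) & mask) ^ x, while this
;; one matches ((x ^ y) & mask) ^ y; both denote the same bit-select with
;; the roles of x and y exchanged.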
2436
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2439 (xor:VDQ_I
2440 (and:VDQ_I
2441 (xor:VDQ_I
2442 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445 (match_dup:<V_INT_EQUIV> 2)))]
2446 "TARGET_SIMD"
2447 "@
2448 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451 [(set_attr "type" "neon_bsl<q>")]
2452 )
2453
2454 ;; DImode is special: we want to avoid computing, in the vector registers,
2455 ;; operations which are more naturally computed in general purpose
2456 ;; registers.  If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again. However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2460 ;;
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
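;;
;; Concretely, the splitter below re-materialises the select on the integer
;; side as (in terms of the pattern's operands):
;;   scratch = op2 ^ op3;
;;   scratch &= op1;
;;   op0 = scratch ^ op3;
;; i.e. three cheap general-purpose instructions (eor, and, eor).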
2463
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2466 (xor:DI
2467 (and:DI
2468 (xor:DI
2469 (match_operand:DI 3 "register_operand" "w,0,w,r")
2470 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2472 (match_dup:DI 3)
2473 ))]
2474 "TARGET_SIMD"
2475 "@
2476 bsl\\t%0.8b, %2.8b, %3.8b
2477 bit\\t%0.8b, %2.8b, %1.8b
2478 bif\\t%0.8b, %3.8b, %1.8b
2479 #"
2480 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2481 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2482 {
2483 /* Split back to individual operations. If we're before reload, and
2484 able to create a temporary register, do so. If we're after reload,
2485 we've got an early-clobber destination register, so use that.
2486 Otherwise, we can't create pseudos and we can't yet guarantee that
2487 operands[0] is safe to write, so FAIL to split. */
2488
2489 rtx scratch;
2490 if (reload_completed)
2491 scratch = operands[0];
2492 else if (can_create_pseudo_p ())
2493 scratch = gen_reg_rtx (DImode);
2494 else
2495 FAIL;
2496
2497 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2500 DONE;
2501 }
2502 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503 (set_attr "length" "4,4,4,12")]
2504 )
2505
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2508 (xor:DI
2509 (and:DI
2510 (xor:DI
2511 (match_operand:DI 3 "register_operand" "w,w,0,r")
2512 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2514 (match_dup:DI 2)
2515 ))]
2516 "TARGET_SIMD"
2517 "@
2518 bsl\\t%0.8b, %3.8b, %2.8b
2519 bit\\t%0.8b, %3.8b, %1.8b
2520 bif\\t%0.8b, %2.8b, %1.8b
2521 #"
2522 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2523 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2524 {
2525 /* Split back to individual operations. If we're before reload, and
2526 able to create a temporary register, do so. If we're after reload,
2527 we've got an early-clobber destination register, so use that.
2528 Otherwise, we can't create pseudos and we can't yet guarantee that
2529 operands[0] is safe to write, so FAIL to split. */
2530
2531 rtx scratch;
2532 if (reload_completed)
2533 scratch = operands[0];
2534 else if (can_create_pseudo_p ())
2535 scratch = gen_reg_rtx (DImode);
2536 else
2537 FAIL;
2538
2539 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2542 DONE;
2543 }
2544 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545 (set_attr "length" "4,4,4,12")]
2546 )
2547
2548 (define_expand "aarch64_simd_bsl<mode>"
2549 [(match_operand:VALLDIF 0 "register_operand")
2550 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551 (match_operand:VALLDIF 2 "register_operand")
2552 (match_operand:VALLDIF 3 "register_operand")]
2553 "TARGET_SIMD"
2554 {
2555 /* We can't alias operands together if they have different modes. */
2556 rtx tmp = operands[0];
2557 if (FLOAT_MODE_P (<MODE>mode))
2558 {
2559 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2562 }
2563 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2565 operands[1],
2566 operands[2],
2567 operands[3]));
2568 if (tmp != operands[0])
2569 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2570
2571 DONE;
2572 })
2573
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575 [(match_operand:VALLDI 0 "register_operand")
2576 (match_operand:VALLDI 1 "nonmemory_operand")
2577 (match_operand:VALLDI 2 "nonmemory_operand")
2578 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2579 "TARGET_SIMD"
2580 {
2581 /* If we have (a = (P) ? -1 : 0), then we can simply move the
2582 generated mask (the result must be an integer vector). */
2583 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584 && operands[2] == CONST0_RTX (<MODE>mode))
2585 emit_move_insn (operands[0], operands[3]);
2586 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2587 else if (operands[1] == CONST0_RTX (<MODE>mode)
2588 && operands[2] == CONSTM1_RTX (<MODE>mode))
2589 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2590 else
2591 {
2592 if (!REG_P (operands[1]))
2593 operands[1] = force_reg (<MODE>mode, operands[1]);
2594 if (!REG_P (operands[2]))
2595 operands[2] = force_reg (<MODE>mode, operands[2]);
2596 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597 operands[1], operands[2]));
2598 }
2599
2600 DONE;
2601 })
2602
2603 ;; Patterns comparing two vectors to produce a mask.
2604
2605 (define_expand "vec_cmp<mode><mode>"
2606 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607 (match_operator 1 "comparison_operator"
2608 [(match_operand:VSDQ_I_DI 2 "register_operand")
2609 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2610 "TARGET_SIMD"
2611 {
2612 rtx mask = operands[0];
2613 enum rtx_code code = GET_CODE (operands[1]);
2614
2615 switch (code)
2616 {
2617 case NE:
2618 case LE:
2619 case LT:
2620 case GE:
2621 case GT:
2622 case EQ:
2623 if (operands[3] == CONST0_RTX (<MODE>mode))
2624 break;
2625
2626 /* Fall through. */
2627 default:
2628 if (!REG_P (operands[3]))
2629 operands[3] = force_reg (<MODE>mode, operands[3]);
2630
2631 break;
2632 }
2633
2634 switch (code)
2635 {
2636 case LT:
2637 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2638 break;
2639
2640 case GE:
2641 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2642 break;
2643
2644 case LE:
2645 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2646 break;
2647
2648 case GT:
2649 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2650 break;
2651
2652 case LTU:
2653 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2654 break;
2655
2656 case GEU:
2657 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2658 break;
2659
2660 case LEU:
2661 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2662 break;
2663
2664 case GTU:
2665 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2666 break;
2667
2668 case NE:
2669 /* Handle NE as !EQ. */
2670 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2672 break;
2673
2674 case EQ:
2675 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2676 break;
2677
2678 default:
2679 gcc_unreachable ();
2680 }
2681
2682 DONE;
2683 })
2684
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687 (match_operator 1 "comparison_operator"
2688 [(match_operand:VDQF 2 "register_operand")
2689 (match_operand:VDQF 3 "nonmemory_operand")]))]
2690 "TARGET_SIMD"
2691 {
2692 int use_zero_form = 0;
2693 enum rtx_code code = GET_CODE (operands[1]);
2694 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2695
2696 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2697
2698 switch (code)
2699 {
2700 case LE:
2701 case LT:
2702 case GE:
2703 case GT:
2704 case EQ:
2705 if (operands[3] == CONST0_RTX (<MODE>mode))
2706 {
2707 use_zero_form = 1;
2708 break;
2709 }
2710 /* Fall through. */
2711 default:
2712 if (!REG_P (operands[3]))
2713 operands[3] = force_reg (<MODE>mode, operands[3]);
2714
2715 break;
2716 }
2717
2718 switch (code)
2719 {
2720 case LT:
2721 if (use_zero_form)
2722 {
2723 comparison = gen_aarch64_cmlt<mode>;
2724 break;
2725 }
2726 /* Fall through. */
2727 case UNLT:
2728 std::swap (operands[2], operands[3]);
2729 /* Fall through. */
2730 case UNGT:
2731 case GT:
2732 comparison = gen_aarch64_cmgt<mode>;
2733 break;
2734 case LE:
2735 if (use_zero_form)
2736 {
2737 comparison = gen_aarch64_cmle<mode>;
2738 break;
2739 }
2740 /* Fall through. */
2741 case UNLE:
2742 std::swap (operands[2], operands[3]);
2743 /* Fall through. */
2744 case UNGE:
2745 case GE:
2746 comparison = gen_aarch64_cmge<mode>;
2747 break;
2748 case NE:
2749 case EQ:
2750 comparison = gen_aarch64_cmeq<mode>;
2751 break;
2752 case UNEQ:
2753 case ORDERED:
2754 case UNORDERED:
2755 case LTGT:
2756 break;
2757 default:
2758 gcc_unreachable ();
2759 }
2760
2761 switch (code)
2762 {
2763 case UNGE:
2764 case UNGT:
2765 case UNLE:
2766 case UNLT:
2767 {
2768 /* All of the above must not raise any FP exceptions. Thus we first
2769 check each operand for NaNs and force any elements containing NaN to
2770 zero before using them in the compare.
2771 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772 (cm<cc> (isnan (a) ? 0.0 : a,
2773 isnan (b) ? 0.0 : b))
2774 We use the following transformations for doing the comparisons:
2775 a UNGE b -> a GE b
2776 a UNGT b -> a GT b
2777 a UNLE b -> b GE a
2778 a UNLT b -> b GT a. */
2779
2780 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787 lowpart_subreg (<V_INT_EQUIV>mode,
2788 operands[2],
2789 <MODE>mode)));
2790 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791 lowpart_subreg (<V_INT_EQUIV>mode,
2792 operands[3],
2793 <MODE>mode)));
2794 gcc_assert (comparison != NULL);
2795 emit_insn (comparison (operands[0],
2796 lowpart_subreg (<MODE>mode,
2797 tmp0, <V_INT_EQUIV>mode),
2798 lowpart_subreg (<MODE>mode,
2799 tmp1, <V_INT_EQUIV>mode)));
2800 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2801 }
2802 break;
2803
2804 case LT:
2805 case LE:
2806 case GT:
2807 case GE:
2808 case EQ:
2809 case NE:
2810 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2811 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
2812 a GE b -> a GE b
2813 a GT b -> a GT b
2814 a LE b -> b GE a
2815 a LT b -> b GT a
2816 a EQ b -> a EQ b
2817 a NE b -> ~(a EQ b) */
2818 gcc_assert (comparison != NULL);
2819 emit_insn (comparison (operands[0], operands[2], operands[3]));
2820 if (code == NE)
2821 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2822 break;
2823
2824 case LTGT:
2825 /* LTGT is not guaranteed not to generate an FP exception. So let's
2826 go the faster way: ((a > b) || (b > a)). */
2827 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828 operands[2], operands[3]));
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831 break;
2832
2833 case ORDERED:
2834 case UNORDERED:
2835 case UNEQ:
2836 /* cmeq (a, a) & cmeq (b, b). */
2837 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838 operands[2], operands[2]));
2839 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2841
2842 if (code == UNORDERED)
2843 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844 else if (code == UNEQ)
2845 {
2846 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2848 }
2849 break;
2850
2851 default:
2852 gcc_unreachable ();
2853 }
2854
2855 DONE;
2856 })
2857
2858 (define_expand "vec_cmpu<mode><mode>"
2859 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860 (match_operator 1 "comparison_operator"
2861 [(match_operand:VSDQ_I_DI 2 "register_operand")
2862 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2863 "TARGET_SIMD"
2864 {
2865 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866 operands[2], operands[3]));
2867 DONE;
2868 })
2869
2870 (define_expand "vcond<mode><mode>"
2871 [(set (match_operand:VALLDI 0 "register_operand")
2872 (if_then_else:VALLDI
2873 (match_operator 3 "comparison_operator"
2874 [(match_operand:VALLDI 4 "register_operand")
2875 (match_operand:VALLDI 5 "nonmemory_operand")])
2876 (match_operand:VALLDI 1 "nonmemory_operand")
2877 (match_operand:VALLDI 2 "nonmemory_operand")))]
2878 "TARGET_SIMD"
2879 {
2880 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881 enum rtx_code code = GET_CODE (operands[3]);
2882
2883 /* NE is handled as !EQ in the vec_cmp patterns, so invert the
2884 comparison here and also swap operands 1 and 2 in order to avoid
2885 the additional NOT instruction. */
2886 if (code == NE)
2887 {
2888 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889 operands[4], operands[5]);
2890 std::swap (operands[1], operands[2]);
2891 }
2892 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893 operands[4], operands[5]));
2894 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895 operands[2], mask));
2896
2897 DONE;
2898 })
2899
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902 (if_then_else:<V_cmp_mixed>
2903 (match_operator 3 "comparison_operator"
2904 [(match_operand:VDQF_COND 4 "register_operand")
2905 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2908 "TARGET_SIMD"
2909 {
2910 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911 enum rtx_code code = GET_CODE (operands[3]);
2912
2913 /* NE is handled as !EQ in the vec_cmp patterns, so invert the
2914 comparison here and also swap operands 1 and 2 in order to avoid
2915 the additional NOT instruction. */
2916 if (code == NE)
2917 {
2918 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919 operands[4], operands[5]);
2920 std::swap (operands[1], operands[2]);
2921 }
2922 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923 operands[4], operands[5]));
2924 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925 operands[0], operands[1],
2926 operands[2], mask));
2927
2928 DONE;
2929 })
2930
2931 (define_expand "vcondu<mode><mode>"
2932 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933 (if_then_else:VSDQ_I_DI
2934 (match_operator 3 "comparison_operator"
2935 [(match_operand:VSDQ_I_DI 4 "register_operand")
2936 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2939 "TARGET_SIMD"
2940 {
2941 rtx mask = gen_reg_rtx (<MODE>mode);
2942 enum rtx_code code = GET_CODE (operands[3]);
2943
2944 /* NE is handled as !EQ in the vec_cmp patterns, so invert the
2945 comparison here and also swap operands 1 and 2 in order to avoid
2946 the additional NOT instruction. */
2947 if (code == NE)
2948 {
2949 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950 operands[4], operands[5]);
2951 std::swap (operands[1], operands[2]);
2952 }
2953 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954 operands[4], operands[5]));
2955 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956 operands[2], mask));
2957 DONE;
2958 })
2959
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961 [(set (match_operand:VDQF 0 "register_operand")
2962 (if_then_else:VDQF
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966 (match_operand:VDQF 1 "nonmemory_operand")
2967 (match_operand:VDQF 2 "nonmemory_operand")))]
2968 "TARGET_SIMD"
2969 {
2970 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2972
2973 /* NE is handled as !EQ in the vec_cmp patterns, so invert the
2974 comparison here and also swap operands 1 and 2 in order to avoid
2975 the additional NOT instruction. */
2976 if (code == NE)
2977 {
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2981 }
2982 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2983 mask, operands[3],
2984 operands[4], operands[5]));
2985 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986 operands[2], mask));
2987 DONE;
2988 })
2989
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2991
2992 ;; Lane extraction with sign extension to general purpose register.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994 [(set (match_operand:GPI 0 "register_operand" "=r")
2995 (sign_extend:GPI
2996 (vec_select:<VEL>
2997 (match_operand:VDQQH 1 "register_operand" "w")
2998 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2999 "TARGET_SIMD"
3000 {
3001 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3002 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3003 }
3004 [(set_attr "type" "neon_to_gp<q>")]
3005 )
3006
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008 [(set (match_operand:SI 0 "register_operand" "=r")
3009 (zero_extend:SI
3010 (vec_select:<VEL>
3011 (match_operand:VDQQH 1 "register_operand" "w")
3012 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3013 "TARGET_SIMD"
3014 {
3015 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016 return "umov\\t%w0, %1.<Vetype>[%2]";
3017 }
3018 [(set_attr "type" "neon_to_gp<q>")]
3019 )
3020
3021 ;; Lane extraction of a value; neither sign nor zero extension
3022 ;; is guaranteed, so the upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
3024 (define_insn "aarch64_get_lane<mode>"
3025 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3026 (vec_select:<VEL>
3027 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3029 "TARGET_SIMD"
3030 {
3031 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032 switch (which_alternative)
3033 {
3034 case 0:
3035 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3036 case 1:
3037 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3038 case 2:
3039 return "st1\\t{%1.<Vetype>}[%2], %0";
3040 default:
3041 gcc_unreachable ();
3042 }
3043 }
3044 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3045 )
3046
3047 (define_insn "load_pair_lanes<mode>"
3048 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3049 (vec_concat:<VDBL>
3050 (match_operand:VDC 1 "memory_operand" "Utq")
3051 (match_operand:VDC 2 "memory_operand" "m")))]
3052 "TARGET_SIMD && !STRICT_ALIGNMENT
3053 && rtx_equal_p (XEXP (operands[2], 0),
3054 plus_constant (Pmode,
3055 XEXP (operands[1], 0),
3056 GET_MODE_SIZE (<MODE>mode)))"
3057 "ldr\\t%q0, %1"
3058 [(set_attr "type" "neon_load1_1reg_q")]
3059 )
3060
3061 (define_insn "store_pair_lanes<mode>"
3062 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3063 (vec_concat:<VDBL>
3064 (match_operand:VDC 1 "register_operand" "w, r")
3065 (match_operand:VDC 2 "register_operand" "w, r")))]
3066 "TARGET_SIMD"
3067 "@
3068 stp\\t%d1, %d2, %y0
3069 stp\\t%x1, %x2, %y0"
3070 [(set_attr "type" "neon_stp, store_16")]
3071 )
3072
3073 ;; In this insn, operand 1 should be the low part, and operand 2 the high
3074 ;; part, of the destination vector.
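;;
;; For example (illustrative), vcombine_s32 (x, vdup_n_s32 (0)) needs no
;; explicit zeroing here: an AArch64 write to a D register already zeroes
;; the upper 64 bits of the corresponding Q register, so a single
;; mov/fmov/ldr of the low half implements the whole concatenation.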
3075
3076 (define_insn "*aarch64_combinez<mode>"
3077 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3078 (vec_concat:<VDBL>
3079 (match_operand:VDC 1 "general_operand" "w,?r,m")
3080 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3082 "@
3083 mov\\t%0.8b, %1.8b
3084 fmov\t%d0, %1
3085 ldr\\t%d0, %1"
3086 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087 (set_attr "simd" "yes,*,yes")
3088 (set_attr "fp" "*,yes,*")]
3089 )
3090
3091 (define_insn "*aarch64_combinez_be<mode>"
3092 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3093 (vec_concat:<VDBL>
3094 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3097 "@
3098 mov\\t%0.8b, %1.8b
3099 fmov\t%d0, %1
3100 ldr\\t%d0, %1"
3101 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102 (set_attr "simd" "yes,*,yes")
3103 (set_attr "fp" "*,yes,*")]
3104 )
3105
3106 (define_expand "aarch64_combine<mode>"
3107 [(match_operand:<VDBL> 0 "register_operand")
3108 (match_operand:VDC 1 "register_operand")
3109 (match_operand:VDC 2 "register_operand")]
3110 "TARGET_SIMD"
3111 {
3112 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3113
3114 DONE;
3115 }
3116 )
3117
3118 (define_expand "aarch64_simd_combine<mode>"
3119 [(match_operand:<VDBL> 0 "register_operand")
3120 (match_operand:VDC 1 "register_operand")
3121 (match_operand:VDC 2 "register_operand")]
3122 "TARGET_SIMD"
3123 {
3124 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3126 DONE;
3127 }
3128 [(set_attr "type" "multiple")]
3129 )
3130
3131 ;; <su><addsub>l<q>.
3132
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136 (match_operand:VQW 1 "register_operand" "w")
3137 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139 (match_operand:VQW 2 "register_operand" "w")
3140 (match_dup 3)))))]
3141 "TARGET_SIMD"
3142 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3144 )
3145
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQW 1 "register_operand" "w")
3150 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152 (match_operand:VQW 2 "register_operand" "w")
3153 (match_dup 3)))))]
3154 "TARGET_SIMD"
3155 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3157 )
3158
3159
3160 (define_expand "aarch64_saddl2<mode>"
3161 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162 (match_operand:VQW 1 "register_operand" "w")
3163 (match_operand:VQW 2 "register_operand" "w")]
3164 "TARGET_SIMD"
3165 {
3166 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3168 operands[2], p));
3169 DONE;
3170 })
3171
3172 (define_expand "aarch64_uaddl2<mode>"
3173 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174 (match_operand:VQW 1 "register_operand" "w")
3175 (match_operand:VQW 2 "register_operand" "w")]
3176 "TARGET_SIMD"
3177 {
3178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3180 operands[2], p));
3181 DONE;
3182 })
3183
3184 (define_expand "aarch64_ssubl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3188 "TARGET_SIMD"
3189 {
3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3192 operands[2], p));
3193 DONE;
3194 })
3195
3196 (define_expand "aarch64_usubl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3200 "TARGET_SIMD"
3201 {
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3204 operands[2], p));
3205 DONE;
3206 })
3207
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211 (match_operand:VD_BHSI 1 "register_operand" "w"))
3212 (ANY_EXTEND:<VWIDE>
3213 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3214 "TARGET_SIMD"
3215 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3217 )
3218
3219 ;; <su><addsub>w<q>.
3220
3221 (define_expand "widen_ssum<mode>3"
3222 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223 (plus:<VDBLW> (sign_extend:<VDBLW>
3224 (match_operand:VQW 1 "register_operand" ""))
3225 (match_operand:<VDBLW> 2 "register_operand" "")))]
3226 "TARGET_SIMD"
3227 {
3228 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3230
3231 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3232 operands[1], p));
3233 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3234 DONE;
3235 }
3236 )
3237
3238 (define_expand "widen_ssum<mode>3"
3239 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240 (plus:<VWIDE> (sign_extend:<VWIDE>
3241 (match_operand:VD_BHSI 1 "register_operand" ""))
3242 (match_operand:<VWIDE> 2 "register_operand" "")))]
3243 "TARGET_SIMD"
3244 {
3245 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3246 DONE;
3247 })
3248
3249 (define_expand "widen_usum<mode>3"
3250 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251 (plus:<VDBLW> (zero_extend:<VDBLW>
3252 (match_operand:VQW 1 "register_operand" ""))
3253 (match_operand:<VDBLW> 2 "register_operand" "")))]
3254 "TARGET_SIMD"
3255 {
3256 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3258
3259 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3260 operands[1], p));
3261 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3262 DONE;
3263 }
3264 )
3265
3266 (define_expand "widen_usum<mode>3"
3267 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268 (plus:<VWIDE> (zero_extend:<VWIDE>
3269 (match_operand:VD_BHSI 1 "register_operand" ""))
3270 (match_operand:<VWIDE> 2 "register_operand" "")))]
3271 "TARGET_SIMD"
3272 {
3273 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3274 DONE;
3275 })
3276
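;; A hedged sketch of the kind of reduction the vectorizer may route
;; through widen_ssum/widen_usum (an accumulator twice the element
;; width):
;;
;;   int16_t sum_bytes (const int8_t *x, int n)
;;   {
;;     int16_t s = 0;
;;     for (int i = 0; i < n; i++)
;;       s += x[i];                   /* saddw + saddw2 on the V16QI path  */
;;     return s;
;;   }
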
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3280 (ANY_EXTEND:<VWIDE>
3281 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3282 "TARGET_SIMD"
3283 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3285 )
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3290 (ANY_EXTEND:<VWIDE>
3291 (vec_select:<VHALF>
3292 (match_operand:VQW 2 "register_operand" "w")
3293 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3294 "TARGET_SIMD"
3295 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3297 )
3298
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3302 (ANY_EXTEND:<VWIDE>
3303 (vec_select:<VHALF>
3304 (match_operand:VQW 2 "register_operand" "w")
3305 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3306 "TARGET_SIMD"
3307 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3309 )
3310
3311 (define_expand "aarch64_saddw2<mode>"
3312 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313 (match_operand:<VWIDE> 1 "register_operand" "w")
3314 (match_operand:VQW 2 "register_operand" "w")]
3315 "TARGET_SIMD"
3316 {
3317 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3319 operands[2], p));
3320 DONE;
3321 })
3322
3323 (define_expand "aarch64_uaddw2<mode>"
3324 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325 (match_operand:<VWIDE> 1 "register_operand" "w")
3326 (match_operand:VQW 2 "register_operand" "w")]
3327 "TARGET_SIMD"
3328 {
3329 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3331 operands[2], p));
3332 DONE;
3333 })
3334
3335
3336 (define_expand "aarch64_ssubw2<mode>"
3337 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338 (match_operand:<VWIDE> 1 "register_operand" "w")
3339 (match_operand:VQW 2 "register_operand" "w")]
3340 "TARGET_SIMD"
3341 {
3342 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3344 operands[2], p));
3345 DONE;
3346 })
3347
3348 (define_expand "aarch64_usubw2<mode>"
3349 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350 (match_operand:<VWIDE> 1 "register_operand" "w")
3351 (match_operand:VQW 2 "register_operand" "w")]
3352 "TARGET_SIMD"
3353 {
3354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3356 operands[2], p));
3357 DONE;
3358 })
3359
3360 ;; <su><r>h<addsub>.
3361
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3366 HADDSUB))]
3367 "TARGET_SIMD"
3368 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369 [(set_attr "type" "neon_<addsub>_halve<q>")]
3370 )
3371
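;; The (rounding) halving forms compute (a + b) >> 1, with rounding for
;; the <r> variants, without intermediate overflow.  A minimal usage
;; sketch, assuming the usual intrinsic mapping:
;;
;;   uint8x8_t average (uint8x8_t a, uint8x8_t b)
;;   {
;;     return vrhadd_u8 (a, b);       /* urhadd v0.8b, v0.8b, v1.8b  */
;;   }
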
3372 ;; <r><addsub>hn<q>.
3373
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377 (match_operand:VQN 2 "register_operand" "w")]
3378 ADDSUBHN))]
3379 "TARGET_SIMD"
3380 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3382 )
3383
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387 (match_operand:VQN 2 "register_operand" "w")
3388 (match_operand:VQN 3 "register_operand" "w")]
3389 ADDSUBHN2))]
3390 "TARGET_SIMD"
3391 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3393 )
3394
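;; The narrowing forms return the high half of each (a + b) or (a - b)
;; result.  For illustration (assumed mapping):
;;
;;   int8x8_t high_half_sum (int16x8_t a, int16x8_t b)
;;   {
;;     return vaddhn_s16 (a, b);      /* addhn v0.8b, v0.8h, v1.8h  */
;;   }
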
3395 ;; pmul.
3396
3397 (define_insn "aarch64_pmul<mode>"
3398 [(set (match_operand:VB 0 "register_operand" "=w")
3399 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400 (match_operand:VB 2 "register_operand" "w")]
3401 UNSPEC_PMUL))]
3402 "TARGET_SIMD"
3403 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404 [(set_attr "type" "neon_mul_<Vetype><q>")]
3405 )
3406
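;; A short sketch of the polynomial (carry-less, GF(2)) multiply this
;; pattern implements, assuming the usual intrinsic mapping:
;;
;;   poly8x8_t gf2_mul (poly8x8_t a, poly8x8_t b)
;;   {
;;     return vmul_p8 (a, b);         /* pmul v0.8b, v0.8b, v1.8b  */
;;   }
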
3407 ;; fmulx.
3408
3409 (define_insn "aarch64_fmulx<mode>"
3410 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3411 (unspec:VHSDF_HSDF
3412 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3414 UNSPEC_FMULX))]
3415 "TARGET_SIMD"
3416 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417 [(set_attr "type" "neon_fp_mul_<stype>")]
3418 )
3419
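;; FMULX behaves like FMUL except that +/-0.0 * +/-Inf yields +/-2.0
;; instead of a NaN.  A hedged usage sketch:
;;
;;   float32x4_t scaled (float32x4_t a, float32x4_t b)
;;   {
;;     return vmulxq_f32 (a, b);      /* fmulx v0.4s, v0.4s, v1.4s  */
;;   }
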
3420 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3421
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3424 (unspec:VDQSF
3425 [(match_operand:VDQSF 1 "register_operand" "w")
3426 (vec_duplicate:VDQSF
3427 (vec_select:<VEL>
3428 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3430 UNSPEC_FMULX))]
3431 "TARGET_SIMD"
3432 {
3433 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3435 }
3436 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3437 )
3438
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3440
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442 [(set (match_operand:VDQF 0 "register_operand" "=w")
3443 (unspec:VDQF
3444 [(match_operand:VDQF 1 "register_operand" "w")
3445 (vec_duplicate:VDQF
3446 (vec_select:<VEL>
3447 (match_operand:VDQF 2 "register_operand" "w")
3448 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3449 UNSPEC_FMULX))]
3450 "TARGET_SIMD"
3451 {
3452 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3454 }
3455 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3456 )
3457
3458 ;; vmulxq_lane
3459
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3462 (unspec:VHSDF
3463 [(match_operand:VHSDF 1 "register_operand" "w")
3464 (vec_duplicate:VHSDF
3465 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3466 UNSPEC_FMULX))]
3467 "TARGET_SIMD"
3468 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3470 )
3471
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 == vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3475
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3478 (unspec:<VEL>
3479 [(match_operand:<VEL> 1 "register_operand" "w")
3480 (vec_select:<VEL>
3481 (match_operand:VDQF 2 "register_operand" "w")
3482 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3483 UNSPEC_FMULX))]
3484 "TARGET_SIMD"
3485 {
3486 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3488 }
3489 [(set_attr "type" "fmul<Vetype>")]
3490 )
3491 ;; <su>q<addsub>
3492
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3497 "TARGET_SIMD"
3498 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499 [(set_attr "type" "neon_<optab><q>")]
3500 )
3501
3502 ;; suqadd and usqadd
3503
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507 (match_operand:VSDQ_I 2 "register_operand" "w")]
3508 USSUQADD))]
3509 "TARGET_SIMD"
3510 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511 [(set_attr "type" "neon_qadd<q>")]
3512 )
3513
3514 ;; sqmovun
3515
3516 (define_insn "aarch64_sqmovun<mode>"
3517 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3519 UNSPEC_SQXTUN))]
3520 "TARGET_SIMD"
3521 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3523 )
3524
3525 ;; sqmovn and uqmovn
3526
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3530 SUQMOVN))]
3531 "TARGET_SIMD"
3532 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3534 )
3535
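;; For illustration of the saturating-narrow patterns above (vqmovun_*
;; presumably maps onto the sqxtun pattern, vqmovn_* onto sqxtn):
;;
;;   int8x8_t sat_narrow (int16x8_t a)
;;   {
;;     return vqmovn_s16 (a);         /* sqxtn v0.8b, v0.8h  */
;;   }
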
3536 ;; <su>q<absneg>
3537
3538 (define_insn "aarch64_s<optab><mode>"
3539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540 (UNQOPS:VSDQ_I
3541 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3542 "TARGET_SIMD"
3543 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544 [(set_attr "type" "neon_<optab><q>")]
3545 )
3546
3547 ;; sq<r>dmulh.
3548
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3551 (unspec:VSDQ_HSI
3552 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3554 VQDMULH))]
3555 "TARGET_SIMD"
3556 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3558 )
3559
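;; The doubling-multiply-high forms are the usual fixed-point (Q15/Q31)
;; multiplies.  A minimal sketch, assuming the usual intrinsic mapping:
;;
;;   int16x4_t q15_mul (int16x4_t a, int16x4_t b)
;;   {
;;     return vqdmulh_s16 (a, b);     /* sqdmulh v0.4h, v0.4h, v1.4h  */
;;   }
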
3560 ;; sq<r>dmulh_lane
3561
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3564 (unspec:VDQHS
3565 [(match_operand:VDQHS 1 "register_operand" "w")
3566 (vec_select:<VEL>
3567 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569 VQDMULH))]
3570 "TARGET_SIMD"
3571 "*
3572 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3575 )
3576
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3579 (unspec:VDQHS
3580 [(match_operand:VDQHS 1 "register_operand" "w")
3581 (vec_select:<VEL>
3582 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584 VQDMULH))]
3585 "TARGET_SIMD"
3586 "*
3587 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3590 )
3591
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3594 (unspec:SD_HSI
3595 [(match_operand:SD_HSI 1 "register_operand" "w")
3596 (vec_select:<VEL>
3597 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599 VQDMULH))]
3600 "TARGET_SIMD"
3601 "*
3602 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3604 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3605 )
3606
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3609 (unspec:SD_HSI
3610 [(match_operand:SD_HSI 1 "register_operand" "w")
3611 (vec_select:<VEL>
3612 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614 VQDMULH))]
3615 "TARGET_SIMD"
3616 "*
3617 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3619 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3620 )
3621
3622 ;; sqrdml[as]h.
3623
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3626 (unspec:VSDQ_HSI
3627 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3630 SQRDMLH_AS))]
3631 "TARGET_SIMD_RDMA"
3632 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3634 )
3635
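;; These patterns need the ARMv8.1-A rounding-doubling multiply-accumulate
;; extension, hence the TARGET_SIMD_RDMA condition.  A hedged sketch,
;; assuming a target configured with +rdma:
;;
;;   int16x4_t fused (int16x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vqrdmlah_s16 (acc, a, b); /* sqrdmlah v0.4h, v1.4h, v2.4h */
;;   }
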
3636 ;; sqrdml[as]h_lane.
3637
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640 (unspec:VDQHS
3641 [(match_operand:VDQHS 1 "register_operand" "0")
3642 (match_operand:VDQHS 2 "register_operand" "w")
3643 (vec_select:<VEL>
3644 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646 SQRDMLH_AS))]
3647 "TARGET_SIMD_RDMA"
3648 {
3649 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650 return
3651 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3652 }
3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3654 )
3655
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3658 (unspec:SD_HSI
3659 [(match_operand:SD_HSI 1 "register_operand" "0")
3660 (match_operand:SD_HSI 2 "register_operand" "w")
3661 (vec_select:<VEL>
3662 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3664 SQRDMLH_AS))]
3665 "TARGET_SIMD_RDMA"
3666 {
3667 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3668 return
3669 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3670 }
3671 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3672 )
3673
3674 ;; sqrdml[as]h_laneq.
3675
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3678 (unspec:VDQHS
3679 [(match_operand:VDQHS 1 "register_operand" "0")
3680 (match_operand:VDQHS 2 "register_operand" "w")
3681 (vec_select:<VEL>
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684 SQRDMLH_AS))]
3685 "TARGET_SIMD_RDMA"
3686 {
3687 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688 return
3689 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3690 }
3691 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3692 )
3693
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3696 (unspec:SD_HSI
3697 [(match_operand:SD_HSI 1 "register_operand" "0")
3698 (match_operand:SD_HSI 2 "register_operand" "w")
3699 (vec_select:<VEL>
3700 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3702 SQRDMLH_AS))]
3703 "TARGET_SIMD_RDMA"
3704 {
3705 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3706 return
3707 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3708 }
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3710 )
3711
3712 ;; vqdml[sa]l
3713
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716 (SBINQOPS:<VWIDE>
3717 (match_operand:<VWIDE> 1 "register_operand" "0")
3718 (ss_ashift:<VWIDE>
3719 (mult:<VWIDE>
3720 (sign_extend:<VWIDE>
3721 (match_operand:VSD_HSI 2 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (match_operand:VSD_HSI 3 "register_operand" "w")))
3724 (const_int 1))))]
3725 "TARGET_SIMD"
3726 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3728 )
3729
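;; A hedged usage sketch of the widening saturating multiply-accumulate:
;;
;;   int32x4_t mac (int32x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vqdmlal_s16 (acc, a, b); /* sqdmlal v0.4s, v1.4h, v2.4h  */
;;   }
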
3730 ;; vqdml[sa]l_lane
3731
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3734 (SBINQOPS:<VWIDE>
3735 (match_operand:<VWIDE> 1 "register_operand" "0")
3736 (ss_ashift:<VWIDE>
3737 (mult:<VWIDE>
3738 (sign_extend:<VWIDE>
3739 (match_operand:VD_HSI 2 "register_operand" "w"))
3740 (sign_extend:<VWIDE>
3741 (vec_duplicate:VD_HSI
3742 (vec_select:<VEL>
3743 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3745 ))
3746 (const_int 1))))]
3747 "TARGET_SIMD"
3748 {
3749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750 return
3751 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3752 }
3753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3754 )
3755
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3758 (SBINQOPS:<VWIDE>
3759 (match_operand:<VWIDE> 1 "register_operand" "0")
3760 (ss_ashift:<VWIDE>
3761 (mult:<VWIDE>
3762 (sign_extend:<VWIDE>
3763 (match_operand:VD_HSI 2 "register_operand" "w"))
3764 (sign_extend:<VWIDE>
3765 (vec_duplicate:VD_HSI
3766 (vec_select:<VEL>
3767 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3769 ))
3770 (const_int 1))))]
3771 "TARGET_SIMD"
3772 {
3773 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3774 return
3775 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3776 }
3777 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3778 )
3779
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782 (SBINQOPS:<VWIDE>
3783 (match_operand:<VWIDE> 1 "register_operand" "0")
3784 (ss_ashift:<VWIDE>
3785 (mult:<VWIDE>
3786 (sign_extend:<VWIDE>
3787 (match_operand:SD_HSI 2 "register_operand" "w"))
3788 (sign_extend:<VWIDE>
3789 (vec_select:<VEL>
3790 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3792 )
3793 (const_int 1))))]
3794 "TARGET_SIMD"
3795 {
3796 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3797 return
3798 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3799 }
3800 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3801 )
3802
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805 (SBINQOPS:<VWIDE>
3806 (match_operand:<VWIDE> 1 "register_operand" "0")
3807 (ss_ashift:<VWIDE>
3808 (mult:<VWIDE>
3809 (sign_extend:<VWIDE>
3810 (match_operand:SD_HSI 2 "register_operand" "w"))
3811 (sign_extend:<VWIDE>
3812 (vec_select:<VEL>
3813 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3815 )
3816 (const_int 1))))]
3817 "TARGET_SIMD"
3818 {
3819 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820 return
3821 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822 }
3823 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3824 )
3825
3826 ;; vqdml[sa]l_n
3827
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3830 (SBINQOPS:<VWIDE>
3831 (match_operand:<VWIDE> 1 "register_operand" "0")
3832 (ss_ashift:<VWIDE>
3833 (mult:<VWIDE>
3834 (sign_extend:<VWIDE>
3835 (match_operand:VD_HSI 2 "register_operand" "w"))
3836 (sign_extend:<VWIDE>
3837 (vec_duplicate:VD_HSI
3838 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3839 (const_int 1))))]
3840 "TARGET_SIMD"
3841 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3843 )
3844
3845 ;; sqdml[as]l2
3846
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3849 (SBINQOPS:<VWIDE>
3850 (match_operand:<VWIDE> 1 "register_operand" "0")
3851 (ss_ashift:<VWIDE>
3852 (mult:<VWIDE>
3853 (sign_extend:<VWIDE>
3854 (vec_select:<VHALF>
3855 (match_operand:VQ_HSI 2 "register_operand" "w")
3856 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857 (sign_extend:<VWIDE>
3858 (vec_select:<VHALF>
3859 (match_operand:VQ_HSI 3 "register_operand" "w")
3860 (match_dup 4))))
3861 (const_int 1))))]
3862 "TARGET_SIMD"
3863 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3865 )
3866
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869 (match_operand:<VWIDE> 1 "register_operand" "w")
3870 (match_operand:VQ_HSI 2 "register_operand" "w")
3871 (match_operand:VQ_HSI 3 "register_operand" "w")]
3872 "TARGET_SIMD"
3873 {
3874 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876 operands[2], operands[3], p));
3877 DONE;
3878 })
3879
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882 (match_operand:<VWIDE> 1 "register_operand" "w")
3883 (match_operand:VQ_HSI 2 "register_operand" "w")
3884 (match_operand:VQ_HSI 3 "register_operand" "w")]
3885 "TARGET_SIMD"
3886 {
3887 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889 operands[2], operands[3], p));
3890 DONE;
3891 })
3892
3893 ;; vqdml[sa]l2_lane
3894
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3897 (SBINQOPS:<VWIDE>
3898 (match_operand:<VWIDE> 1 "register_operand" "0")
3899 (ss_ashift:<VWIDE>
3900 (mult:<VWIDE>
3901 (sign_extend:<VWIDE>
3902 (vec_select:<VHALF>
3903 (match_operand:VQ_HSI 2 "register_operand" "w")
3904 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905 (sign_extend:<VWIDE>
3906 (vec_duplicate:<VHALF>
3907 (vec_select:<VEL>
3908 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3910 ))))
3911 (const_int 1))))]
3912 "TARGET_SIMD"
3913 {
3914 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3915 return
3916 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3917 }
3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3919 )
3920
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923 (SBINQOPS:<VWIDE>
3924 (match_operand:<VWIDE> 1 "register_operand" "0")
3925 (ss_ashift:<VWIDE>
3926 (mult:<VWIDE>
3927 (sign_extend:<VWIDE>
3928 (vec_select:<VHALF>
3929 (match_operand:VQ_HSI 2 "register_operand" "w")
3930 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931 (sign_extend:<VWIDE>
3932 (vec_duplicate:<VHALF>
3933 (vec_select:<VEL>
3934 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3936 ))))
3937 (const_int 1))))]
3938 "TARGET_SIMD"
3939 {
3940 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3941 return
3942 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3943 }
3944 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3945 )
3946
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949 (match_operand:<VWIDE> 1 "register_operand" "w")
3950 (match_operand:VQ_HSI 2 "register_operand" "w")
3951 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952 (match_operand:SI 4 "immediate_operand" "i")]
3953 "TARGET_SIMD"
3954 {
3955 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957 operands[2], operands[3],
3958 operands[4], p));
3959 DONE;
3960 })
3961
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (match_operand:<VWIDE> 1 "register_operand" "w")
3965 (match_operand:VQ_HSI 2 "register_operand" "w")
3966 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967 (match_operand:SI 4 "immediate_operand" "i")]
3968 "TARGET_SIMD"
3969 {
3970 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972 operands[2], operands[3],
3973 operands[4], p));
3974 DONE;
3975 })
3976
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979 (match_operand:<VWIDE> 1 "register_operand" "w")
3980 (match_operand:VQ_HSI 2 "register_operand" "w")
3981 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982 (match_operand:SI 4 "immediate_operand" "i")]
3983 "TARGET_SIMD"
3984 {
3985 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987 operands[2], operands[3],
3988 operands[4], p));
3989 DONE;
3990 })
3991
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (match_operand:<VWIDE> 1 "register_operand" "w")
3995 (match_operand:VQ_HSI 2 "register_operand" "w")
3996 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997 (match_operand:SI 4 "immediate_operand" "i")]
3998 "TARGET_SIMD"
3999 {
4000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002 operands[2], operands[3],
4003 operands[4], p));
4004 DONE;
4005 })
4006
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009 (SBINQOPS:<VWIDE>
4010 (match_operand:<VWIDE> 1 "register_operand" "0")
4011 (ss_ashift:<VWIDE>
4012 (mult:<VWIDE>
4013 (sign_extend:<VWIDE>
4014 (vec_select:<VHALF>
4015 (match_operand:VQ_HSI 2 "register_operand" "w")
4016 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017 (sign_extend:<VWIDE>
4018 (vec_duplicate:<VHALF>
4019 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4020 (const_int 1))))]
4021 "TARGET_SIMD"
4022 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4024 )
4025
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028 (match_operand:<VWIDE> 1 "register_operand" "w")
4029 (match_operand:VQ_HSI 2 "register_operand" "w")
4030 (match_operand:<VEL> 3 "register_operand" "w")]
4031 "TARGET_SIMD"
4032 {
4033 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035 operands[2], operands[3],
4036 p));
4037 DONE;
4038 })
4039
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (match_operand:<VWIDE> 1 "register_operand" "w")
4043 (match_operand:VQ_HSI 2 "register_operand" "w")
4044 (match_operand:<VEL> 3 "register_operand" "w")]
4045 "TARGET_SIMD"
4046 {
4047 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049 operands[2], operands[3],
4050 p));
4051 DONE;
4052 })
4053
4054 ;; vqdmull
4055
4056 (define_insn "aarch64_sqdmull<mode>"
4057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058 (ss_ashift:<VWIDE>
4059 (mult:<VWIDE>
4060 (sign_extend:<VWIDE>
4061 (match_operand:VSD_HSI 1 "register_operand" "w"))
4062 (sign_extend:<VWIDE>
4063 (match_operand:VSD_HSI 2 "register_operand" "w")))
4064 (const_int 1)))]
4065 "TARGET_SIMD"
4066 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4068 )
4069
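;; For illustration (assumed intrinsic mapping):
;;
;;   int32x4_t dbl_mul (int16x4_t a, int16x4_t b)
;;   {
;;     return vqdmull_s16 (a, b);     /* sqdmull v0.4s, v0.4h, v1.4h  */
;;   }
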
4070 ;; vqdmull_lane
4071
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074 (ss_ashift:<VWIDE>
4075 (mult:<VWIDE>
4076 (sign_extend:<VWIDE>
4077 (match_operand:VD_HSI 1 "register_operand" "w"))
4078 (sign_extend:<VWIDE>
4079 (vec_duplicate:VD_HSI
4080 (vec_select:<VEL>
4081 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4083 ))
4084 (const_int 1)))]
4085 "TARGET_SIMD"
4086 {
4087 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4089 }
4090 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4091 )
4092
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4095 (ss_ashift:<VWIDE>
4096 (mult:<VWIDE>
4097 (sign_extend:<VWIDE>
4098 (match_operand:VD_HSI 1 "register_operand" "w"))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:VD_HSI
4101 (vec_select:<VEL>
4102 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4104 ))
4105 (const_int 1)))]
4106 "TARGET_SIMD"
4107 {
4108 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4110 }
4111 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4112 )
4113
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116 (ss_ashift:<VWIDE>
4117 (mult:<VWIDE>
4118 (sign_extend:<VWIDE>
4119 (match_operand:SD_HSI 1 "register_operand" "w"))
4120 (sign_extend:<VWIDE>
4121 (vec_select:<VEL>
4122 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4124 ))
4125 (const_int 1)))]
4126 "TARGET_SIMD"
4127 {
4128 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4130 }
4131 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4132 )
4133
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136 (ss_ashift:<VWIDE>
4137 (mult:<VWIDE>
4138 (sign_extend:<VWIDE>
4139 (match_operand:SD_HSI 1 "register_operand" "w"))
4140 (sign_extend:<VWIDE>
4141 (vec_select:<VEL>
4142 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4144 ))
4145 (const_int 1)))]
4146 "TARGET_SIMD"
4147 {
4148 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4150 }
4151 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4152 )
4153
4154 ;; vqdmull_n
4155
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158 (ss_ashift:<VWIDE>
4159 (mult:<VWIDE>
4160 (sign_extend:<VWIDE>
4161 (match_operand:VD_HSI 1 "register_operand" "w"))
4162 (sign_extend:<VWIDE>
4163 (vec_duplicate:VD_HSI
4164 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4165 )
4166 (const_int 1)))]
4167 "TARGET_SIMD"
4168 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4170 )
4171
4172 ;; vqdmull2
4173
4174
4175
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4178 (ss_ashift:<VWIDE>
4179 (mult:<VWIDE>
4180 (sign_extend:<VWIDE>
4181 (vec_select:<VHALF>
4182 (match_operand:VQ_HSI 1 "register_operand" "w")
4183 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184 (sign_extend:<VWIDE>
4185 (vec_select:<VHALF>
4186 (match_operand:VQ_HSI 2 "register_operand" "w")
4187 (match_dup 3)))
4188 )
4189 (const_int 1)))]
4190 "TARGET_SIMD"
4191 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4193 )
4194
4195 (define_expand "aarch64_sqdmull2<mode>"
4196 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197 (match_operand:VQ_HSI 1 "register_operand" "w")
4198 (match_operand:VQ_HSI 2 "register_operand" "w")]
4199 "TARGET_SIMD"
4200 {
4201 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4203 operands[2], p));
4204 DONE;
4205 })
4206
4207 ;; vqdmull2_lane
4208
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211 (ss_ashift:<VWIDE>
4212 (mult:<VWIDE>
4213 (sign_extend:<VWIDE>
4214 (vec_select:<VHALF>
4215 (match_operand:VQ_HSI 1 "register_operand" "w")
4216 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217 (sign_extend:<VWIDE>
4218 (vec_duplicate:<VHALF>
4219 (vec_select:<VEL>
4220 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4222 ))
4223 (const_int 1)))]
4224 "TARGET_SIMD"
4225 {
4226 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4228 }
4229 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234 (ss_ashift:<VWIDE>
4235 (mult:<VWIDE>
4236 (sign_extend:<VWIDE>
4237 (vec_select:<VHALF>
4238 (match_operand:VQ_HSI 1 "register_operand" "w")
4239 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240 (sign_extend:<VWIDE>
4241 (vec_duplicate:<VHALF>
4242 (vec_select:<VEL>
4243 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4245 ))
4246 (const_int 1)))]
4247 "TARGET_SIMD"
4248 {
4249 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4251 }
4252 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4253 )
4254
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257 (match_operand:VQ_HSI 1 "register_operand" "w")
4258 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259 (match_operand:SI 3 "immediate_operand" "i")]
4260 "TARGET_SIMD"
4261 {
4262 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264 operands[2], operands[3],
4265 p));
4266 DONE;
4267 })
4268
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271 (match_operand:VQ_HSI 1 "register_operand" "w")
4272 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273 (match_operand:SI 3 "immediate_operand" "i")]
4274 "TARGET_SIMD"
4275 {
4276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278 operands[2], operands[3],
4279 p));
4280 DONE;
4281 })
4282
4283 ;; vqdmull2_n
4284
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (ss_ashift:<VWIDE>
4288 (mult:<VWIDE>
4289 (sign_extend:<VWIDE>
4290 (vec_select:<VHALF>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4295 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4296 )
4297 (const_int 1)))]
4298 "TARGET_SIMD"
4299 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4301 )
4302
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305 (match_operand:VQ_HSI 1 "register_operand" "w")
4306 (match_operand:<VEL> 2 "register_operand" "w")]
4307 "TARGET_SIMD"
4308 {
4309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4311 operands[2], p));
4312 DONE;
4313 })
4314
4315 ;; vshl
4316
4317 (define_insn "aarch64_<sur>shl<mode>"
4318 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4319 (unspec:VSDQ_I_DI
4320 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4322 VSHL))]
4323 "TARGET_SIMD"
4324 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325 [(set_attr "type" "neon_shift_reg<q>")]
4326 )
4327
4328
4329 ;; vqshl
4330
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4333 (unspec:VSDQ_I
4334 [(match_operand:VSDQ_I 1 "register_operand" "w")
4335 (match_operand:VSDQ_I 2 "register_operand" "w")]
4336 VQSHL))]
4337 "TARGET_SIMD"
4338 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339 [(set_attr "type" "neon_sat_shift_reg<q>")]
4340 )
4341
4342 ;; vshll_n
4343
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4347 (match_operand:SI 2
4348 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4349 VSHLL))]
4350 "TARGET_SIMD"
4351 {
4352 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354 else
4355 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356 }
4357 [(set_attr "type" "neon_shift_imm_long")]
4358 )
4359
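;; When the shift count equals the element width, the signedness of the
;; extension cannot affect the result, so the untyped SHLL alias is
;; printed instead of [su]shll.  A hedged usage sketch:
;;
;;   uint16x8_t widen_shift (uint8x8_t a)
;;   {
;;     return vshll_n_u8 (a, 8);      /* shll v0.8h, v0.8b, #8  */
;;   }
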
4360 ;; vshll_high_n
4361
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365 (match_operand:SI 2 "immediate_operand" "i")]
4366 VSHLL))]
4367 "TARGET_SIMD"
4368 {
4369 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4371 else
4372 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4373 }
4374 [(set_attr "type" "neon_shift_imm_long")]
4375 )
4376
4377 ;; vrshr_n
4378
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4382 (match_operand:SI 2
4383 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4384 VRSHR_N))]
4385 "TARGET_SIMD"
4386 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387 [(set_attr "type" "neon_sat_shift_imm<q>")]
4388 )
4389
4390 ;; v(r)sra_n
4391
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4396 (match_operand:SI 3
4397 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4398 VSRA))]
4399 "TARGET_SIMD"
4400 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401 [(set_attr "type" "neon_shift_acc<q>")]
4402 )
4403
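;; The shift-right-accumulate forms compute acc + (x >> n) in a single
;; instruction.  For illustration (assumed mapping):
;;
;;   uint32x4_t sra (uint32x4_t acc, uint32x4_t x)
;;   {
;;     return vsraq_n_u32 (acc, x, 2); /* usra v0.4s, v1.4s, #2  */
;;   }
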
4404 ;; vs<lr>i_n
4405
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4410 (match_operand:SI 3
4411 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4412 VSLRI))]
4413 "TARGET_SIMD"
4414 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415 [(set_attr "type" "neon_shift_imm<q>")]
4416 )
4417
4418 ;; vqshl(u)
4419
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4423 (match_operand:SI 2
4424 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4425 VQSHL_N))]
4426 "TARGET_SIMD"
4427 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428 [(set_attr "type" "neon_sat_shift_imm<q>")]
4429 )
4430
4431
4432 ;; vq(r)shr(u)n_n
4433
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4437 (match_operand:SI 2
4438 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4439 VQSHRN_N))]
4440 "TARGET_SIMD"
4441 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4443 )
4444
4445
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note that we have constraints for Dz and Z, as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
4449
4450 (define_insn "aarch64_cm<optab><mode>"
4451 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4452 (neg:<V_INT_EQUIV>
4453 (COMPARISONS:<V_INT_EQUIV>
4454 (match_operand:VDQ_I 1 "register_operand" "w,w")
4455 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4456 )))]
4457 "TARGET_SIMD"
4458 "@
4459 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4462 )
4463
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4466 (neg:DI
4467 (COMPARISONS:DI
4468 (match_operand:DI 1 "register_operand" "w,w,r")
4469 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4470 )))
4471 (clobber (reg:CC CC_REGNUM))]
4472 "TARGET_SIMD"
4473 "#"
4474 "&& reload_completed"
4475 [(set (match_operand:DI 0 "register_operand")
4476 (neg:DI
4477 (COMPARISONS:DI
4478 (match_operand:DI 1 "register_operand")
4479 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4480 )))]
4481 {
4482 /* If we are in the general purpose register file,
4483 we split to a sequence of comparison and store. */
4484 if (GP_REGNUM_P (REGNO (operands[0]))
4485 && GP_REGNUM_P (REGNO (operands[1])))
4486 {
4487 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4491 DONE;
4492 }
4493 /* Otherwise, we expand to a similar pattern which does not
4494 clobber CC_REGNUM. */
4495 }
4496 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4497 )
4498
4499 (define_insn "*aarch64_cm<optab>di"
4500 [(set (match_operand:DI 0 "register_operand" "=w,w")
4501 (neg:DI
4502 (COMPARISONS:DI
4503 (match_operand:DI 1 "register_operand" "w,w")
4504 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4505 )))]
4506 "TARGET_SIMD && reload_completed"
4507 "@
4508 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509 cm<optab>\t%d0, %d1, #0"
4510 [(set_attr "type" "neon_compare, neon_compare_zero")]
4511 )
4512
4513 ;; cm(hs|hi)
4514
4515 (define_insn "aarch64_cm<optab><mode>"
4516 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4517 (neg:<V_INT_EQUIV>
4518 (UCOMPARISONS:<V_INT_EQUIV>
4519 (match_operand:VDQ_I 1 "register_operand" "w")
4520 (match_operand:VDQ_I 2 "register_operand" "w")
4521 )))]
4522 "TARGET_SIMD"
4523 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524 [(set_attr "type" "neon_compare<q>")]
4525 )
4526
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528 [(set (match_operand:DI 0 "register_operand" "=w,r")
4529 (neg:DI
4530 (UCOMPARISONS:DI
4531 (match_operand:DI 1 "register_operand" "w,r")
4532 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4533 )))
4534 (clobber (reg:CC CC_REGNUM))]
4535 "TARGET_SIMD"
4536 "#"
4537 "&& reload_completed"
4538 [(set (match_operand:DI 0 "register_operand")
4539 (neg:DI
4540 (UCOMPARISONS:DI
4541 (match_operand:DI 1 "register_operand")
4542 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4543 )))]
4544 {
4545 /* If we are in the general purpose register file,
4546 we split to a sequence of comparison and store. */
4547 if (GP_REGNUM_P (REGNO (operands[0]))
4548 && GP_REGNUM_P (REGNO (operands[1])))
4549 {
4550 machine_mode mode = CCmode;
4551 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4554 DONE;
4555 }
4556 /* Otherwise, we expand to a similar pattern which does not
4557 clobber CC_REGNUM. */
4558 }
4559 [(set_attr "type" "neon_compare,multiple")]
4560 )
4561
4562 (define_insn "*aarch64_cm<optab>di"
4563 [(set (match_operand:DI 0 "register_operand" "=w")
4564 (neg:DI
4565 (UCOMPARISONS:DI
4566 (match_operand:DI 1 "register_operand" "w")
4567 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4568 )))]
4569 "TARGET_SIMD && reload_completed"
4570 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571 [(set_attr "type" "neon_compare")]
4572 )
4573
4574 ;; cmtst
4575
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
4581
4582 (define_insn "aarch64_cmtst<mode>"
4583 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4584 (plus:<V_INT_EQUIV>
4585 (eq:<V_INT_EQUIV>
4586 (and:VDQ_I
4587 (match_operand:VDQ_I 1 "register_operand" "w")
4588 (match_operand:VDQ_I 2 "register_operand" "w"))
4589 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4591 ]
4592 "TARGET_SIMD"
4593 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594 [(set_attr "type" "neon_tst<q>")]
4595 )
4596
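;; A minimal sketch of what CMTST computes: each lane becomes all-ones
;; where (a & b) is non-zero.  Assuming the usual intrinsic mapping:
;;
;;   uint8x8_t test_bits (int8x8_t a, int8x8_t b)
;;   {
;;     return vtst_s8 (a, b);         /* cmtst v0.8b, v0.8b, v1.8b  */
;;   }
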
4597 (define_insn_and_split "aarch64_cmtstdi"
4598 [(set (match_operand:DI 0 "register_operand" "=w,r")
4599 (neg:DI
4600 (ne:DI
4601 (and:DI
4602 (match_operand:DI 1 "register_operand" "w,r")
4603 (match_operand:DI 2 "register_operand" "w,r"))
4604 (const_int 0))))
4605 (clobber (reg:CC CC_REGNUM))]
4606 "TARGET_SIMD"
4607 "#"
4608 "&& reload_completed"
4609 [(set (match_operand:DI 0 "register_operand")
4610 (neg:DI
4611 (ne:DI
4612 (and:DI
4613 (match_operand:DI 1 "register_operand")
4614 (match_operand:DI 2 "register_operand"))
4615 (const_int 0))))]
4616 {
4617 /* If we are in the general purpose register file,
4618 we split to a sequence of comparison and store. */
4619 if (GP_REGNUM_P (REGNO (operands[0]))
4620 && GP_REGNUM_P (REGNO (operands[1])))
4621 {
4622 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4627 DONE;
4628 }
4629 /* Otherwise, we expand to a similar pattern which does not
4630 clobber CC_REGNUM. */
4631 }
4632 [(set_attr "type" "neon_tst,multiple")]
4633 )
4634
4635 (define_insn "*aarch64_cmtstdi"
4636 [(set (match_operand:DI 0 "register_operand" "=w")
4637 (neg:DI
4638 (ne:DI
4639 (and:DI
4640 (match_operand:DI 1 "register_operand" "w")
4641 (match_operand:DI 2 "register_operand" "w"))
4642 (const_int 0))))]
4643 "TARGET_SIMD"
4644 "cmtst\t%d0, %d1, %d2"
4645 [(set_attr "type" "neon_tst")]
4646 )
4647
4648 ;; fcm(eq|ge|gt|le|lt)
4649
4650 (define_insn "aarch64_cm<optab><mode>"
4651 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4652 (neg:<V_INT_EQUIV>
4653 (COMPARISONS:<V_INT_EQUIV>
4654 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4656 )))]
4657 "TARGET_SIMD"
4658 "@
4659 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4662 )
4663
4664 ;; fac(ge|gt)
4665 ;; Note that we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt) with the operands swapped.
4667
4668 (define_insn "aarch64_fac<optab><mode>"
4669 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4670 (neg:<V_INT_EQUIV>
4671 (FAC_COMPARISONS:<V_INT_EQUIV>
4672 (abs:VHSDF_HSDF
4673 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4674 (abs:VHSDF_HSDF
4675 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4676 )))]
4677 "TARGET_SIMD"
4678 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4680 )
4681
4682 ;; addp
4683
4684 (define_insn "aarch64_addp<mode>"
4685 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4686 (unspec:VD_BHSI
4687 [(match_operand:VD_BHSI 1 "register_operand" "w")
4688 (match_operand:VD_BHSI 2 "register_operand" "w")]
4689 UNSPEC_ADDP))]
4690 "TARGET_SIMD"
4691 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692 [(set_attr "type" "neon_reduc_add<q>")]
4693 )
4694
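;; A hedged usage sketch: pairwise add of adjacent lanes, e.g.
;;
;;   int32x2_t pair_sum (int32x2_t a, int32x2_t b)
;;   {
;;     return vpadd_s32 (a, b);       /* addp v0.2s, v0.2s, v1.2s  */
;;   }
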
4695 (define_insn "aarch64_addpdi"
4696 [(set (match_operand:DI 0 "register_operand" "=w")
4697 (unspec:DI
4698 [(match_operand:V2DI 1 "register_operand" "w")]
4699 UNSPEC_ADDP))]
4700 "TARGET_SIMD"
4701 "addp\t%d0, %1.2d"
4702 [(set_attr "type" "neon_reduc_add")]
4703 )
4704
4705 ;; sqrt
4706
4707 (define_expand "sqrt<mode>2"
4708 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4710 "TARGET_SIMD"
4711 {
4712 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4713 DONE;
4714 })
4715
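;; aarch64_emit_approx_sqrt can replace the FSQRT with an FRSQRTE +
;; FRSQRTS Newton-Raphson sequence when the low-precision/unsafe-math
;; flags permit; otherwise the expander falls through to the plain insn
;; below.  A hedged usage sketch:
;;
;;   float32x4_t root (float32x4_t x)
;;   {
;;     return vsqrtq_f32 (x);         /* fsqrt v0.4s, v0.4s  */
;;   }
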
4716 (define_insn "*sqrt<mode>2"
4717 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4719 "TARGET_SIMD"
4720 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4722 )
4723
4724 ;; Patterns for vector struct loads and stores.
4725
4726 (define_insn "aarch64_simd_ld2<mode>"
4727 [(set (match_operand:OI 0 "register_operand" "=w")
4728 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730 UNSPEC_LD2))]
4731 "TARGET_SIMD"
4732 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733 [(set_attr "type" "neon_load2_2reg<q>")]
4734 )
4735
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737 [(set (match_operand:OI 0 "register_operand" "=w")
4738 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4740 UNSPEC_LD2_DUP))]
4741 "TARGET_SIMD"
4742 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743 [(set_attr "type" "neon_load2_all_lanes<q>")]
4744 )
4745
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747 [(set (match_operand:OI 0 "register_operand" "=w")
4748 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749 (match_operand:OI 2 "register_operand" "0")
4750 (match_operand:SI 3 "immediate_operand" "i")
4751 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4752 UNSPEC_LD2_LANE))]
4753 "TARGET_SIMD"
4754 {
4755 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4757 }
4758 [(set_attr "type" "neon_load2_one_lane")]
4759 )
4760
4761 (define_expand "vec_load_lanesoi<mode>"
4762 [(set (match_operand:OI 0 "register_operand" "=w")
4763 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765 UNSPEC_LD2))]
4766 "TARGET_SIMD"
4767 {
4768 if (BYTES_BIG_ENDIAN)
4769 {
4770 rtx tmp = gen_reg_rtx (OImode);
4771 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4774 }
4775 else
4776 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4777 DONE;
4778 })
4779
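;; On big-endian targets the lane numbering of the loaded register list
;; does not match GCC's vector indices, so the expander above loads into
;; a temporary and fixes the order with an aarch64_rev_reglistoi permute.
;; A hedged usage sketch:
;;
;;   int32x4x2_t deinterleave (const int32_t *p)
;;   {
;;     return vld2q_s32 (p);          /* ld2 {v0.4s - v1.4s}, [x0]  */
;;   }
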
4780 (define_insn "aarch64_simd_st2<mode>"
4781 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4784 UNSPEC_ST2))]
4785 "TARGET_SIMD"
4786 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787 [(set_attr "type" "neon_store2_2reg<q>")]
4788 )
4789
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795 (match_operand:SI 2 "immediate_operand" "i")]
4796 UNSPEC_ST2_LANE))]
4797 "TARGET_SIMD"
4798 {
4799 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4801 }
4802 [(set_attr "type" "neon_store2_one_lane<q>")]
4803 )
4804
4805 (define_expand "vec_store_lanesoi<mode>"
4806 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4809 UNSPEC_ST2))]
4810 "TARGET_SIMD"
4811 {
4812 if (BYTES_BIG_ENDIAN)
4813 {
4814 rtx tmp = gen_reg_rtx (OImode);
4815 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4818 }
4819 else
4820 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4821 DONE;
4822 })
4823
4824 (define_insn "aarch64_simd_ld3<mode>"
4825 [(set (match_operand:CI 0 "register_operand" "=w")
4826 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4828 UNSPEC_LD3))]
4829 "TARGET_SIMD"
4830 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831 [(set_attr "type" "neon_load3_3reg<q>")]
4832 )
4833
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835 [(set (match_operand:CI 0 "register_operand" "=w")
4836 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4838 UNSPEC_LD3_DUP))]
4839 "TARGET_SIMD"
4840 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841 [(set_attr "type" "neon_load3_all_lanes<q>")]
4842 )
4843
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845 [(set (match_operand:CI 0 "register_operand" "=w")
4846 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847 (match_operand:CI 2 "register_operand" "0")
4848 (match_operand:SI 3 "immediate_operand" "i")
4849 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850 UNSPEC_LD3_LANE))]
4851 "TARGET_SIMD"
4852 {
4853 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4855 }
4856 [(set_attr "type" "neon_load3_one_lane")]
4857 )
4858
4859 (define_expand "vec_load_lanesci<mode>"
4860 [(set (match_operand:CI 0 "register_operand" "=w")
4861 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863 UNSPEC_LD3))]
4864 "TARGET_SIMD"
4865 {
4866 if (BYTES_BIG_ENDIAN)
4867 {
4868 rtx tmp = gen_reg_rtx (CImode);
4869 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4872 }
4873 else
4874 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4875 DONE;
4876 })
4877
4878 (define_insn "aarch64_simd_st3<mode>"
4879 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882 UNSPEC_ST3))]
4883 "TARGET_SIMD"
4884 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885 [(set_attr "type" "neon_store3_3reg<q>")]
4886 )
4887
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893 (match_operand:SI 2 "immediate_operand" "i")]
4894 UNSPEC_ST3_LANE))]
4895 "TARGET_SIMD"
4896 {
4897 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4899 }
4900 [(set_attr "type" "neon_store3_one_lane<q>")]
4901 )
4902
4903 (define_expand "vec_store_lanesci<mode>"
4904 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907 UNSPEC_ST3))]
4908 "TARGET_SIMD"
4909 {
4910 if (BYTES_BIG_ENDIAN)
4911 {
4912 rtx tmp = gen_reg_rtx (CImode);
4913 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4916 }
4917 else
4918 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4919 DONE;
4920 })
4921
4922 (define_insn "aarch64_simd_ld4<mode>"
4923 [(set (match_operand:XI 0 "register_operand" "=w")
4924 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 UNSPEC_LD4))]
4927 "TARGET_SIMD"
4928 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929 [(set_attr "type" "neon_load4_4reg<q>")]
4930 )
4931
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933 [(set (match_operand:XI 0 "register_operand" "=w")
4934 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 UNSPEC_LD4_DUP))]
4937 "TARGET_SIMD"
4938 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load4_all_lanes<q>")]
4940 )
4941
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943 [(set (match_operand:XI 0 "register_operand" "=w")
4944 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (match_operand:XI 2 "register_operand" "0")
4946 (match_operand:SI 3 "immediate_operand" "i")
4947 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948 UNSPEC_LD4_LANE))]
4949 "TARGET_SIMD"
4950 {
4951 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4953 }
4954 [(set_attr "type" "neon_load4_one_lane")]
4955 )
4956
4957 (define_expand "vec_load_lanesxi<mode>"
4958 [(set (match_operand:XI 0 "register_operand" "=w")
4959 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961 UNSPEC_LD4))]
4962 "TARGET_SIMD"
4963 {
4964 if (BYTES_BIG_ENDIAN)
4965 {
4966 rtx tmp = gen_reg_rtx (XImode);
4967 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4970 }
4971 else
4972 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4973 DONE;
4974 })
4975
4976 (define_insn "aarch64_simd_st4<mode>"
4977 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980 UNSPEC_ST4))]
4981 "TARGET_SIMD"
4982 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983 [(set_attr "type" "neon_store4_4reg<q>")]
4984 )
4985
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991 (match_operand:SI 2 "immediate_operand" "i")]
4992 UNSPEC_ST4_LANE))]
4993 "TARGET_SIMD"
4994 {
4995 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4997 }
4998 [(set_attr "type" "neon_store4_one_lane<q>")]
4999 )
5000
5001 (define_expand "vec_store_lanesxi<mode>"
5002 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005 UNSPEC_ST4))]
5006 "TARGET_SIMD"
5007 {
5008 if (BYTES_BIG_ENDIAN)
5009 {
5010 rtx tmp = gen_reg_rtx (XImode);
5011 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5014 }
5015 else
5016 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5017 DONE;
5018 })
5019
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5022 (unspec:VSTRUCT
5023 [(match_operand:VSTRUCT 1 "register_operand" "w")
5024 (match_operand:V16QI 2 "register_operand" "w")]
5025 UNSPEC_REV_REGLIST))]
5026 "TARGET_SIMD"
5027 "#"
5028 "&& reload_completed"
5029 [(const_int 0)]
5030 {
5031 int i;
5032 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033 for (i = 0; i < nregs; i++)
5034 {
5035 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5038 }
5039 DONE;
5040 }
5041 [(set_attr "type" "neon_tbl1_q")
5042 (set_attr "length" "<insn_count>")]
5043 )
5044
5045 ;; Reload patterns for AdvSIMD register list operands.
5046
5047 (define_expand "mov<mode>"
5048 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049 (match_operand:VSTRUCT 1 "general_operand" ""))]
5050 "TARGET_SIMD"
5051 {
5052 if (can_create_pseudo_p ())
5053 {
5054 if (GET_CODE (operands[0]) != REG)
5055 operands[1] = force_reg (<MODE>mode, operands[1]);
5056 }
5057 })
5058
5059
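;; The vld1_*_x3 intrinsics load three consecutive vectors with a single LD1
;; (no interleaving, unlike LD3). A usage sketch, assuming the usual
;; arm_neon.h mapping (register numbers illustrative):
;;   uint32x4x3_t v = vld1q_u32_x3 (p);   /* ld1 {v0.4s - v2.4s}, [x0] */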
5060 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5061 [(match_operand:CI 0 "register_operand" "=w")
5062 (match_operand:DI 1 "register_operand" "r")
5063 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064 "TARGET_SIMD"
5065 {
5066 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5067 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5068 DONE;
5069 })
5070
5071 (define_insn "aarch64_ld1_x3_<mode>"
5072 [(set (match_operand:CI 0 "register_operand" "=w")
5073 (unspec:CI
5074 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5075 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5076 "TARGET_SIMD"
5077 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5078 [(set_attr "type" "neon_load1_3reg<q>")]
5079 )
5080
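;; vst1_*_x2 and vst1_*_x3 similarly store two or three consecutive vectors
;; with a single ST1. A sketch, again assuming the arm_neon.h mapping:
;;   vst1q_u32_x2 (p, v);   /* st1 {v0.4s - v1.4s}, [x0] */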
5081 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5082 [(match_operand:DI 0 "register_operand" "")
5083 (match_operand:OI 1 "register_operand" "")
5084 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 "TARGET_SIMD"
5086 {
5087 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5088 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5089 DONE;
5090 })
5091
5092 (define_insn "aarch64_st1_x2_<mode>"
5093 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5094 (unspec:OI
5095 [(match_operand:OI 1 "register_operand" "w")
5096 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5097 "TARGET_SIMD"
5098 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5099 [(set_attr "type" "neon_store1_2reg<q>")]
5100 )
5101
5102 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5103 [(match_operand:DI 0 "register_operand" "")
5104 (match_operand:CI 1 "register_operand" "")
5105 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5106 "TARGET_SIMD"
5107 {
5108 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5109 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5110 DONE;
5111 })
5112
5113 (define_insn "aarch64_st1_x3_<mode>"
5114 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5115 (unspec:CI
5116 [(match_operand:CI 1 "register_operand" "w")
5117 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5118 "TARGET_SIMD"
5119 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5120 [(set_attr "type" "neon_store1_3reg<q>")]
5121 )
5122
5123 (define_insn "*aarch64_mov<mode>"
5124 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5125 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5126 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5127 && (register_operand (operands[0], <MODE>mode)
5128 || register_operand (operands[1], <MODE>mode))"
5129 "@
5130 #
5131 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5132 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5133 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5134 neon_load<nregs>_<nregs>reg_q")
5135 (set_attr "length" "<insn_count>,4,4")]
5136 )
5137
5138 (define_insn "aarch64_be_ld1<mode>"
5139 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5140 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5141 "aarch64_simd_struct_operand" "Utv")]
5142 UNSPEC_LD1))]
5143 "TARGET_SIMD"
5144 "ld1\\t{%0<Vmtype>}, %1"
5145 [(set_attr "type" "neon_load1_1reg<q>")]
5146 )
5147
5148 (define_insn "aarch64_be_st1<mode>"
5149 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5150 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5151 UNSPEC_ST1))]
5152 "TARGET_SIMD"
5153 "st1\\t{%1<Vmtype>}, %0"
5154 [(set_attr "type" "neon_store1_1reg<q>")]
5155 )
5156
5157 (define_insn "*aarch64_be_movoi"
5158 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5159 (match_operand:OI 1 "general_operand" " w,w,m"))]
5160 "TARGET_SIMD && BYTES_BIG_ENDIAN
5161 && (register_operand (operands[0], OImode)
5162 || register_operand (operands[1], OImode))"
5163 "@
5164 #
5165 stp\\t%q1, %R1, %0
5166 ldp\\t%q0, %R0, %1"
5167 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5168 (set_attr "length" "8,4,4")]
5169 )
5170
5171 (define_insn "*aarch64_be_movci"
5172 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5173 (match_operand:CI 1 "general_operand" " w,w,o"))]
5174 "TARGET_SIMD && BYTES_BIG_ENDIAN
5175 && (register_operand (operands[0], CImode)
5176 || register_operand (operands[1], CImode))"
5177 "#"
5178 [(set_attr "type" "multiple")
5179 (set_attr "length" "12,4,4")]
5180 )
5181
5182 (define_insn "*aarch64_be_movxi"
5183 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5184 (match_operand:XI 1 "general_operand" " w,w,o"))]
5185 "TARGET_SIMD && BYTES_BIG_ENDIAN
5186 && (register_operand (operands[0], XImode)
5187 || register_operand (operands[1], XImode))"
5188 "#"
5189 [(set_attr "type" "multiple")
5190 (set_attr "length" "16,4,4")]
5191 )
5192
5193 (define_split
5194 [(set (match_operand:OI 0 "register_operand")
5195 (match_operand:OI 1 "register_operand"))]
5196 "TARGET_SIMD && reload_completed"
5197 [(const_int 0)]
5198 {
5199 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5200 DONE;
5201 })
5202
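;; A CImode value occupies 48 bytes (three Q registers). The big-endian
;; memory case below is split into an OImode move for the first 32 bytes
;; plus a 16-byte move, done in V16QImode, for the remainder; register to
;; register moves become one TImode move per constituent register.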
5203 (define_split
5204 [(set (match_operand:CI 0 "nonimmediate_operand")
5205 (match_operand:CI 1 "general_operand"))]
5206 "TARGET_SIMD && reload_completed"
5207 [(const_int 0)]
5208 {
5209 if (register_operand (operands[0], CImode)
5210 && register_operand (operands[1], CImode))
5211 {
5212 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5213 DONE;
5214 }
5215 else if (BYTES_BIG_ENDIAN)
5216 {
5217 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5218 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5219 emit_move_insn (gen_lowpart (V16QImode,
5220 simplify_gen_subreg (TImode, operands[0],
5221 CImode, 32)),
5222 gen_lowpart (V16QImode,
5223 simplify_gen_subreg (TImode, operands[1],
5224 CImode, 32)));
5225 DONE;
5226 }
5227 else
5228 FAIL;
5229 })
5230
5231 (define_split
5232 [(set (match_operand:XI 0 "nonimmediate_operand")
5233 (match_operand:XI 1 "general_operand"))]
5234 "TARGET_SIMD && reload_completed"
5235 [(const_int 0)]
5236 {
5237 if (register_operand (operands[0], XImode)
5238 && register_operand (operands[1], XImode))
5239 {
5240 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5241 DONE;
5242 }
5243 else if (BYTES_BIG_ENDIAN)
5244 {
5245 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5246 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5247 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5248 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5249 DONE;
5250 }
5251 else
5252 FAIL;
5253 })
5254
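;; LDnR reads exactly one element per destination register, so the memory
;; block touched is the element size times <nregs> bytes.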
5255 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5256 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5257 (match_operand:DI 1 "register_operand" "r")
5258 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259 "TARGET_SIMD"
5260 {
5261 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5262 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5263 * <VSTRUCT:nregs>);
5264
5265 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5266 mem));
5267 DONE;
5268 })
5269
5270 (define_insn "aarch64_ld2<mode>_dreg"
5271 [(set (match_operand:OI 0 "register_operand" "=w")
5272 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5273 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274 UNSPEC_LD2_DREG))]
5275 "TARGET_SIMD"
5276 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5277 [(set_attr "type" "neon_load2_2reg<q>")]
5278 )
5279
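;; For 64-bit scalar modes (DX: DI and DF) there is no .1d arrangement for
;; LD2/LD3/LD4, and a single-element vector needs no de-interleaving, so the
;; DX variants below use LD1 on the equivalent register list instead.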
5280 (define_insn "aarch64_ld2<mode>_dreg"
5281 [(set (match_operand:OI 0 "register_operand" "=w")
5282 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5283 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5284 UNSPEC_LD2_DREG))]
5285 "TARGET_SIMD"
5286 "ld1\\t{%S0.1d - %T0.1d}, %1"
5287 [(set_attr "type" "neon_load1_2reg<q>")]
5288 )
5289
5290 (define_insn "aarch64_ld3<mode>_dreg"
5291 [(set (match_operand:CI 0 "register_operand" "=w")
5292 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5293 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5294 UNSPEC_LD3_DREG))]
5295 "TARGET_SIMD"
5296 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5297 [(set_attr "type" "neon_load3_3reg<q>")]
5298 )
5299
5300 (define_insn "aarch64_ld3<mode>_dreg"
5301 [(set (match_operand:CI 0 "register_operand" "=w")
5302 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5303 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5304 UNSPEC_LD3_DREG))]
5305 "TARGET_SIMD"
5306 "ld1\\t{%S0.1d - %U0.1d}, %1"
5307 [(set_attr "type" "neon_load1_3reg<q>")]
5308 )
5309
5310 (define_insn "aarch64_ld4<mode>_dreg"
5311 [(set (match_operand:XI 0 "register_operand" "=w")
5312 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5313 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5314 UNSPEC_LD4_DREG))]
5315 "TARGET_SIMD"
5316 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5317 [(set_attr "type" "neon_load4_4reg<q>")]
5318 )
5319
5320 (define_insn "aarch64_ld4<mode>_dreg"
5321 [(set (match_operand:XI 0 "register_operand" "=w")
5322 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5323 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 UNSPEC_LD4_DREG))]
5325 "TARGET_SIMD"
5326 "ld1\\t{%S0.1d - %V0.1d}, %1"
5327 [(set_attr "type" "neon_load1_4reg<q>")]
5328 )
5329
5330 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5331 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5332 (match_operand:DI 1 "register_operand" "r")
5333 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5334 "TARGET_SIMD"
5335 {
5336 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5337 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5338
5339 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5340 DONE;
5341 })
5342
5343 (define_expand "aarch64_ld1<VALL_F16:mode>"
5344 [(match_operand:VALL_F16 0 "register_operand")
5345 (match_operand:DI 1 "register_operand")]
5346 "TARGET_SIMD"
5347 {
5348 machine_mode mode = <VALL_F16:MODE>mode;
5349 rtx mem = gen_rtx_MEM (mode, operands[1]);
5350
5351 if (BYTES_BIG_ENDIAN)
5352 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5353 else
5354 emit_move_insn (operands[0], mem);
5355 DONE;
5356 })
5357
5358 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5359 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5360 (match_operand:DI 1 "register_operand" "r")
5361 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5362 "TARGET_SIMD"
5363 {
5364 machine_mode mode = <VSTRUCT:MODE>mode;
5365 rtx mem = gen_rtx_MEM (mode, operands[1]);
5366
5367 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5368 DONE;
5369 })
5370
5371 (define_expand "aarch64_ld1x2<VQ:mode>"
5372 [(match_operand:OI 0 "register_operand" "=w")
5373 (match_operand:DI 1 "register_operand" "r")
5374 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5375 "TARGET_SIMD"
5376 {
5377 machine_mode mode = OImode;
5378 rtx mem = gen_rtx_MEM (mode, operands[1]);
5379
5380 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5381 DONE;
5382 })
5383
5384 (define_expand "aarch64_ld1x2<VDC:mode>"
5385 [(match_operand:OI 0 "register_operand" "=w")
5386 (match_operand:DI 1 "register_operand" "r")
5387 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5388 "TARGET_SIMD"
5389 {
5390 machine_mode mode = OImode;
5391 rtx mem = gen_rtx_MEM (mode, operands[1]);
5392
5393 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5394 DONE;
5395 })
5396
5397
5398 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5399 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5400 (match_operand:DI 1 "register_operand" "r")
5401 (match_operand:VSTRUCT 2 "register_operand" "0")
5402 (match_operand:SI 3 "immediate_operand" "i")
5403 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5404 "TARGET_SIMD"
5405 {
5406 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5407 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5408 * <VSTRUCT:nregs>);
5409
5410 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5411 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5412 operands[0], mem, operands[2], operands[3]));
5413 DONE;
5414 })
5415
5416 ;; Expanders for builtins to extract vector registers from large
5417 ;; opaque integer modes.
5418
5419 ;; D-register list.
5420
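;; Each vector in a structure value occupies one Q-register-sized (16-byte)
;; slot (D-register lists are held zero-extended to Q registers, see below),
;; so element PART lives at byte offset PART * 16; the D half is then
;; extracted with a Q-sized subreg followed by gen_lowpart.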
5421 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5422 [(match_operand:VDC 0 "register_operand" "=w")
5423 (match_operand:VSTRUCT 1 "register_operand" "w")
5424 (match_operand:SI 2 "immediate_operand" "i")]
5425 "TARGET_SIMD"
5426 {
5427 int part = INTVAL (operands[2]);
5428 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5429 int offset = part * 16;
5430
5431 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5432 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5433 DONE;
5434 })
5435
5436 ;; Q-register list.
5437
5438 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5439 [(match_operand:VQ 0 "register_operand" "=w")
5440 (match_operand:VSTRUCT 1 "register_operand" "w")
5441 (match_operand:SI 2 "immediate_operand" "i")]
5442 "TARGET_SIMD"
5443 {
5444 int part = INTVAL (operands[2]);
5445 int offset = part * 16;
5446
5447 emit_move_insn (operands[0],
5448 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5449 DONE;
5450 })
5451
5452 ;; Permuted-store expanders for neon intrinsics.
5453
5454 ;; Permute instructions
5455
5456 ;; vec_perm support
5457
5458 (define_expand "vec_perm<mode>"
5459 [(match_operand:VB 0 "register_operand")
5460 (match_operand:VB 1 "register_operand")
5461 (match_operand:VB 2 "register_operand")
5462 (match_operand:VB 3 "register_operand")]
5463 "TARGET_SIMD"
5464 {
5465 aarch64_expand_vec_perm (operands[0], operands[1],
5466 operands[2], operands[3], <nunits>);
5467 DONE;
5468 })
5469
5470 (define_insn "aarch64_tbl1<mode>"
5471 [(set (match_operand:VB 0 "register_operand" "=w")
5472 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5473 (match_operand:VB 2 "register_operand" "w")]
5474 UNSPEC_TBL))]
5475 "TARGET_SIMD"
5476 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5477 [(set_attr "type" "neon_tbl1<q>")]
5478 )
5479
5480 ;; Two source registers.
5481
5482 (define_insn "aarch64_tbl2v16qi"
5483 [(set (match_operand:V16QI 0 "register_operand" "=w")
5484 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5485 (match_operand:V16QI 2 "register_operand" "w")]
5486 UNSPEC_TBL))]
5487 "TARGET_SIMD"
5488 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5489 [(set_attr "type" "neon_tbl2_q")]
5490 )
5491
5492 (define_insn "aarch64_tbl3<mode>"
5493 [(set (match_operand:VB 0 "register_operand" "=w")
5494 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5495 (match_operand:VB 2 "register_operand" "w")]
5496 UNSPEC_TBL))]
5497 "TARGET_SIMD"
5498 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5499 [(set_attr "type" "neon_tbl3")]
5500 )
5501
5502 (define_insn "aarch64_tbx4<mode>"
5503 [(set (match_operand:VB 0 "register_operand" "=w")
5504 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5505 (match_operand:OI 2 "register_operand" "w")
5506 (match_operand:VB 3 "register_operand" "w")]
5507 UNSPEC_TBX))]
5508 "TARGET_SIMD"
5509 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5510 [(set_attr "type" "neon_tbl4")]
5511 )
5512
5513 ;; Three source registers.
5514
5515 (define_insn "aarch64_qtbl3<mode>"
5516 [(set (match_operand:VB 0 "register_operand" "=w")
5517 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5518 (match_operand:VB 2 "register_operand" "w")]
5519 UNSPEC_TBL))]
5520 "TARGET_SIMD"
5521 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5522 [(set_attr "type" "neon_tbl3")]
5523 )
5524
5525 (define_insn "aarch64_qtbx3<mode>"
5526 [(set (match_operand:VB 0 "register_operand" "=w")
5527 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5528 (match_operand:CI 2 "register_operand" "w")
5529 (match_operand:VB 3 "register_operand" "w")]
5530 UNSPEC_TBX))]
5531 "TARGET_SIMD"
5532 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5533 [(set_attr "type" "neon_tbl3")]
5534 )
5535
5536 ;; Four source registers.
5537
5538 (define_insn "aarch64_qtbl4<mode>"
5539 [(set (match_operand:VB 0 "register_operand" "=w")
5540 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5541 (match_operand:VB 2 "register_operand" "w")]
5542 UNSPEC_TBL))]
5543 "TARGET_SIMD"
5544 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5545 [(set_attr "type" "neon_tbl4")]
5546 )
5547
5548 (define_insn "aarch64_qtbx4<mode>"
5549 [(set (match_operand:VB 0 "register_operand" "=w")
5550 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5551 (match_operand:XI 2 "register_operand" "w")
5552 (match_operand:VB 3 "register_operand" "w")]
5553 UNSPEC_TBX))]
5554 "TARGET_SIMD"
5555 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5556 [(set_attr "type" "neon_tbl4")]
5557 )
5558
5559 (define_insn_and_split "aarch64_combinev16qi"
5560 [(set (match_operand:OI 0 "register_operand" "=w")
5561 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5562 (match_operand:V16QI 2 "register_operand" "w")]
5563 UNSPEC_CONCAT))]
5564 "TARGET_SIMD"
5565 "#"
5566 "&& reload_completed"
5567 [(const_int 0)]
5568 {
5569 aarch64_split_combinev16qi (operands);
5570 DONE;
5571 }
5572 [(set_attr "type" "multiple")]
5573 )
5574
5575 ;; This instruction's pattern is generated directly by
5576 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5577 ;; need corresponding changes there.
5578 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5579 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5580 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5581 (match_operand:VALL_F16 2 "register_operand" "w")]
5582 PERMUTE))]
5583 "TARGET_SIMD"
5584 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5585 [(set_attr "type" "neon_permute<q>")]
5586 )
5587
5588 ;; This instruction's pattern is generated directly by
5589 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5590 ;; need corresponding changes there. Note that the immediate (third)
5591 ;; operand is a lane index, not a byte index.
5592 (define_insn "aarch64_ext<mode>"
5593 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5594 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5595 (match_operand:VALL_F16 2 "register_operand" "w")
5596 (match_operand:SI 3 "immediate_operand" "i")]
5597 UNSPEC_EXT))]
5598 "TARGET_SIMD"
5599 {
5600 operands[3] = GEN_INT (INTVAL (operands[3])
5601 * GET_MODE_UNIT_SIZE (<MODE>mode));
5602 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5603 }
5604 [(set_attr "type" "neon_ext<q>")]
5605 )
5606
5607 ;; This instruction's pattern is generated directly by
5608 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5609 ;; need corresponding changes there.
5610 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5611 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5612 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5613 REVERSE))]
5614 "TARGET_SIMD"
5615 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5616 [(set_attr "type" "neon_rev<q>")]
5617 )
5618
5619 (define_insn "aarch64_st2<mode>_dreg"
5620 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5621 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5622 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5623 UNSPEC_ST2))]
5624 "TARGET_SIMD"
5625 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5626 [(set_attr "type" "neon_store2_2reg")]
5627 )
5628
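;; As with the loads, ST2/ST3/ST4 have no .1d arrangement, so the 64-bit
;; scalar (DX) variants store with ST1.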
5629 (define_insn "aarch64_st2<mode>_dreg"
5630 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5631 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5632 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5633 UNSPEC_ST2))]
5634 "TARGET_SIMD"
5635 "st1\\t{%S1.1d - %T1.1d}, %0"
5636 [(set_attr "type" "neon_store1_2reg")]
5637 )
5638
5639 (define_insn "aarch64_st3<mode>_dreg"
5640 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5641 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5642 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5643 UNSPEC_ST3))]
5644 "TARGET_SIMD"
5645 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5646 [(set_attr "type" "neon_store3_3reg")]
5647 )
5648
5649 (define_insn "aarch64_st3<mode>_dreg"
5650 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5651 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5652 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5653 UNSPEC_ST3))]
5654 "TARGET_SIMD"
5655 "st1\\t{%S1.1d - %U1.1d}, %0"
5656 [(set_attr "type" "neon_store1_3reg")]
5657 )
5658
5659 (define_insn "aarch64_st4<mode>_dreg"
5660 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5663 UNSPEC_ST4))]
5664 "TARGET_SIMD"
5665 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5666 [(set_attr "type" "neon_store4_4reg")]
5667 )
5668
5669 (define_insn "aarch64_st4<mode>_dreg"
5670 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5671 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5672 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5673 UNSPEC_ST4))]
5674 "TARGET_SIMD"
5675 "st1\\t{%S1.1d - %V1.1d}, %0"
5676 [(set_attr "type" "neon_store1_4reg")]
5677 )
5678
5679 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5680 [(match_operand:DI 0 "register_operand" "r")
5681 (match_operand:VSTRUCT 1 "register_operand" "w")
5682 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5683 "TARGET_SIMD"
5684 {
5685 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5686 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5687
5688 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5689 DONE;
5690 })
5691
5692 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5693 [(match_operand:DI 0 "register_operand" "r")
5694 (match_operand:VSTRUCT 1 "register_operand" "w")
5695 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5696 "TARGET_SIMD"
5697 {
5698 machine_mode mode = <VSTRUCT:MODE>mode;
5699 rtx mem = gen_rtx_MEM (mode, operands[0]);
5700
5701 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5702 DONE;
5703 })
5704
5705 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5706 [(match_operand:DI 0 "register_operand" "r")
5707 (match_operand:VSTRUCT 1 "register_operand" "w")
5708 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5709 (match_operand:SI 2 "immediate_operand")]
5710 "TARGET_SIMD"
5711 {
5712 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5713 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5714 * <VSTRUCT:nregs>);
5715
5716 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5717 mem, operands[1], operands[2]));
5718 DONE;
5719 })
5720
5721 (define_expand "aarch64_st1<VALL_F16:mode>"
5722 [(match_operand:DI 0 "register_operand")
5723 (match_operand:VALL_F16 1 "register_operand")]
5724 "TARGET_SIMD"
5725 {
5726 machine_mode mode = <VALL_F16:MODE>mode;
5727 rtx mem = gen_rtx_MEM (mode, operands[0]);
5728
5729 if (BYTES_BIG_ENDIAN)
5730 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5731 else
5732 emit_move_insn (mem, operands[1]);
5733 DONE;
5734 })
5735
5736 ;; Expander for builtins to insert vector registers into large
5737 ;; opaque integer modes.
5738
5739 ;; Q-register list. We don't need a D-reg inserter as we zero-extend
5740 ;; them in arm_neon.h and insert the resulting Q-regs.
5741
5742 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5743 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5744 (match_operand:VSTRUCT 1 "register_operand" "0")
5745 (match_operand:VQ 2 "register_operand" "w")
5746 (match_operand:SI 3 "immediate_operand" "i")]
5747 "TARGET_SIMD"
5748 {
5749 int part = INTVAL (operands[3]);
5750 int offset = part * 16;
5751
5752 emit_move_insn (operands[0], operands[1]);
5753 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5754 operands[2]);
5755 DONE;
5756 })
5757
5758 ;; Standard pattern name vec_init<mode><Vel>.
5759
5760 (define_expand "vec_init<mode><Vel>"
5761 [(match_operand:VALL_F16 0 "register_operand" "")
5762 (match_operand 1 "" "")]
5763 "TARGET_SIMD"
5764 {
5765 aarch64_expand_vector_init (operands[0], operands[1]);
5766 DONE;
5767 })
5768
5769 (define_insn "*aarch64_simd_ld1r<mode>"
5770 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5771 (vec_duplicate:VALL_F16
5772 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5773 "TARGET_SIMD"
5774 "ld1r\\t{%0.<Vtype>}, %1"
5775 [(set_attr "type" "neon_load1_all_lanes")]
5776 )
5777
5778 (define_insn "aarch64_simd_ld1<mode>_x2"
5779 [(set (match_operand:OI 0 "register_operand" "=w")
5780 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5781 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5782 UNSPEC_LD1))]
5783 "TARGET_SIMD"
5784 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5785 [(set_attr "type" "neon_load1_2reg<q>")]
5786 )
5787
5788 (define_insn "aarch64_simd_ld1<mode>_x2"
5789 [(set (match_operand:OI 0 "register_operand" "=w")
5790 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5791 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5792 UNSPEC_LD1))]
5793 "TARGET_SIMD"
5794 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5795 [(set_attr "type" "neon_load1_2reg<q>")]
5796 )
5797
5798
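;; Reciprocal estimate and Newton-Raphson step. FRECPS computes 2.0 - a*b,
;; so starting from x0 = frecpe (a), each iteration
;;   x1 = x0 * frecps (a, x0)
;; roughly doubles the number of correct bits of 1/a.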
5799 (define_insn "aarch64_frecpe<mode>"
5800 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5801 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5802 UNSPEC_FRECPE))]
5803 "TARGET_SIMD"
5804 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5805 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5806 )
5807
5808 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5809 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5810 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5811 FRECP))]
5812 "TARGET_SIMD"
5813 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5814 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5815 )
5816
5817 (define_insn "aarch64_frecps<mode>"
5818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5819 (unspec:VHSDF_HSDF
5820 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5821 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5822 UNSPEC_FRECPS))]
5823 "TARGET_SIMD"
5824 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5825 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5826 )
5827
5828 (define_insn "aarch64_urecpe<mode>"
5829 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5830 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5831 UNSPEC_URECPE))]
5832 "TARGET_SIMD"
5833 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5834 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5835
5836 ;; Standard pattern name vec_extract<mode><Vel>.
5837
5838 (define_expand "vec_extract<mode><Vel>"
5839 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5840 (match_operand:VALL_F16 1 "register_operand" "")
5841 (match_operand:SI 2 "immediate_operand" "")]
5842 "TARGET_SIMD"
5843 {
5844 emit_insn
5845 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5846 DONE;
5847 })
5848
5849 ;; aes
5850
5851 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5852 [(set (match_operand:V16QI 0 "register_operand" "=w")
5853 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5854 (match_operand:V16QI 2 "register_operand" "w")]
5855 CRYPTO_AES))]
5856 "TARGET_SIMD && TARGET_AES"
5857 "aes<aes_op>\\t%0.16b, %2.16b"
5858 [(set_attr "type" "crypto_aese")]
5859 )
5860
5861 ;; When AES/AESMC fusion is enabled we want the register allocation to
5862 ;; look like:
5863 ;; AESE Vn, _
5864 ;; AESMC Vn, Vn
5865 ;; So prefer to tie operand 1 to operand 0 when fusing.
5866
5867 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5868 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5869 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5870 CRYPTO_AESMC))]
5871 "TARGET_SIMD && TARGET_AES"
5872 "aes<aesmc_op>\\t%0.16b, %1.16b"
5873 [(set_attr "type" "crypto_aesmc")
5874 (set_attr_alternative "enabled"
5875 [(if_then_else (match_test
5876 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5877 (const_string "yes" )
5878 (const_string "no"))
5879 (const_string "yes")])]
5880 )
5881
5882 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5883 ;; and enforce the register dependency without scheduling or register
5884 ;; allocation messing up the order or introducing moves in between.
5885 ;; Mash the two together during combine.
5886
5887 (define_insn "*aarch64_crypto_aese_fused"
5888 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5889 (unspec:V16QI
5890 [(unspec:V16QI
5891 [(match_operand:V16QI 1 "register_operand" "0")
5892 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5893 ] UNSPEC_AESMC))]
5894 "TARGET_SIMD && TARGET_AES
5895 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5896 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5897 [(set_attr "type" "crypto_aese")
5898 (set_attr "length" "8")]
5899 )
5900
5901 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5902 ;; and enforce the register dependency without scheduling or register
5903 ;; allocation messing up the order or introducing moves in between.
5904 ;; Mash the two together during combine.
5905
5906 (define_insn "*aarch64_crypto_aesd_fused"
5907 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5908 (unspec:V16QI
5909 [(unspec:V16QI
5910 [(match_operand:V16QI 1 "register_operand" "0")
5911 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5912 ] UNSPEC_AESIMC))]
5913 "TARGET_SIMD && TARGET_AES
5914 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5915 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5916 [(set_attr "type" "crypto_aese")
5917 (set_attr "length" "8")]
5918 )
5919
5920 ;; sha1
5921
5922 (define_insn "aarch64_crypto_sha1hsi"
5923 [(set (match_operand:SI 0 "register_operand" "=w")
5924 (unspec:SI [(match_operand:SI 1
5925 "register_operand" "w")]
5926 UNSPEC_SHA1H))]
5927 "TARGET_SIMD && TARGET_SHA2"
5928 "sha1h\\t%s0, %s1"
5929 [(set_attr "type" "crypto_sha1_fast")]
5930 )
5931
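;; SHA1H reads only the architectural element 0 of the input (the low word
;; of the S register). On big endian that element is GCC lane 3, hence the
;; separate big-endian pattern below.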
5932 (define_insn "aarch64_crypto_sha1hv4si"
5933 [(set (match_operand:SI 0 "register_operand" "=w")
5934 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5935 (parallel [(const_int 0)]))]
5936 UNSPEC_SHA1H))]
5937 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5938 "sha1h\\t%s0, %s1"
5939 [(set_attr "type" "crypto_sha1_fast")]
5940 )
5941
5942 (define_insn "aarch64_be_crypto_sha1hv4si"
5943 [(set (match_operand:SI 0 "register_operand" "=w")
5944 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5945 (parallel [(const_int 3)]))]
5946 UNSPEC_SHA1H))]
5947 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5948 "sha1h\\t%s0, %s1"
5949 [(set_attr "type" "crypto_sha1_fast")]
5950 )
5951
5952 (define_insn "aarch64_crypto_sha1su1v4si"
5953 [(set (match_operand:V4SI 0 "register_operand" "=w")
5954 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5955 (match_operand:V4SI 2 "register_operand" "w")]
5956 UNSPEC_SHA1SU1))]
5957 "TARGET_SIMD && TARGET_SHA2"
5958 "sha1su1\\t%0.4s, %2.4s"
5959 [(set_attr "type" "crypto_sha1_fast")]
5960 )
5961
5962 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5963 [(set (match_operand:V4SI 0 "register_operand" "=w")
5964 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5965 (match_operand:SI 2 "register_operand" "w")
5966 (match_operand:V4SI 3 "register_operand" "w")]
5967 CRYPTO_SHA1))]
5968 "TARGET_SIMD && TARGET_SHA2"
5969 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5970 [(set_attr "type" "crypto_sha1_slow")]
5971 )
5972
5973 (define_insn "aarch64_crypto_sha1su0v4si"
5974 [(set (match_operand:V4SI 0 "register_operand" "=w")
5975 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5976 (match_operand:V4SI 2 "register_operand" "w")
5977 (match_operand:V4SI 3 "register_operand" "w")]
5978 UNSPEC_SHA1SU0))]
5979 "TARGET_SIMD && TARGET_SHA2"
5980 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5981 [(set_attr "type" "crypto_sha1_xor")]
5982 )
5983
5984 ;; sha256
5985
5986 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5987 [(set (match_operand:V4SI 0 "register_operand" "=w")
5988 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5989 (match_operand:V4SI 2 "register_operand" "w")
5990 (match_operand:V4SI 3 "register_operand" "w")]
5991 CRYPTO_SHA256))]
5992 "TARGET_SIMD && TARGET_SHA2"
5993 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5994 [(set_attr "type" "crypto_sha256_slow")]
5995 )
5996
5997 (define_insn "aarch64_crypto_sha256su0v4si"
5998 [(set (match_operand:V4SI 0 "register_operand" "=w")
5999 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6000 (match_operand:V4SI 2 "register_operand" "w")]
6001 UNSPEC_SHA256SU0))]
6002 "TARGET_SIMD && TARGET_SHA2"
6003 "sha256su0\\t%0.4s, %2.4s"
6004 [(set_attr "type" "crypto_sha256_fast")]
6005 )
6006
6007 (define_insn "aarch64_crypto_sha256su1v4si"
6008 [(set (match_operand:V4SI 0 "register_operand" "=w")
6009 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6010 (match_operand:V4SI 2 "register_operand" "w")
6011 (match_operand:V4SI 3 "register_operand" "w")]
6012 UNSPEC_SHA256SU1))]
6013 "TARGET_SIMD && TARGET_SHA2"
6014 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6015 [(set_attr "type" "crypto_sha256_slow")]
6016 )
6017
6018 ;; sha512
6019
6020 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6021 [(set (match_operand:V2DI 0 "register_operand" "=w")
6022 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6023 (match_operand:V2DI 2 "register_operand" "w")
6024 (match_operand:V2DI 3 "register_operand" "w")]
6025 CRYPTO_SHA512))]
6026 "TARGET_SIMD && TARGET_SHA3"
6027 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6028 [(set_attr "type" "crypto_sha512")]
6029 )
6030
6031 (define_insn "aarch64_crypto_sha512su0qv2di"
6032 [(set (match_operand:V2DI 0 "register_operand" "=w")
6033 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6034 (match_operand:V2DI 2 "register_operand" "w")]
6035 UNSPEC_SHA512SU0))]
6036 "TARGET_SIMD && TARGET_SHA3"
6037 "sha512su0\\t%0.2d, %2.2d"
6038 [(set_attr "type" "crypto_sha512")]
6039 )
6040
6041 (define_insn "aarch64_crypto_sha512su1qv2di"
6042 [(set (match_operand:V2DI 0 "register_operand" "=w")
6043 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6044 (match_operand:V2DI 2 "register_operand" "w")
6045 (match_operand:V2DI 3 "register_operand" "w")]
6046 UNSPEC_SHA512SU1))]
6047 "TARGET_SIMD && TARGET_SHA3"
6048 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6049 [(set_attr "type" "crypto_sha512")]
6050 )
6051
6052 ;; sha3
6053
6054 (define_insn "eor3q<mode>4"
6055 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6056 (xor:VQ_I
6057 (xor:VQ_I
6058 (match_operand:VQ_I 2 "register_operand" "w")
6059 (match_operand:VQ_I 3 "register_operand" "w"))
6060 (match_operand:VQ_I 1 "register_operand" "w")))]
6061 "TARGET_SIMD && TARGET_SHA3"
6062 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6063 [(set_attr "type" "crypto_sha3")]
6064 )
6065
6066 (define_insn "aarch64_rax1qv2di"
6067 [(set (match_operand:V2DI 0 "register_operand" "=w")
6068 (xor:V2DI
6069 (rotate:V2DI
6070 (match_operand:V2DI 2 "register_operand" "w")
6071 (const_int 1))
6072 (match_operand:V2DI 1 "register_operand" "w")))]
6073 "TARGET_SIMD && TARGET_SHA3"
6074 "rax1\\t%0.2d, %1.2d, %2.2d"
6075 [(set_attr "type" "crypto_sha3")]
6076 )
6077
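;; XAR rotates the XOR of its two inputs right by an immediate; RAX1 above
;; is the fixed case of a left rotation by one (equivalently, a right
;; rotation by 63).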
6078 (define_insn "aarch64_xarqv2di"
6079 [(set (match_operand:V2DI 0 "register_operand" "=w")
6080 (rotatert:V2DI
6081 (xor:V2DI
6082 (match_operand:V2DI 1 "register_operand" "%w")
6083 (match_operand:V2DI 2 "register_operand" "w"))
6084 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6085 "TARGET_SIMD && TARGET_SHA3"
6086 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6087 [(set_attr "type" "crypto_sha3")]
6088 )
6089
6090 (define_insn "bcaxq<mode>4"
6091 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6092 (xor:VQ_I
6093 (and:VQ_I
6094 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6095 (match_operand:VQ_I 2 "register_operand" "w"))
6096 (match_operand:VQ_I 1 "register_operand" "w")))]
6097 "TARGET_SIMD && TARGET_SHA3"
6098 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6099 [(set_attr "type" "crypto_sha3")]
6100 )
6101
6102 ;; SM3
6103
6104 (define_insn "aarch64_sm3ss1qv4si"
6105 [(set (match_operand:V4SI 0 "register_operand" "=w")
6106 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6107 (match_operand:V4SI 2 "register_operand" "w")
6108 (match_operand:V4SI 3 "register_operand" "w")]
6109 UNSPEC_SM3SS1))]
6110 "TARGET_SIMD && TARGET_SM4"
6111 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6112 [(set_attr "type" "crypto_sm3")]
6113 )
6114
6115
6116 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6117 [(set (match_operand:V4SI 0 "register_operand" "=w")
6118 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6119 (match_operand:V4SI 2 "register_operand" "w")
6120 (match_operand:V4SI 3 "register_operand" "w")
6121 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6122 CRYPTO_SM3TT))]
6123 "TARGET_SIMD && TARGET_SM4"
6124 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6125 [(set_attr "type" "crypto_sm3")]
6126 )
6127
6128 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6129 [(set (match_operand:V4SI 0 "register_operand" "=w")
6130 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6131 (match_operand:V4SI 2 "register_operand" "w")
6132 (match_operand:V4SI 3 "register_operand" "w")]
6133 CRYPTO_SM3PART))]
6134 "TARGET_SIMD && TARGET_SM4"
6135 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6136 [(set_attr "type" "crypto_sm3")]
6137 )
6138
6139 ;; SM4
6140
6141 (define_insn "aarch64_sm4eqv4si"
6142 [(set (match_operand:V4SI 0 "register_operand" "=w")
6143 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6144 (match_operand:V4SI 2 "register_operand" "w")]
6145 UNSPEC_SM4E))]
6146 "TARGET_SIMD && TARGET_SM4"
6147 "sm4e\\t%0.4s, %2.4s"
6148 [(set_attr "type" "crypto_sm4")]
6149 )
6150
6151 (define_insn "aarch64_sm4ekeyqv4si"
6152 [(set (match_operand:V4SI 0 "register_operand" "=w")
6153 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6154 (match_operand:V4SI 2 "register_operand" "w")]
6155 UNSPEC_SM4EKEY))]
6156 "TARGET_SIMD && TARGET_SM4"
6157 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6158 [(set_attr "type" "crypto_sm4")]
6159 )
6160
6161 ;; fp16fml
6162
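;; FMLAL/FMLSL multiply half-precision elements, widen the products to
;; single precision and accumulate (subtract for FMLSL):
;;   d[i] += (float) a[i] * (float) b[i]
;; The _low forms use the low half of each FP16 source vector
;; (vect_par_cnst_lo_half); the _high forms use the high half and assemble
;; to FMLAL2/FMLSL2.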
6163 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6164 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6165 (unspec:VDQSF
6166 [(match_operand:VDQSF 1 "register_operand" "0")
6167 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6168 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6169 VFMLA16_LOW))]
6170 "TARGET_F16FML"
6171 {
6172 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6173 <nunits> * 2, false);
6174 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6175 <nunits> * 2, false);
6176
6177 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6178 operands[1],
6179 operands[2],
6180 operands[3],
6181 p1, p2));
6182 DONE;
6184 })
6185
6186 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6187 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6188 (unspec:VDQSF
6189 [(match_operand:VDQSF 1 "register_operand" "0")
6190 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6191 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6192 VFMLA16_HIGH))]
6193 "TARGET_F16FML"
6194 {
6195 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6196 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6197
6198 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6199 operands[1],
6200 operands[2],
6201 operands[3],
6202 p1, p2));
6203 DONE;
6204 })
6205
6206 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6207 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6208 (fma:VDQSF
6209 (float_extend:VDQSF
6210 (vec_select:<VFMLA_SEL_W>
6211 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6212 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6213 (float_extend:VDQSF
6214 (vec_select:<VFMLA_SEL_W>
6215 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6216 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6217 (match_operand:VDQSF 1 "register_operand" "0")))]
6218 "TARGET_F16FML"
6219 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6220 [(set_attr "type" "neon_fp_mul_s")]
6221 )
6222
6223 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6224 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6225 (fma:VDQSF
6226 (float_extend:VDQSF
6227 (neg:<VFMLA_SEL_W>
6228 (vec_select:<VFMLA_SEL_W>
6229 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6230 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6231 (float_extend:VDQSF
6232 (vec_select:<VFMLA_SEL_W>
6233 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6234 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6235 (match_operand:VDQSF 1 "register_operand" "0")))]
6236 "TARGET_F16FML"
6237 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6238 [(set_attr "type" "neon_fp_mul_s")]
6239 )
6240
6241 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6242 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6243 (fma:VDQSF
6244 (float_extend:VDQSF
6245 (vec_select:<VFMLA_SEL_W>
6246 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6247 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6248 (float_extend:VDQSF
6249 (vec_select:<VFMLA_SEL_W>
6250 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6251 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6252 (match_operand:VDQSF 1 "register_operand" "0")))]
6253 "TARGET_F16FML"
6254 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6255 [(set_attr "type" "neon_fp_mul_s")]
6256 )
6257
6258 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6259 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6260 (fma:VDQSF
6261 (float_extend:VDQSF
6262 (neg:<VFMLA_SEL_W>
6263 (vec_select:<VFMLA_SEL_W>
6264 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6265 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6266 (float_extend:VDQSF
6267 (vec_select:<VFMLA_SEL_W>
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6269 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6270 (match_operand:VDQSF 1 "register_operand" "0")))]
6271 "TARGET_F16FML"
6272 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6273 [(set_attr "type" "neon_fp_mul_s")]
6274 )
6275
6276 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6277 [(set (match_operand:V2SF 0 "register_operand" "")
6278 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6279 (match_operand:V4HF 2 "register_operand" "")
6280 (match_operand:V4HF 3 "register_operand" "")
6281 (match_operand:SI 4 "aarch64_imm2" "")]
6282 VFMLA16_LOW))]
6283 "TARGET_F16FML"
6284 {
6285 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6286 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6287
6288 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6289 operands[1],
6290 operands[2],
6291 operands[3],
6292 p1, lane));
6293 DONE;
6294 }
6295 )
6296
6297 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6298 [(set (match_operand:V2SF 0 "register_operand" "")
6299 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6300 (match_operand:V4HF 2 "register_operand" "")
6301 (match_operand:V4HF 3 "register_operand" "")
6302 (match_operand:SI 4 "aarch64_imm2" "")]
6303 VFMLA16_HIGH))]
6304 "TARGET_F16FML"
6305 {
6306 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6307 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6308
6309 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6310 operands[1],
6311 operands[2],
6312 operands[3],
6313 p1, lane));
6314 DONE;
6315 })
6316
6317 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6318 [(set (match_operand:V2SF 0 "register_operand" "=w")
6319 (fma:V2SF
6320 (float_extend:V2SF
6321 (vec_select:V2HF
6322 (match_operand:V4HF 2 "register_operand" "w")
6323 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6324 (float_extend:V2SF
6325 (vec_duplicate:V2HF
6326 (vec_select:HF
6327 (match_operand:V4HF 3 "register_operand" "x")
6328 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6329 (match_operand:V2SF 1 "register_operand" "0")))]
6330 "TARGET_F16FML"
6331 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6332 [(set_attr "type" "neon_fp_mul_s")]
6333 )
6334
6335 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6336 [(set (match_operand:V2SF 0 "register_operand" "=w")
6337 (fma:V2SF
6338 (float_extend:V2SF
6339 (neg:V2HF
6340 (vec_select:V2HF
6341 (match_operand:V4HF 2 "register_operand" "w")
6342 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6343 (float_extend:V2SF
6344 (vec_duplicate:V2HF
6345 (vec_select:HF
6346 (match_operand:V4HF 3 "register_operand" "x")
6347 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6348 (match_operand:V2SF 1 "register_operand" "0")))]
6349 "TARGET_F16FML"
6350 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6351 [(set_attr "type" "neon_fp_mul_s")]
6352 )
6353
6354 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6355 [(set (match_operand:V2SF 0 "register_operand" "=w")
6356 (fma:V2SF
6357 (float_extend:V2SF
6358 (vec_select:V2HF
6359 (match_operand:V4HF 2 "register_operand" "w")
6360 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6361 (float_extend:V2SF
6362 (vec_duplicate:V2HF
6363 (vec_select:HF
6364 (match_operand:V4HF 3 "register_operand" "x")
6365 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6366 (match_operand:V2SF 1 "register_operand" "0")))]
6367 "TARGET_F16FML"
6368 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6369 [(set_attr "type" "neon_fp_mul_s")]
6370 )
6371
6372 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6373 [(set (match_operand:V2SF 0 "register_operand" "=w")
6374 (fma:V2SF
6375 (float_extend:V2SF
6376 (neg:V2HF
6377 (vec_select:V2HF
6378 (match_operand:V4HF 2 "register_operand" "w")
6379 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6380 (float_extend:V2SF
6381 (vec_duplicate:V2HF
6382 (vec_select:HF
6383 (match_operand:V4HF 3 "register_operand" "x")
6384 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6385 (match_operand:V2SF 1 "register_operand" "0")))]
6386 "TARGET_F16FML"
6387 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6388 [(set_attr "type" "neon_fp_mul_s")]
6389 )
6390
6391 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6392 [(set (match_operand:V4SF 0 "register_operand" "")
6393 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6394 (match_operand:V8HF 2 "register_operand" "")
6395 (match_operand:V8HF 3 "register_operand" "")
6396 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6397 VFMLA16_LOW))]
6398 "TARGET_F16FML"
6399 {
6400 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6401 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6402
6403 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6404 operands[1],
6405 operands[2],
6406 operands[3],
6407 p1, lane));
6408 DONE;
6409 })
6410
6411 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6412 [(set (match_operand:V4SF 0 "register_operand" "")
6413 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6414 (match_operand:V8HF 2 "register_operand" "")
6415 (match_operand:V8HF 3 "register_operand" "")
6416 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6417 VFMLA16_HIGH))]
6418 "TARGET_F16FML"
6419 {
6420 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6421 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6422
6423 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6424 operands[1],
6425 operands[2],
6426 operands[3],
6427 p1, lane));
6428 DONE;
6429 })
6430
6431 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6432 [(set (match_operand:V4SF 0 "register_operand" "=w")
6433 (fma:V4SF
6434 (float_extend:V4SF
6435 (vec_select:V4HF
6436 (match_operand:V8HF 2 "register_operand" "w")
6437 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6438 (float_extend:V4SF
6439 (vec_duplicate:V4HF
6440 (vec_select:HF
6441 (match_operand:V8HF 3 "register_operand" "x")
6442 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6443 (match_operand:V4SF 1 "register_operand" "0")))]
6444 "TARGET_F16FML"
6445 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6446 [(set_attr "type" "neon_fp_mul_s")]
6447 )
6448
6449 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6450 [(set (match_operand:V4SF 0 "register_operand" "=w")
6451 (fma:V4SF
6452 (float_extend:V4SF
6453 (neg:V4HF
6454 (vec_select:V4HF
6455 (match_operand:V8HF 2 "register_operand" "w")
6456 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6457 (float_extend:V4SF
6458 (vec_duplicate:V4HF
6459 (vec_select:HF
6460 (match_operand:V8HF 3 "register_operand" "x")
6461 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6462 (match_operand:V4SF 1 "register_operand" "0")))]
6463 "TARGET_F16FML"
6464 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6465 [(set_attr "type" "neon_fp_mul_s")]
6466 )
6467
6468 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6469 [(set (match_operand:V4SF 0 "register_operand" "=w")
6470 (fma:V4SF
6471 (float_extend:V4SF
6472 (vec_select:V4HF
6473 (match_operand:V8HF 2 "register_operand" "w")
6474 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6475 (float_extend:V4SF
6476 (vec_duplicate:V4HF
6477 (vec_select:HF
6478 (match_operand:V8HF 3 "register_operand" "x")
6479 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6480 (match_operand:V4SF 1 "register_operand" "0")))]
6481 "TARGET_F16FML"
6482 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6483 [(set_attr "type" "neon_fp_mul_s")]
6484 )
6485
6486 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6487 [(set (match_operand:V4SF 0 "register_operand" "=w")
6488 (fma:V4SF
6489 (float_extend:V4SF
6490 (neg:V4HF
6491 (vec_select:V4HF
6492 (match_operand:V8HF 2 "register_operand" "w")
6493 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6494 (float_extend:V4SF
6495 (vec_duplicate:V4HF
6496 (vec_select:HF
6497 (match_operand:V8HF 3 "register_operand" "x")
6498 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6499 (match_operand:V4SF 1 "register_operand" "0")))]
6500 "TARGET_F16FML"
6501 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6502 [(set_attr "type" "neon_fp_mul_s")]
6503 )
6504
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf
             (operands[0], operands[1], operands[2], operands[3], p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf
             (operands[0], operands[1], operands[2], operands[3], p1, lane));
  DONE;
})

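;; The V2SF expanders mirror the V4SF ones above: the accumulator
;; narrows to 64 bits and the multiplicand halves come from a V4HF
;; register, but operand 3 is still a full V8HF vector, so the laneq
;; index keeps its 0-7 range (aarch64_lane_imm3).
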
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

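;; _lane_ (as opposed to _laneq_) variants: the indexed operand is a
;; 64-bit V4HF vector rather than V8HF, so the index is restricted to
;; 0-3 and the patterns use the aarch64_imm2 predicate and "Ui2"
;; constraint in place of aarch64_lane_imm3/"Ui7".
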
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf
             (operands[0], operands[1], operands[2], operands[3], p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf
             (operands[0], operands[1], operands[2], operands[3], p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

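;; Together with the non-indexed FMLAL/FMLSL patterns earlier in the
;; file, the lane and laneq patterns above cover the full FP16 FML
;; matrix: {fmlal,fmlsl} x {low,high} x {.2s,.4s}.
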
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

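;; PMULL with the .1Q/.1D arrangement performs a 64x64->128-bit
;; carry-less (polynomial) multiply over GF(2); it belongs to the AES
;; half of the crypto extension, hence the TARGET_AES gate.  A hedged
;; sketch of the corresponding ACLE intrinsic, assuming an -march that
;; includes +aes:
;;
;;   #include <arm_neon.h>
;;
;;   /* One carry-less multiply step, as used in GHASH/CLMUL-style
;;      polynomial hashing.  */
;;   poly128_t
;;   clmul (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);
;;   }
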
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
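
;; pmull2 implicitly uses the high 64-bit halves of its V2DI sources,
;; saving an explicit extraction; ACLE exposes this form as
;; vmull_high_p64 (poly64x2_t, poly64x2_t).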