;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((GET_MODE_SIZE (<MODE>mode) == 16
                && aarch64_mem_pair_operand (operands[0], DImode))
               || GET_MODE_SIZE (<MODE>mode) == 8)))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

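;; A minimal sketch of what the xzr check above buys us: storing a zero
;; vector needs no SIMD register at all.  The intrinsics and the expected
;; instructions below are illustrative assumptions, not compiler output.
;;
;;   #include <arm_neon.h>
;;   void clear16 (uint32_t *p)
;;   {
;;     vst1q_u32 (p, vdupq_n_u32 (0));   /* 16 bytes: stp xzr, xzr, [x0] */
;;   }
;;   void clear8 (uint32_t *p)
;;   {
;;     vst1_u32 (p, vdup_n_u32 (0));     /* 8 bytes: str xzr, [x0] */
;;   }
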
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
         "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
         "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "ldr\t%d0, %1";
    case 1: return "str\txzr, %0";
    case 2: return "str\t%d1, %0";
    case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4: return "umov\t%0, %1.d[0]";
    case 5: return "fmov\t%d0, %1";
    case 6: return "mov\t%0, %1";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1],
                                                <MODE>mode, 64);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
         "=w, Umq, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
         "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
        (match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
        (match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

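;; Why bswap + rbit + clz computes ctz above: reversing all the bits of a
;; lane turns its lowest set bit into its highest set bit, so
;; ctz (x) == clz (bitrev (x)).  AdvSIMD has no lane-wide bit reverse, but
;; the byte reverse (bswap) followed by RBIT (bit reverse within each byte)
;; composes into one.  A one-lane worked example (a sketch, not output):
;;
;;   x = 0x00000008             /* ctz (x) == 3 */
;;   bitrev (x) == 0x10000000   /* clz == 3 == ctz (x) */
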
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

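;; The identity used above: xorsign (a, b) = a * copysign (1.0, b) can be
;; computed without a multiply as a ^ (b & sign_mask) on the IEEE bit
;; patterns.  A scalar C sketch of the same trick (names and includes are
;; illustrative, not taken from this file):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsign (float a, float b)
;;   {
;;     uint32_t ia, ib;
;;     memcpy (&ia, &a, 4);
;;     memcpy (&ib, &b, 4);
;;     ia ^= ib & 0x80000000u;   /* flip a's sign where b is negative */
;;     memcpy (&a, &ia, 4);
;;     return a;
;;   }
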
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                            DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

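;; For reference, a loop of the shape described in the comment above.  The
;; element types and flags are assumptions for illustration; with them the
;; vectorizer can select the expand above and emit [us]dot.
;;
;;   /* Assumed flags: -O3 -march=armv8.2-a+dotprod.  */
;;   int dot (signed char *a, signed char *b, int len)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < len; i++)
;;       r += a[i] * b[i];   /* per 16-byte chunk: sdot v0.4s, v1.16b, v2.16b */
;;     return r;
;;   }
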
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)

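;; The BSL above implements copysign as a bitwise select against a sign-bit
;; mask: each result bit comes from operand 2 where the mask is set and from
;; operand 1 elsewhere.  A scalar sketch of the selection (illustrative):
;;
;;   uint32_t copysign_bits (uint32_t mag, uint32_t sgn)
;;   {
;;     uint32_t mask = 0x80000000u;           /* HOST_WIDE_INT_M1U << 31 */
;;     return (mask & sgn) | (~mask & mag);   /* what the BSL computes */
;;   }
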
(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2],
                   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2],
                   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
          (vec_duplicate:VDQ_BHSI
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
          (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.<Vetype>[%p2], %w1";
      case 1:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
      case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

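;; How the register-amount case above works: AdvSIMD has no right shift by
;; register, only USHL/SSHL, which shift left for positive lane values and
;; right for negative ones.  So `v >> n' becomes USHL by the negated amount,
;; duplicated across all lanes.  A one-lane C sketch of the USHL semantics
;; being relied on (illustrative):
;;
;;   uint32_t lshr_via_ushl (uint32_t x, int n)
;;   {
;;     int shift = -n;   /* the gen_negsi2 above */
;;     return shift >= 0 ? x << shift : x >> -shift;
;;   }
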
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like an ASR by 63; however, the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

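;; A worked check of the clamp above, assuming arithmetic-shift semantics:
;; for x = -5, x >> 63 == -1 (all sign bits), exactly what an infinitely
;; wide shift by 64 would produce; for x = 5, x >> 63 == 0.  The two shift
;; amounts therefore agree for every input (illustration, not from the
;; source).
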
(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "register_operand" "r,w"))
          (match_operand:V2DI 3 "register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_merge:VDQF_F16
          (vec_duplicate:VDQF_F16
            (match_operand:<VEL> 1 "register_operand" "w"))
          (match_operand:VDQF_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand" "")
   (match_operand:<VHALF> 1 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                                                        operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                                                     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
   (match_operand:VDN 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    else
      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1695 ;;
1696 ;; Floating-point operations can raise an exception.  Vectorizing such
1697 ;; operations is safe for the reasons explained below.
1698 ;;
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling; however, this is an optional feature.  In the
1701 ;; event of a floating-point exception being raised by vectorised
1702 ;; code then:
1703 ;; 1. If trapped floating-point exceptions are available, a trap
1704 ;; will be taken when any lane raises an enabled exception.  A trap
1705 ;; handler may determine which lane raised the exception.
1706 ;; 2. Alternatively, a sticky exception flag is set in the
1707 ;; floating-point status register (FPSR).  Software may explicitly
1708 ;; test the exception flags, in which case the tests either prevent
1709 ;; vectorisation (allowing precise identification of the failing
1710 ;; operation), or, being outside of vectorisable regions, imply that
1711 ;; the specific operation and lane are not of interest.
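;;
;; For illustration only (a hedged sketch, not part of the machine
;; description): the upshot is that a plain C loop such as
;;
;;   void vadd (float *restrict c, const float *restrict a,
;;              const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       c[i] = a[i] + b[i];
;;   }
;;
;; may be auto-vectorised to the "add<mode>3" pattern below (FADD on
;; .4s lanes) without -ffast-math or -funsafe-math-optimizations.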
1712
1713 ;; FP arithmetic operations.
1714
1715 (define_insn "add<mode>3"
1716 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718 (match_operand:VHSDF 2 "register_operand" "w")))]
1719 "TARGET_SIMD"
1720 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1722 )
1723
1724 (define_insn "sub<mode>3"
1725 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727 (match_operand:VHSDF 2 "register_operand" "w")))]
1728 "TARGET_SIMD"
1729 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1731 )
1732
1733 (define_insn "mul<mode>3"
1734 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736 (match_operand:VHSDF 2 "register_operand" "w")))]
1737 "TARGET_SIMD"
1738 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1740 )
1741
1742 (define_expand "div<mode>3"
1743 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744 (div:VHSDF (match_operand:VHSDF 1 "general_operand" "w")
1745 (match_operand:VHSDF 2 "register_operand" "w")))]
1746 "TARGET_SIMD"
1747 {
1748 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1749 DONE;
1750
1751 operands[1] = force_reg (<MODE>mode, operands[1]);
1752 })
1753
1754 (define_insn "*div<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1758 "TARGET_SIMD"
1759 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_div_<stype><q>")]
1761 )
1762
1763 (define_insn "neg<mode>2"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1766 "TARGET_SIMD"
1767 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1769 )
1770
1771 (define_insn "abs<mode>2"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1774 "TARGET_SIMD"
1775 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1777 )
1778
1779 (define_insn "fma<mode>4"
1780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782 (match_operand:VHSDF 2 "register_operand" "w")
1783 (match_operand:VHSDF 3 "register_operand" "0")))]
1784 "TARGET_SIMD"
1785 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1787 )
1788
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790 [(set (match_operand:VDQF 0 "register_operand" "=w")
1791 (fma:VDQF
1792 (vec_duplicate:VDQF
1793 (vec_select:<VEL>
1794 (match_operand:VDQF 1 "register_operand" "<h_con>")
1795 (parallel [(match_operand:SI 2 "immediate_operand")])))
1796 (match_operand:VDQF 3 "register_operand" "w")
1797 (match_operand:VDQF 4 "register_operand" "0")))]
1798 "TARGET_SIMD"
1799 {
1800 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1802 }
1803 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1804 )
1805
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1808 (fma:VDQSF
1809 (vec_duplicate:VDQSF
1810 (vec_select:<VEL>
1811 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQSF 3 "register_operand" "w")
1814 (match_operand:VDQSF 4 "register_operand" "0")))]
1815 "TARGET_SIMD"
1816 {
1817 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819 }
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824 [(set (match_operand:VMUL 0 "register_operand" "=w")
1825 (fma:VMUL
1826 (vec_duplicate:VMUL
1827 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828 (match_operand:VMUL 2 "register_operand" "w")
1829 (match_operand:VMUL 3 "register_operand" "0")))]
1830 "TARGET_SIMD"
1831 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1833 )
1834
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836 [(set (match_operand:DF 0 "register_operand" "=w")
1837 (fma:DF
1838 (vec_select:DF
1839 (match_operand:V2DF 1 "register_operand" "w")
1840 (parallel [(match_operand:SI 2 "immediate_operand")]))
1841 (match_operand:DF 3 "register_operand" "w")
1842 (match_operand:DF 4 "register_operand" "0")))]
1843 "TARGET_SIMD"
1844 {
1845 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1847 }
1848 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1849 )
1850
1851 (define_insn "fnma<mode>4"
1852 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1853 (fma:VHSDF
1854 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855 (match_operand:VHSDF 2 "register_operand" "w")
1856 (match_operand:VHSDF 3 "register_operand" "0")))]
1857 "TARGET_SIMD"
1858 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1860 )
1861
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863 [(set (match_operand:VDQF 0 "register_operand" "=w")
1864 (fma:VDQF
1865 (neg:VDQF
1866 (match_operand:VDQF 3 "register_operand" "w"))
1867 (vec_duplicate:VDQF
1868 (vec_select:<VEL>
1869 (match_operand:VDQF 1 "register_operand" "<h_con>")
1870 (parallel [(match_operand:SI 2 "immediate_operand")])))
1871 (match_operand:VDQF 4 "register_operand" "0")))]
1872 "TARGET_SIMD"
1873 {
1874 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1876 }
1877 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1878 )
1879
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1882 (fma:VDQSF
1883 (neg:VDQSF
1884 (match_operand:VDQSF 3 "register_operand" "w"))
1885 (vec_duplicate:VDQSF
1886 (vec_select:<VEL>
1887 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888 (parallel [(match_operand:SI 2 "immediate_operand")])))
1889 (match_operand:VDQSF 4 "register_operand" "0")))]
1890 "TARGET_SIMD"
1891 {
1892 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894 }
1895 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1896 )
1897
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899 [(set (match_operand:VMUL 0 "register_operand" "=w")
1900 (fma:VMUL
1901 (neg:VMUL
1902 (match_operand:VMUL 2 "register_operand" "w"))
1903 (vec_duplicate:VMUL
1904 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905 (match_operand:VMUL 3 "register_operand" "0")))]
1906 "TARGET_SIMD"
1907 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1909 )
1910
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912 [(set (match_operand:DF 0 "register_operand" "=w")
1913 (fma:DF
1914 (vec_select:DF
1915 (match_operand:V2DF 1 "register_operand" "w")
1916 (parallel [(match_operand:SI 2 "immediate_operand")]))
1917 (neg:DF
1918 (match_operand:DF 3 "register_operand" "w"))
1919 (match_operand:DF 4 "register_operand" "0")))]
1920 "TARGET_SIMD"
1921 {
1922 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1924 }
1925 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1926 )
1927
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
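;; For illustration (a sketch; the mapping noted below is standard
;; AArch64 behaviour rather than anything specific to this file): each
;; rounding built-in selects its own FRINT variant, e.g.
;;
;;   void vtrunc (float *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = __builtin_truncf (s[i]);   /* btrunc -> frintz */
;;   }
;;
;; with floor mapping to frintm, ceil to frintp and round to frinta.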
1930 (define_insn "<frint_pattern><mode>2"
1931 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1933 FRINT))]
1934 "TARGET_SIMD"
1935 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936 [(set_attr "type" "neon_fp_round_<stype><q>")]
1937 )
1938
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor.
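;; For illustration (a sketch): lbtrunc corresponds to the plain C
;; float-to-integer conversion,
;;
;;   void vconv (int *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (int) s[i];   /* fcvtzs on vector lanes */
;;   }
;;
;; while lround, lceil and lfloor select fcvtas, fcvtps and fcvtms.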
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944 [(match_operand:VHSDF 1 "register_operand" "w")]
1945 FCVT)))]
1946 "TARGET_SIMD"
1947 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1949 )
1950
1951 ;; HF Scalar variants of related SIMD instructions.
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953 [(set (match_operand:HI 0 "register_operand" "=w")
1954 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1955 FCVT)))]
1956 "TARGET_SIMD_F16INST"
1957 "fcvt<frint_suffix><su>\t%h0, %h1"
1958 [(set_attr "type" "neon_fp_to_int_s")]
1959 )
1960
1961 (define_insn "<optab>_trunchfhi2"
1962 [(set (match_operand:HI 0 "register_operand" "=w")
1963 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964 "TARGET_SIMD_F16INST"
1965 "fcvtz<su>\t%h0, %h1"
1966 [(set_attr "type" "neon_fp_to_int_s")]
1967 )
1968
1969 (define_insn "<optab>hihf2"
1970 [(set (match_operand:HF 0 "register_operand" "=w")
1971 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972 "TARGET_SIMD_F16INST"
1973 "<su_optab>cvtf\t%h0, %h1"
1974 [(set_attr "type" "neon_int_to_fp_s")]
1975 )
1976
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1980 [(mult:VDQF
1981 (match_operand:VDQF 1 "register_operand" "w")
1982 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1983 UNSPEC_FRINTZ)))]
1984 "TARGET_SIMD
1985 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1987 {
1988 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1989 char buf[64];
1990 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991 output_asm_insn (buf, operands);
1992 return "";
1993 }
1994 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1995 )
1996
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000 [(match_operand:VHSDF 1 "register_operand")]
2001 UNSPEC_FRINTZ)))]
2002 "TARGET_SIMD"
2003 {})
2004
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008 [(match_operand:VHSDF 1 "register_operand")]
2009 UNSPEC_FRINTZ)))]
2010 "TARGET_SIMD"
2011 {})
2012
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand")
2015 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2016 UNSPEC_FRINTZ))]
2017 "TARGET_SIMD"
2018 {})
2019
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2022 (FLOATUORS:VHSDF
2023 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2024 "TARGET_SIMD"
2025 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2027 )
2028
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2032
2033 ;; Float widening operations.
2034
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037 (float_extend:<VWIDE> (vec_select:<VHALF>
2038 (match_operand:VQ_HSF 1 "register_operand" "w")
2039 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2040 )))]
2041 "TARGET_SIMD"
2042 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043 [(set_attr "type" "neon_fp_cvt_widen_s")]
2044 )
2045
2046 ;; Convert between fixed-point and floating-point (vector modes)
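;;
;; For illustration (a hedged sketch): with #fbits = 16 these implement
;; a Q15.16 fixed-point representation, lane-wise equivalent to
;;
;;   #include <stdint.h>
;;   float   to_float (int32_t q) { return (float) q / 65536.0f; }    /* scvtf, #16 */
;;   int32_t to_fixed (float f) { return (int32_t) (f * 65536.0f); }  /* fcvtzs, #16 */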
2047
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050 (unspec:<VHSDF:FCVT_TARGET>
2051 [(match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:SI 2 "immediate_operand" "i")]
2053 FCVT_F2FIXED))]
2054 "TARGET_SIMD"
2055 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2057 )
2058
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061 (unspec:<VDQ_HSDI:FCVT_TARGET>
2062 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063 (match_operand:SI 2 "immediate_operand" "i")]
2064 FCVT_FIXED2F))]
2065 "TARGET_SIMD"
2066 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2068 )
2069
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns, their behavior is as required.
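;;
;; For illustration (a sketch in arm_neon.h terms, one consumer of
;; these semantics): the "hi" unpack always widens the higher
;; architectural lanes, matching
;;
;;   #include <arm_neon.h>
;;   float64x2_t unpack_hi (float32x4_t x)
;;   {
;;     return vcvt_high_f64_f32 (x);   /* fcvtl2: lanes 2 and 3 */
;;   }
;;
;; regardless of the target endianness.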
2078
2079 (define_expand "vec_unpacks_lo_<mode>"
2080 [(match_operand:<VWIDE> 0 "register_operand" "")
2081 (match_operand:VQ_HSF 1 "register_operand" "")]
2082 "TARGET_SIMD"
2083 {
2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2086 operands[1], p));
2087 DONE;
2088 }
2089 )
2090
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (float_extend:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQ_HSF 1 "register_operand" "w")
2095 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2096 )))]
2097 "TARGET_SIMD"
2098 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099 [(set_attr "type" "neon_fp_cvt_widen_s")]
2100 )
2101
2102 (define_expand "vec_unpacks_hi_<mode>"
2103 [(match_operand:<VWIDE> 0 "register_operand" "")
2104 (match_operand:VQ_HSF 1 "register_operand" "")]
2105 "TARGET_SIMD"
2106 {
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2109 operands[1], p));
2110 DONE;
2111 }
2112 )
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115 (float_extend:<VWIDE>
2116 (match_operand:VDF 1 "register_operand" "w")))]
2117 "TARGET_SIMD"
2118 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119 [(set_attr "type" "neon_fp_cvt_widen_s")]
2120 )
2121
2122 ;; Float narrowing operations.
2123
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125 [(set (match_operand:VDF 0 "register_operand" "=w")
2126 (float_truncate:VDF
2127 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2128 "TARGET_SIMD"
2129 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2131 )
2132
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2135 (vec_concat:<VDBL>
2136 (match_operand:VDF 1 "register_operand" "0")
2137 (float_truncate:VDF
2138 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2142 )
2143
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2146 (vec_concat:<VDBL>
2147 (float_truncate:VDF
2148 (match_operand:<VWIDE> 2 "register_operand" "w"))
2149 (match_operand:VDF 1 "register_operand" "0")))]
2150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2153 )
2154
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156 [(match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2158 (match_operand:<VWIDE> 2 "register_operand" "w")]
2159 "TARGET_SIMD"
2160 {
2161 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164 emit_insn (gen (operands[0], operands[1], operands[2]));
2165 DONE;
2166 }
2167 )
2168
2169 (define_expand "vec_pack_trunc_v2df"
2170 [(set (match_operand:V4SF 0 "register_operand")
2171 (vec_concat:V4SF
2172 (float_truncate:V2SF
2173 (match_operand:V2DF 1 "register_operand"))
2174 (float_truncate:V2SF
2175 (match_operand:V2DF 2 "register_operand"))
2176 ))]
2177 "TARGET_SIMD"
2178 {
2179 rtx tmp = gen_reg_rtx (V2SFmode);
2180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2182
2183 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185 tmp, operands[hi]));
2186 DONE;
2187 }
2188 )
2189
2190 (define_expand "vec_pack_trunc_df"
2191 [(set (match_operand:V2SF 0 "register_operand")
2192 (vec_concat:V2SF
2193 (float_truncate:SF
2194 (match_operand:DF 1 "register_operand"))
2195 (float_truncate:SF
2196 (match_operand:DF 2 "register_operand"))
2197 ))]
2198 "TARGET_SIMD"
2199 {
2200 rtx tmp = gen_reg_rtx (V2DFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2207 DONE;
2208 }
2209 )
2210
2211 ;; FP Max/Min
2212 ;; Max/Min are introduced by GCC's mid-end idiom recognition.  An
2213 ;; expression like:
2214 ;; a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2217 ;;
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2223 ;; NaNs.
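;;
;; For illustration (a sketch): the idiom in question is
;;
;;   float fmin_idiom (float b, float c) { return (b < c) ? b : c; }
;;
;; which reaches the pattern below as MIN_EXPR only when compiled with
;; -ffinite-math-only (explicitly or via -ffast-math).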
2224
2225 (define_insn "<su><maxmin><mode>3"
2226 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228 (match_operand:VHSDF 2 "register_operand" "w")))]
2229 "TARGET_SIMD"
2230 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2232 )
2233
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
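;; For illustration (a sketch): a loop over the C library functions,
;;
;;   #include <math.h>
;;   void vmin (float *restrict d, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = fminf (a[i], b[i]);   /* fminnm on .4s lanes */
;;   }
;;
;; can use fminnm directly, as FMINNM already provides the IEEE fmin ()
;; treatment of NaN operands.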
2237 (define_insn "<maxmin_uns><mode>3"
2238 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240 (match_operand:VHSDF 2 "register_operand" "w")]
2241 FMAXMIN_UNS))]
2242 "TARGET_SIMD"
2243 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2245 )
2246
2247 ;; 'across lanes' add.
2248
2249 (define_expand "reduc_plus_scal_<mode>"
2250 [(match_operand:<VEL> 0 "register_operand" "=w")
2251 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2252 UNSPEC_ADDV)]
2253 "TARGET_SIMD"
2254 {
2255 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256 rtx scratch = gen_reg_rtx (<MODE>mode);
2257 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2259 DONE;
2260 }
2261 )
2262
2263 (define_insn "aarch64_faddp<mode>"
2264 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266 (match_operand:VHSDF 2 "register_operand" "w")]
2267 UNSPEC_FADDV))]
2268 "TARGET_SIMD"
2269 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2271 )
2272
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274 [(set (match_operand:VDQV 0 "register_operand" "=w")
2275 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2276 UNSPEC_ADDV))]
2277 "TARGET_SIMD"
2278 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279 [(set_attr "type" "neon_reduc_add<q>")]
2280 )
2281
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283 [(set (match_operand:V2SI 0 "register_operand" "=w")
2284 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2285 UNSPEC_ADDV))]
2286 "TARGET_SIMD"
2287 "addp\\t%0.2s, %1.2s, %1.2s"
2288 [(set_attr "type" "neon_reduc_add")]
2289 )
2290
2291 (define_insn "reduc_plus_scal_<mode>"
2292 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2294 UNSPEC_FADDV))]
2295 "TARGET_SIMD"
2296 "faddp\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2298 )
2299
2300 (define_expand "reduc_plus_scal_v4sf"
2301 [(set (match_operand:SF 0 "register_operand")
2302 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2303 UNSPEC_FADDV))]
2304 "TARGET_SIMD"
2305 {
2306 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307 rtx scratch = gen_reg_rtx (V4SFmode);
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2311 DONE;
2312 })
2313
2314 (define_insn "clrsb<mode>2"
2315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2317 "TARGET_SIMD"
2318 "cls\\t%0.<Vtype>, %1.<Vtype>"
2319 [(set_attr "type" "neon_cls<q>")]
2320 )
2321
2322 (define_insn "clz<mode>2"
2323 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2325 "TARGET_SIMD"
2326 "clz\\t%0.<Vtype>, %1.<Vtype>"
2327 [(set_attr "type" "neon_cls<q>")]
2328 )
2329
2330 (define_insn "popcount<mode>2"
2331 [(set (match_operand:VB 0 "register_operand" "=w")
2332 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2333 "TARGET_SIMD"
2334 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335 [(set_attr "type" "neon_cnt<q>")]
2336 )
2337
2338 ;; 'across lanes' max and min ops.
2339
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343 [(match_operand:<VEL> 0 "register_operand")
2344 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2345 FMAXMINV)]
2346 "TARGET_SIMD"
2347 {
2348 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349 rtx scratch = gen_reg_rtx (<MODE>mode);
2350 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2351 operands[1]));
2352 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2353 DONE;
2354 }
2355 )
2356
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359 [(match_operand:<VEL> 0 "register_operand")
2360 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2361 MAXMINV)]
2362 "TARGET_SIMD"
2363 {
2364 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365 rtx scratch = gen_reg_rtx (<MODE>mode);
2366 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2367 operands[1]));
2368 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2369 DONE;
2370 }
2371 )
2372
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2376 MAXMINV))]
2377 "TARGET_SIMD"
2378 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379 [(set_attr "type" "neon_reduc_minmax<q>")]
2380 )
2381
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383 [(set (match_operand:V2SI 0 "register_operand" "=w")
2384 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2385 MAXMINV))]
2386 "TARGET_SIMD"
2387 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388 [(set_attr "type" "neon_reduc_minmax")]
2389 )
2390
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2394 FMAXMINV))]
2395 "TARGET_SIMD"
2396 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2398 )
2399
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; allocation.
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2403 ;; to select.
2404 ;;
2405 ;; Thus our BSL is of the form:
2406 ;; op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2408 ;;
2409 ;; if (op0 = mask)
2410 ;; bsl op0, op2, op3
2411 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2412 ;; bit op0, op2, mask
2413 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2414 ;; bif op0, op3, mask
2415 ;;
2416 ;; This pattern is what the aarch64_simd_bsl<mode> expander expands to.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
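;;
;; For illustration (a scalar sketch of the same identity): the
;; xor/and/xor form used in the RTL below is the standard bit-select
;; trick, equivalent to (mask & a) | (~mask & b):
;;
;;   #include <stdint.h>
;;   uint32_t bsl (uint32_t mask, uint32_t a, uint32_t b)
;;   {
;;     return ((a ^ b) & mask) ^ b;
;;   }
;;
;; so the three alternatives differ only in which input the destination
;; register aliases.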
2419
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2422 (xor:VDQ_I
2423 (and:VDQ_I
2424 (xor:VDQ_I
2425 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428 (match_dup:<V_INT_EQUIV> 3)
2429 ))]
2430 "TARGET_SIMD"
2431 "@
2432 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435 [(set_attr "type" "neon_bsl<q>")]
2436 )
2437
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first. The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
2443
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2446 (xor:VDQ_I
2447 (and:VDQ_I
2448 (xor:VDQ_I
2449 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452 (match_dup:<V_INT_EQUIV> 2)))]
2453 "TARGET_SIMD"
2454 "@
2455 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458 [(set_attr "type" "neon_bsl<q>")]
2459 )
2460
2461 ;; DImode is special: we want to avoid computing, in the vector registers,
2462 ;; operations which are more naturally computed in general purpose
2463 ;; registers. If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again. However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2467 ;;
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
2470
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2473 (xor:DI
2474 (and:DI
2475 (xor:DI
2476 (match_operand:DI 3 "register_operand" "w,0,w,r")
2477 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2479 (match_dup:DI 3)
2480 ))]
2481 "TARGET_SIMD"
2482 "@
2483 bsl\\t%0.8b, %2.8b, %3.8b
2484 bit\\t%0.8b, %2.8b, %1.8b
2485 bif\\t%0.8b, %3.8b, %1.8b
2486 #"
2487 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2488 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2489 {
2490 /* Split back to individual operations. If we're before reload, and
2491 able to create a temporary register, do so. If we're after reload,
2492 we've got an early-clobber destination register, so use that.
2493 Otherwise, we can't create pseudos and we can't yet guarantee that
2494 operands[0] is safe to write, so FAIL to split. */
2495
2496 rtx scratch;
2497 if (reload_completed)
2498 scratch = operands[0];
2499 else if (can_create_pseudo_p ())
2500 scratch = gen_reg_rtx (DImode);
2501 else
2502 FAIL;
2503
2504 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2507 DONE;
2508 }
2509 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510 (set_attr "length" "4,4,4,12")]
2511 )
2512
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2515 (xor:DI
2516 (and:DI
2517 (xor:DI
2518 (match_operand:DI 3 "register_operand" "w,w,0,r")
2519 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2521 (match_dup:DI 2)
2522 ))]
2523 "TARGET_SIMD"
2524 "@
2525 bsl\\t%0.8b, %3.8b, %2.8b
2526 bit\\t%0.8b, %3.8b, %1.8b
2527 bif\\t%0.8b, %2.8b, %1.8b
2528 #"
2529 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2530 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2531 {
2532 /* Split back to individual operations. If we're before reload, and
2533 able to create a temporary register, do so. If we're after reload,
2534 we've got an early-clobber destination register, so use that.
2535 Otherwise, we can't create pseudos and we can't yet guarantee that
2536 operands[0] is safe to write, so FAIL to split. */
2537
2538 rtx scratch;
2539 if (reload_completed)
2540 scratch = operands[0];
2541 else if (can_create_pseudo_p ())
2542 scratch = gen_reg_rtx (DImode);
2543 else
2544 FAIL;
2545
2546 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2549 DONE;
2550 }
2551 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552 (set_attr "length" "4,4,4,12")]
2553 )
2554
2555 (define_expand "aarch64_simd_bsl<mode>"
2556 [(match_operand:VALLDIF 0 "register_operand")
2557 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558 (match_operand:VALLDIF 2 "register_operand")
2559 (match_operand:VALLDIF 3 "register_operand")]
2560 "TARGET_SIMD"
2561 {
2562 /* We can't alias operands together if they have different modes. */
2563 rtx tmp = operands[0];
2564 if (FLOAT_MODE_P (<MODE>mode))
2565 {
2566 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2569 }
2570 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2572 operands[1],
2573 operands[2],
2574 operands[3]));
2575 if (tmp != operands[0])
2576 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2577
2578 DONE;
2579 })
2580
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582 [(match_operand:VALLDI 0 "register_operand")
2583 (match_operand:VALLDI 1 "nonmemory_operand")
2584 (match_operand:VALLDI 2 "nonmemory_operand")
2585 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2586 "TARGET_SIMD"
2587 {
2588 /* If we have (a = (P) ? -1 : 0),
2589 then we can simply move the generated mask (the result must be int). */
2590 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591 && operands[2] == CONST0_RTX (<MODE>mode))
2592 emit_move_insn (operands[0], operands[3]);
2593 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2594 else if (operands[1] == CONST0_RTX (<MODE>mode)
2595 && operands[2] == CONSTM1_RTX (<MODE>mode))
2596 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2597 else
2598 {
2599 if (!REG_P (operands[1]))
2600 operands[1] = force_reg (<MODE>mode, operands[1]);
2601 if (!REG_P (operands[2]))
2602 operands[2] = force_reg (<MODE>mode, operands[2]);
2603 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604 operands[1], operands[2]));
2605 }
2606
2607 DONE;
2608 })
2609
2610 ;; Patterns comparing two vectors to produce a mask.
2611
2612 (define_expand "vec_cmp<mode><mode>"
2613 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614 (match_operator 1 "comparison_operator"
2615 [(match_operand:VSDQ_I_DI 2 "register_operand")
2616 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2617 "TARGET_SIMD"
2618 {
2619 rtx mask = operands[0];
2620 enum rtx_code code = GET_CODE (operands[1]);
2621
2622 switch (code)
2623 {
2624 case NE:
2625 case LE:
2626 case LT:
2627 case GE:
2628 case GT:
2629 case EQ:
2630 if (operands[3] == CONST0_RTX (<MODE>mode))
2631 break;
2632
2633 /* Fall through. */
2634 default:
2635 if (!REG_P (operands[3]))
2636 operands[3] = force_reg (<MODE>mode, operands[3]);
2637
2638 break;
2639 }
2640
2641 switch (code)
2642 {
2643 case LT:
2644 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2645 break;
2646
2647 case GE:
2648 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2649 break;
2650
2651 case LE:
2652 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2653 break;
2654
2655 case GT:
2656 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2657 break;
2658
2659 case LTU:
2660 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2661 break;
2662
2663 case GEU:
2664 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2665 break;
2666
2667 case LEU:
2668 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2669 break;
2670
2671 case GTU:
2672 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2673 break;
2674
2675 case NE:
2676 /* Handle NE as !EQ. */
2677 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2679 break;
2680
2681 case EQ:
2682 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2683 break;
2684
2685 default:
2686 gcc_unreachable ();
2687 }
2688
2689 DONE;
2690 })
2691
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694 (match_operator 1 "comparison_operator"
2695 [(match_operand:VDQF 2 "register_operand")
2696 (match_operand:VDQF 3 "nonmemory_operand")]))]
2697 "TARGET_SIMD"
2698 {
2699 int use_zero_form = 0;
2700 enum rtx_code code = GET_CODE (operands[1]);
2701 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2702
2703 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2704
2705 switch (code)
2706 {
2707 case LE:
2708 case LT:
2709 case GE:
2710 case GT:
2711 case EQ:
2712 if (operands[3] == CONST0_RTX (<MODE>mode))
2713 {
2714 use_zero_form = 1;
2715 break;
2716 }
2717 /* Fall through. */
2718 default:
2719 if (!REG_P (operands[3]))
2720 operands[3] = force_reg (<MODE>mode, operands[3]);
2721
2722 break;
2723 }
2724
2725 switch (code)
2726 {
2727 case LT:
2728 if (use_zero_form)
2729 {
2730 comparison = gen_aarch64_cmlt<mode>;
2731 break;
2732 }
2733 /* Fall through. */
2734 case UNGE:
2735 std::swap (operands[2], operands[3]);
2736 /* Fall through. */
2737 case UNLE:
2738 case GT:
2739 comparison = gen_aarch64_cmgt<mode>;
2740 break;
2741 case LE:
2742 if (use_zero_form)
2743 {
2744 comparison = gen_aarch64_cmle<mode>;
2745 break;
2746 }
2747 /* Fall through. */
2748 case UNGT:
2749 std::swap (operands[2], operands[3]);
2750 /* Fall through. */
2751 case UNLT:
2752 case GE:
2753 comparison = gen_aarch64_cmge<mode>;
2754 break;
2755 case NE:
2756 case EQ:
2757 comparison = gen_aarch64_cmeq<mode>;
2758 break;
2759 case UNEQ:
2760 case ORDERED:
2761 case UNORDERED:
2762 case LTGT:
2763 break;
2764 default:
2765 gcc_unreachable ();
2766 }
2767
2768 switch (code)
2769 {
2770 case UNGE:
2771 case UNGT:
2772 case UNLE:
2773 case UNLT:
2774 case NE:
2775 /* FCM returns false for lanes which are unordered, so if we emit
2776 the inverse of the comparison we actually want and then
2777 invert the result, we will end up with the correct result.
2778 Note that a NE NaN and NaN NE b are true for all a, b.
2779
2780 Our transformations are:
2781 a UNGE b -> !(b GT a)
2782 a UNGT b -> !(b GE a)
2783 a UNLE b -> !(a GT b)
2784 a UNLT b -> !(a GE b)
2785 a NE b -> !(a EQ b) */
2786 gcc_assert (comparison != NULL);
2787 emit_insn (comparison (operands[0], operands[2], operands[3]));
2788 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2789 break;
2790
2791 case LT:
2792 case LE:
2793 case GT:
2794 case GE:
2795 case EQ:
2796 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2797 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2798 a GE b -> a GE b
2799 a GT b -> a GT b
2800 a LE b -> b GE a
2801 a LT b -> b GT a
2802 a EQ b -> a EQ b */
2803 gcc_assert (comparison != NULL);
2804 emit_insn (comparison (operands[0], operands[2], operands[3]));
2805 break;
2806
2807 case UNEQ:
2808 /* We first check (a > b || b > a), which is !UNEQ; inverting
2809 this result then gives us (a == b || a UNORDERED b). */
2810 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2811 operands[2], operands[3]));
2812 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2813 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2814 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2815 break;
2816
2817 case LTGT:
2818 /* LTGT is not guaranteed not to generate an FP exception, so take
2819 the faster route: ((a > b) || (b > a)). */
2820 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2821 operands[2], operands[3]));
2822 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2823 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2824 break;
2825
2826 case UNORDERED:
2827 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2828 UNORDERED as !ORDERED. */
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2830 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2831 operands[3], operands[2]));
2832 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2833 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2834 break;
2835
2836 case ORDERED:
2837 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2838 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2839 operands[3], operands[2]));
2840 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2841 break;
2842
2843 default:
2844 gcc_unreachable ();
2845 }
2846
2847 DONE;
2848 })
2849
2850 (define_expand "vec_cmpu<mode><mode>"
2851 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2852 (match_operator 1 "comparison_operator"
2853 [(match_operand:VSDQ_I_DI 2 "register_operand")
2854 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2855 "TARGET_SIMD"
2856 {
2857 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2858 operands[2], operands[3]));
2859 DONE;
2860 })
2861
2862 (define_expand "vcond<mode><mode>"
2863 [(set (match_operand:VALLDI 0 "register_operand")
2864 (if_then_else:VALLDI
2865 (match_operator 3 "comparison_operator"
2866 [(match_operand:VALLDI 4 "register_operand")
2867 (match_operand:VALLDI 5 "nonmemory_operand")])
2868 (match_operand:VALLDI 1 "nonmemory_operand")
2869 (match_operand:VALLDI 2 "nonmemory_operand")))]
2870 "TARGET_SIMD"
2871 {
2872 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2873 enum rtx_code code = GET_CODE (operands[3]);
2874
2875 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2876 as EQ and swap operands 1/2 in order to avoid the additional
2877 NOT instruction. */
2878 if (code == NE)
2879 {
2880 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2881 operands[4], operands[5]);
2882 std::swap (operands[1], operands[2]);
2883 }
2884 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2885 operands[4], operands[5]));
2886 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2887 operands[2], mask));
2888
2889 DONE;
2890 })
2891
2892 (define_expand "vcond<v_cmp_mixed><mode>"
2893 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2894 (if_then_else:<V_cmp_mixed>
2895 (match_operator 3 "comparison_operator"
2896 [(match_operand:VDQF_COND 4 "register_operand")
2897 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2898 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2899 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2900 "TARGET_SIMD"
2901 {
2902 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2903 enum rtx_code code = GET_CODE (operands[3]);
2904
2905 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2906 as EQ and swap operands 1/2 in order to avoid the additional
2907 NOT instruction. */
2908 if (code == NE)
2909 {
2910 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2911 operands[4], operands[5]);
2912 std::swap (operands[1], operands[2]);
2913 }
2914 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2915 operands[4], operands[5]));
2916 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2917 operands[0], operands[1],
2918 operands[2], mask));
2919
2920 DONE;
2921 })
2922
2923 (define_expand "vcondu<mode><mode>"
2924 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2925 (if_then_else:VSDQ_I_DI
2926 (match_operator 3 "comparison_operator"
2927 [(match_operand:VSDQ_I_DI 4 "register_operand")
2928 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2929 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2930 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2931 "TARGET_SIMD"
2932 {
2933 rtx mask = gen_reg_rtx (<MODE>mode);
2934 enum rtx_code code = GET_CODE (operands[3]);
2935
2936 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2937 as EQ and swap operands 1/2 in order to avoid the additional
2938 NOT instruction. */
2939 if (code == NE)
2940 {
2941 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2942 operands[4], operands[5]);
2943 std::swap (operands[1], operands[2]);
2944 }
2945 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2946 operands[4], operands[5]));
2947 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2948 operands[2], mask));
2949 DONE;
2950 })
2951
2952 (define_expand "vcondu<mode><v_cmp_mixed>"
2953 [(set (match_operand:VDQF 0 "register_operand")
2954 (if_then_else:VDQF
2955 (match_operator 3 "comparison_operator"
2956 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2957 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2958 (match_operand:VDQF 1 "nonmemory_operand")
2959 (match_operand:VDQF 2 "nonmemory_operand")))]
2960 "TARGET_SIMD"
2961 {
2962 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2963 enum rtx_code code = GET_CODE (operands[3]);
2964
2965 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2966 as EQ and swap operands 1/2 in order to avoid the additional
2967 NOT instruction. */
2968 if (code == NE)
2969 {
2970 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2971 operands[4], operands[5]);
2972 std::swap (operands[1], operands[2]);
2973 }
2974 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2975 mask, operands[3],
2976 operands[4], operands[5]));
2977 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2978 operands[2], mask));
2979 DONE;
2980 })
2981
2982 ;; Patterns for AArch64 SIMD Intrinsics.
2983
2984 ;; Lane extraction with sign extension to general purpose register.
2985 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2986 [(set (match_operand:GPI 0 "register_operand" "=r")
2987 (sign_extend:GPI
2988 (vec_select:<VEL>
2989 (match_operand:VDQQH 1 "register_operand" "w")
2990 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2991 "TARGET_SIMD"
2992 {
2993 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
2994 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2995 }
2996 [(set_attr "type" "neon_to_gp<q>")]
2997 )
2998
2999 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3000 [(set (match_operand:SI 0 "register_operand" "=r")
3001 (zero_extend:SI
3002 (vec_select:<VEL>
3003 (match_operand:VDQQH 1 "register_operand" "w")
3004 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3005 "TARGET_SIMD"
3006 {
3007 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3008 return "umov\\t%w0, %1.<Vetype>[%2]";
3009 }
3010 [(set_attr "type" "neon_to_gp<q>")]
3011 )
3012
3013 ;; Lane extraction of a value: neither sign nor zero extension
3014 ;; is guaranteed, so the upper bits should be considered undefined.
3015 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
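;; For illustration (a sketch): when the source makes the extension
;; explicit, e.g.
;;
;;   #include <arm_neon.h>
;;   int32_t get2 (int16x4_t v) { return v[2]; }   /* smov w0, v0.h[2] */
;;
;; the sign_extend pattern above is used instead; the bare vec_select
;; below makes no promise about the upper bits.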
3016 (define_insn "aarch64_get_lane<mode>"
3017 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3018 (vec_select:<VEL>
3019 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3020 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3021 "TARGET_SIMD"
3022 {
3023 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3024 switch (which_alternative)
3025 {
3026 case 0:
3027 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3028 case 1:
3029 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3030 case 2:
3031 return "st1\\t{%1.<Vetype>}[%2], %0";
3032 default:
3033 gcc_unreachable ();
3034 }
3035 }
3036 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3037 )
3038
3039 (define_insn "load_pair_lanes<mode>"
3040 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3041 (vec_concat:<VDBL>
3042 (match_operand:VDC 1 "memory_operand" "Utq")
3043 (match_operand:VDC 2 "memory_operand" "m")))]
3044 "TARGET_SIMD && !STRICT_ALIGNMENT
3045 && rtx_equal_p (XEXP (operands[2], 0),
3046 plus_constant (Pmode,
3047 XEXP (operands[1], 0),
3048 GET_MODE_SIZE (<MODE>mode)))"
3049 "ldr\\t%q0, %1"
3050 [(set_attr "type" "neon_load1_1reg_q")]
3051 )
3052
3053 (define_insn "store_pair_lanes<mode>"
3054 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3055 (vec_concat:<VDBL>
3056 (match_operand:VDC 1 "register_operand" "w, r")
3057 (match_operand:VDC 2 "register_operand" "w, r")))]
3058 "TARGET_SIMD"
3059 "@
3060 stp\\t%d1, %d2, %y0
3061 stp\\t%x1, %x2, %y0"
3062 [(set_attr "type" "neon_stp, store_16")]
3063 )
3064
3065 ;; In this insn, operand 1 should be the low part, and operand 2 the high
3066 ;; part, of the dest vector.
3067
3068 (define_insn "*aarch64_combinez<mode>"
3069 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3070 (vec_concat:<VDBL>
3071 (match_operand:VDC 1 "general_operand" "w,?r,m")
3072 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3073 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3074 "@
3075 mov\\t%0.8b, %1.8b
3076 fmov\t%d0, %1
3077 ldr\\t%d0, %1"
3078 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3079 (set_attr "simd" "yes,*,yes")
3080 (set_attr "fp" "*,yes,*")]
3081 )
3082
3083 (define_insn "*aarch64_combinez_be<mode>"
3084 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3085 (vec_concat:<VDBL>
3086 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3087 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3088 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3089 "@
3090 mov\\t%0.8b, %1.8b
3091 fmov\t%d0, %1
3092 ldr\\t%d0, %1"
3093 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3094 (set_attr "simd" "yes,*,yes")
3095 (set_attr "fp" "*,yes,*")]
3096 )
3097
3098 (define_expand "aarch64_combine<mode>"
3099 [(match_operand:<VDBL> 0 "register_operand")
3100 (match_operand:VDC 1 "register_operand")
3101 (match_operand:VDC 2 "register_operand")]
3102 "TARGET_SIMD"
3103 {
3104 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3105
3106 DONE;
3107 }
3108 )
3109
3110 (define_expand "aarch64_simd_combine<mode>"
3111 [(match_operand:<VDBL> 0 "register_operand")
3112 (match_operand:VDC 1 "register_operand")
3113 (match_operand:VDC 2 "register_operand")]
3114 "TARGET_SIMD"
3115 {
3116 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3117 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3118 DONE;
3119 }
3120 [(set_attr "type" "multiple")]
3121 )
3122
3123 ;; <su><addsub>l<q>.
3124
3125 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3126 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3127 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3128 (match_operand:VQW 1 "register_operand" "w")
3129 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3130 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131 (match_operand:VQW 2 "register_operand" "w")
3132 (match_dup 3)))))]
3133 "TARGET_SIMD"
3134 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3135 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3136 )
3137
3138 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3140 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3141 (match_operand:VQW 1 "register_operand" "w")
3142 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3143 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3144 (match_operand:VQW 2 "register_operand" "w")
3145 (match_dup 3)))))]
3146 "TARGET_SIMD"
3147 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3148 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3149 )
3150
3151
3152 (define_expand "aarch64_saddl2<mode>"
3153 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (match_operand:VQW 1 "register_operand" "w")
3155 (match_operand:VQW 2 "register_operand" "w")]
3156 "TARGET_SIMD"
3157 {
3158 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3159 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3160 operands[2], p));
3161 DONE;
3162 })
3163
3164 (define_expand "aarch64_uaddl2<mode>"
3165 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3166 (match_operand:VQW 1 "register_operand" "w")
3167 (match_operand:VQW 2 "register_operand" "w")]
3168 "TARGET_SIMD"
3169 {
3170 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3171 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3172 operands[2], p));
3173 DONE;
3174 })
3175
3176 (define_expand "aarch64_ssubl2<mode>"
3177 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (match_operand:VQW 1 "register_operand" "w")
3179 (match_operand:VQW 2 "register_operand" "w")]
3180 "TARGET_SIMD"
3181 {
3182 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3184 operands[2], p));
3185 DONE;
3186 })
3187
3188 (define_expand "aarch64_usubl2<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:VQW 1 "register_operand" "w")
3191 (match_operand:VQW 2 "register_operand" "w")]
3192 "TARGET_SIMD"
3193 {
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3196 operands[2], p));
3197 DONE;
3198 })
3199
3200 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3201 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3202 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3203 (match_operand:VD_BHSI 1 "register_operand" "w"))
3204 (ANY_EXTEND:<VWIDE>
3205 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3206 "TARGET_SIMD"
3207 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3208 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3209 )
3210
3211 ;; <su><addsub>w<q>.
3212
3213 (define_expand "widen_ssum<mode>3"
3214 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3215 (plus:<VDBLW> (sign_extend:<VDBLW>
3216 (match_operand:VQW 1 "register_operand" ""))
3217 (match_operand:<VDBLW> 2 "register_operand" "")))]
3218 "TARGET_SIMD"
3219 {
3220 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3221 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3222
3223 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3224 operands[1], p));
3225 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3226 DONE;
3227 }
3228 )
3229
3230 (define_expand "widen_ssum<mode>3"
3231 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3232 (plus:<VWIDE> (sign_extend:<VWIDE>
3233 (match_operand:VD_BHSI 1 "register_operand" ""))
3234 (match_operand:<VWIDE> 2 "register_operand" "")))]
3235 "TARGET_SIMD"
3236 {
3237 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3238 DONE;
3239 })
3240
3241 (define_expand "widen_usum<mode>3"
3242 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3243 (plus:<VDBLW> (zero_extend:<VDBLW>
3244 (match_operand:VQW 1 "register_operand" ""))
3245 (match_operand:<VDBLW> 2 "register_operand" "")))]
3246 "TARGET_SIMD"
3247 {
3248 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3249 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3250
3251 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3252 operands[1], p));
3253 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3254 DONE;
3255 }
3256 )
3257
3258 (define_expand "widen_usum<mode>3"
3259 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3260 (plus:<VWIDE> (zero_extend:<VWIDE>
3261 (match_operand:VD_BHSI 1 "register_operand" ""))
3262 (match_operand:<VWIDE> 2 "register_operand" "")))]
3263 "TARGET_SIMD"
3264 {
3265 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3266 DONE;
3267 })
3268
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3272 (ANY_EXTEND:<VWIDE>
3273 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3274 "TARGET_SIMD"
3275 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3276 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3277 )
3278
3279 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3280 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3281 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3282 (ANY_EXTEND:<VWIDE>
3283 (vec_select:<VHALF>
3284 (match_operand:VQW 2 "register_operand" "w")
3285 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3286 "TARGET_SIMD"
3287 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3288 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3289 )
3290
3291 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3293 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3294 (ANY_EXTEND:<VWIDE>
3295 (vec_select:<VHALF>
3296 (match_operand:VQW 2 "register_operand" "w")
3297 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3298 "TARGET_SIMD"
3299 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3300 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3301 )
3302
3303 (define_expand "aarch64_saddw2<mode>"
3304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3305 (match_operand:<VWIDE> 1 "register_operand" "w")
3306 (match_operand:VQW 2 "register_operand" "w")]
3307 "TARGET_SIMD"
3308 {
3309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3310 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3311 operands[2], p));
3312 DONE;
3313 })
3314
3315 (define_expand "aarch64_uaddw2<mode>"
3316 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3317 (match_operand:<VWIDE> 1 "register_operand" "w")
3318 (match_operand:VQW 2 "register_operand" "w")]
3319 "TARGET_SIMD"
3320 {
3321 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3322 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3323 operands[2], p));
3324 DONE;
3325 })
3326
3328 (define_expand "aarch64_ssubw2<mode>"
3329 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3330 (match_operand:<VWIDE> 1 "register_operand" "w")
3331 (match_operand:VQW 2 "register_operand" "w")]
3332 "TARGET_SIMD"
3333 {
3334 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3335 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3336 operands[2], p));
3337 DONE;
3338 })
3339
3340 (define_expand "aarch64_usubw2<mode>"
3341 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (match_operand:<VWIDE> 1 "register_operand" "w")
3343 (match_operand:VQW 2 "register_operand" "w")]
3344 "TARGET_SIMD"
3345 {
3346 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3348 operands[2], p));
3349 DONE;
3350 })
3351
3352 ;; <su><r>h<addsub>.
3353
3354 (define_insn "aarch64_<sur>h<addsub><mode>"
3355 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3356 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3357 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3358 HADDSUB))]
3359 "TARGET_SIMD"
3360 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3361 [(set_attr "type" "neon_<addsub>_halve<q>")]
3362 )
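
;; Per-lane halving operations, computed without intermediate overflow:
;;   shadd/uhadd:   d[i] = (a[i] + b[i]) >> 1
;;   srhadd/urhadd: d[i] = (a[i] + b[i] + 1) >> 1
;;   shsub/uhsub:   d[i] = (a[i] - b[i]) >> 1
;; (illustratively, vrhaddq_u8 averages two vectors with rounding).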
3363
3364 ;; <r><addsub>hn<q>.
3365
3366 (define_insn "aarch64_<sur><addsub>hn<mode>"
3367 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3368 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3369 (match_operand:VQN 2 "register_operand" "w")]
3370 ADDSUBHN))]
3371 "TARGET_SIMD"
3372 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3373 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3374 )
3375
3376 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3377 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3378 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3379 (match_operand:VQN 2 "register_operand" "w")
3380 (match_operand:VQN 3 "register_operand" "w")]
3381 ADDSUBHN2))]
3382 "TARGET_SIMD"
3383 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3384 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3385 )
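
;; Narrow-high semantics, where w is the input element width:
;;   addhn/subhn:   d[i] = (a[i] +/- b[i]) >> (w/2)                (truncate)
;;   raddhn/rsubhn: d[i] = (a[i] +/- b[i] + (1 << (w/2 - 1))) >> (w/2)
;; The hn2 forms write the upper half of the destination
;; (illustratively, vaddhn_high_s32).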
3386
3387 ;; pmul.
3388
3389 (define_insn "aarch64_pmul<mode>"
3390 [(set (match_operand:VB 0 "register_operand" "=w")
3391 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3392 (match_operand:VB 2 "register_operand" "w")]
3393 UNSPEC_PMUL))]
3394 "TARGET_SIMD"
3395 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3396 [(set_attr "type" "neon_mul_<Vetype><q>")]
3397 )
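
;; PMUL is a carry-less (polynomial, GF(2)) multiply: the partial products
;; are combined with XOR instead of addition (illustratively, vmul_p8 on
;; poly8x8_t values).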
3398
3399 ;; fmulx.
3400
3401 (define_insn "aarch64_fmulx<mode>"
3402 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3403 (unspec:VHSDF_HSDF
3404 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3405 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3406 UNSPEC_FMULX))]
3407 "TARGET_SIMD"
3408 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3409 [(set_attr "type" "neon_fp_mul_<stype>")]
3410 )
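
;; FMULX behaves as FMUL except that (+/-0) * (+/-Inf), a NaN under FMUL,
;; returns +/-2.0 with the XOR of the operands' signs; ACLE exposes it as
;; vmulx_f32 etc. (illustrative names).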
3411
3412 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3413
3414 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3415 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3416 (unspec:VDQSF
3417 [(match_operand:VDQSF 1 "register_operand" "w")
3418 (vec_duplicate:VDQSF
3419 (vec_select:<VEL>
3420 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3421 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3422 UNSPEC_FMULX))]
3423 "TARGET_SIMD"
3424 {
3425 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3426 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3427 }
3428 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3429 )
3430
3431 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3432
3433 (define_insn "*aarch64_mulx_elt<mode>"
3434 [(set (match_operand:VDQF 0 "register_operand" "=w")
3435 (unspec:VDQF
3436 [(match_operand:VDQF 1 "register_operand" "w")
3437 (vec_duplicate:VDQF
3438 (vec_select:<VEL>
3439 (match_operand:VDQF 2 "register_operand" "w")
3440 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3441 UNSPEC_FMULX))]
3442 "TARGET_SIMD"
3443 {
3444 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3445 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3446 }
3447 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3448 )
3449
3450 ;; vmulxq_lane
3451
3452 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3453 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3454 (unspec:VHSDF
3455 [(match_operand:VHSDF 1 "register_operand" "w")
3456 (vec_duplicate:VHSDF
3457 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3458 UNSPEC_FMULX))]
3459 "TARGET_SIMD"
3460 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3461 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3462 )
3463
3464 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3465 ;; vmulxd_lane_f64 == vmulx_lane_f64
3466 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3467
3468 (define_insn "*aarch64_vgetfmulx<mode>"
3469 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3470 (unspec:<VEL>
3471 [(match_operand:<VEL> 1 "register_operand" "w")
3472 (vec_select:<VEL>
3473 (match_operand:VDQF 2 "register_operand" "w")
3474 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3475 UNSPEC_FMULX))]
3476 "TARGET_SIMD"
3477 {
3478 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3479 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3480 }
3481 [(set_attr "type" "fmul<Vetype>")]
3482 )

3483 ;; <su>q<addsub>
3484
3485 (define_insn "aarch64_<su_optab><optab><mode>"
3486 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3487 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3488 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3489 "TARGET_SIMD"
3490 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3491 [(set_attr "type" "neon_<optab><q>")]
3492 )
3493
3494 ;; suqadd and usqadd
3495
3496 (define_insn "aarch64_<sur>qadd<mode>"
3497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3499 (match_operand:VSDQ_I 2 "register_operand" "w")]
3500 USSUQADD))]
3501 "TARGET_SIMD"
3502 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3503 [(set_attr "type" "neon_qadd<q>")]
3504 )
3505
3506 ;; sqmovun
3507
3508 (define_insn "aarch64_sqmovun<mode>"
3509 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3510 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3511 UNSPEC_SQXTUN))]
3512 "TARGET_SIMD"
3513 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3514 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3515 )
3516
3517 ;; sqmovn and uqmovn
3518
3519 (define_insn "aarch64_<sur>qmovn<mode>"
3520 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3521 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3522 SUQMOVN))]
3523 "TARGET_SIMD"
3524 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3525 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3526 )
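
;; Saturating narrows, for narrow element width w:
;;   sqxtn:  signed to signed, clamped to [-2^(w-1), 2^(w-1) - 1]
;;   uqxtn:  unsigned to unsigned, clamped to [0, 2^w - 1]
;;   sqxtun: signed to unsigned, clamped to [0, 2^w - 1]
;; (illustratively, vqmovn_s32 / vqmovn_u32 / vqmovun_s32).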
3527
3528 ;; <su>q<absneg>
3529
3530 (define_insn "aarch64_s<optab><mode>"
3531 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3532 (UNQOPS:VSDQ_I
3533 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3534 "TARGET_SIMD"
3535 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3536 [(set_attr "type" "neon_<optab><q>")]
3537 )
3538
3539 ;; sq<r>dmulh.
3540
3541 (define_insn "aarch64_sq<r>dmulh<mode>"
3542 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3543 (unspec:VSDQ_HSI
3544 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3545 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3546 VQDMULH))]
3547 "TARGET_SIMD"
3548 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3549 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3550 )
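
;; Per-lane, for element width w:
;;   sqdmulh:  d[i] = sat ((2 * a[i] * b[i]) >> w)
;;   sqrdmulh: d[i] = sat ((2 * a[i] * b[i] + (1 << (w - 1))) >> w)
;; (illustratively, vqdmulh_s16 / vqrdmulhq_s32).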
3551
3552 ;; sq<r>dmulh_lane
3553
3554 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3555 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3556 (unspec:VDQHS
3557 [(match_operand:VDQHS 1 "register_operand" "w")
3558 (vec_select:<VEL>
3559 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3560 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3561 VQDMULH))]
3562 "TARGET_SIMD"
3563 "*
3564 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3565 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3566 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3567 )
3568
3569 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3570 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3571 (unspec:VDQHS
3572 [(match_operand:VDQHS 1 "register_operand" "w")
3573 (vec_select:<VEL>
3574 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3575 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3576 VQDMULH))]
3577 "TARGET_SIMD"
3578 "*
3579 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3580 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3581 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3582 )
3583
3584 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3585 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3586 (unspec:SD_HSI
3587 [(match_operand:SD_HSI 1 "register_operand" "w")
3588 (vec_select:<VEL>
3589 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3590 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3591 VQDMULH))]
3592 "TARGET_SIMD"
3593 "*
3594 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3595 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3596 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3597 )
3598
3599 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3600 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3601 (unspec:SD_HSI
3602 [(match_operand:SD_HSI 1 "register_operand" "w")
3603 (vec_select:<VEL>
3604 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3605 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3606 VQDMULH))]
3607 "TARGET_SIMD"
3608 "*
3609 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3610 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3611 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3612 )
3613
3614 ;; sqrdml[as]h.
3615
3616 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3617 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3618 (unspec:VSDQ_HSI
3619 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3620 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3621 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3622 SQRDMLH_AS))]
3623 "TARGET_SIMD_RDMA"
3624 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3625 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3626 )
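
;; ARMv8.1-A only (TARGET_SIMD_RDMA).  Like sqrdmulh but fused with an
;; accumulate and a single final saturation, roughly
;;   sqrdmlah: d[i] = sat (d[i] + ((2 * a[i] * b[i] + (1 << (w - 1))) >> w))
;; and sqrdmlsh likewise with "-" (illustratively, vqrdmlahq_s16 under +rdm).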
3627
3628 ;; sqrdml[as]h_lane.
3629
3630 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3631 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3632 (unspec:VDQHS
3633 [(match_operand:VDQHS 1 "register_operand" "0")
3634 (match_operand:VDQHS 2 "register_operand" "w")
3635 (vec_select:<VEL>
3636 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3637 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3638 SQRDMLH_AS))]
3639 "TARGET_SIMD_RDMA"
3640 {
3641 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3642 return
3643 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3644 }
3645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3646 )
3647
3648 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3649 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3650 (unspec:SD_HSI
3651 [(match_operand:SD_HSI 1 "register_operand" "0")
3652 (match_operand:SD_HSI 2 "register_operand" "w")
3653 (vec_select:<VEL>
3654 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3655 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3656 SQRDMLH_AS))]
3657 "TARGET_SIMD_RDMA"
3658 {
3659 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3660 return
3661 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3662 }
3663 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3664 )
3665
3666 ;; sqrdml[as]h_laneq.
3667
3668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3669 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3670 (unspec:VDQHS
3671 [(match_operand:VDQHS 1 "register_operand" "0")
3672 (match_operand:VDQHS 2 "register_operand" "w")
3673 (vec_select:<VEL>
3674 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3675 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3676 SQRDMLH_AS))]
3677 "TARGET_SIMD_RDMA"
3678 {
3679 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3680 return
3681 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3682 }
3683 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3684 )
3685
3686 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3687 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3688 (unspec:SD_HSI
3689 [(match_operand:SD_HSI 1 "register_operand" "0")
3690 (match_operand:SD_HSI 2 "register_operand" "w")
3691 (vec_select:<VEL>
3692 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3693 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3694 SQRDMLH_AS))]
3695 "TARGET_SIMD_RDMA"
3696 {
3697 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3698 return
3699 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3700 }
3701 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3702 )
3703
3704 ;; vqdml[sa]l
3705
3706 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3707 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3708 (SBINQOPS:<VWIDE>
3709 (match_operand:<VWIDE> 1 "register_operand" "0")
3710 (ss_ashift:<VWIDE>
3711 (mult:<VWIDE>
3712 (sign_extend:<VWIDE>
3713 (match_operand:VSD_HSI 2 "register_operand" "w"))
3714 (sign_extend:<VWIDE>
3715 (match_operand:VSD_HSI 3 "register_operand" "w")))
3716 (const_int 1))))]
3717 "TARGET_SIMD"
3718 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3719 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3720 )
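
;; Per-lane, accumulating into the wide destination:
;;   sqdmlal: d[i] = sat (d[i] + sat (2 * a[i] * b[i]))
;;   sqdmlsl: d[i] = sat (d[i] - sat (2 * a[i] * b[i]))
;; (illustratively, vqdmlal_s16 accumulates into an int32x4_t).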
3721
3722 ;; vqdml[sa]l_lane
3723
3724 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3725 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3726 (SBINQOPS:<VWIDE>
3727 (match_operand:<VWIDE> 1 "register_operand" "0")
3728 (ss_ashift:<VWIDE>
3729 (mult:<VWIDE>
3730 (sign_extend:<VWIDE>
3731 (match_operand:VD_HSI 2 "register_operand" "w"))
3732 (sign_extend:<VWIDE>
3733 (vec_duplicate:VD_HSI
3734 (vec_select:<VEL>
3735 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3736 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3737 ))
3738 (const_int 1))))]
3739 "TARGET_SIMD"
3740 {
3741 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3742 return
3743 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3744 }
3745 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3746 )
3747
3748 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3750 (SBINQOPS:<VWIDE>
3751 (match_operand:<VWIDE> 1 "register_operand" "0")
3752 (ss_ashift:<VWIDE>
3753 (mult:<VWIDE>
3754 (sign_extend:<VWIDE>
3755 (match_operand:VD_HSI 2 "register_operand" "w"))
3756 (sign_extend:<VWIDE>
3757 (vec_duplicate:VD_HSI
3758 (vec_select:<VEL>
3759 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3760 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3761 ))
3762 (const_int 1))))]
3763 "TARGET_SIMD"
3764 {
3765 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3766 return
3767 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3768 }
3769 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3770 )
3771
3772 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3774 (SBINQOPS:<VWIDE>
3775 (match_operand:<VWIDE> 1 "register_operand" "0")
3776 (ss_ashift:<VWIDE>
3777 (mult:<VWIDE>
3778 (sign_extend:<VWIDE>
3779 (match_operand:SD_HSI 2 "register_operand" "w"))
3780 (sign_extend:<VWIDE>
3781 (vec_select:<VEL>
3782 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3783 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3784 )
3785 (const_int 1))))]
3786 "TARGET_SIMD"
3787 {
3788 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3789 return
3790 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3791 }
3792 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3793 )
3794
3795 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3796 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3797 (SBINQOPS:<VWIDE>
3798 (match_operand:<VWIDE> 1 "register_operand" "0")
3799 (ss_ashift:<VWIDE>
3800 (mult:<VWIDE>
3801 (sign_extend:<VWIDE>
3802 (match_operand:SD_HSI 2 "register_operand" "w"))
3803 (sign_extend:<VWIDE>
3804 (vec_select:<VEL>
3805 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3806 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3807 )
3808 (const_int 1))))]
3809 "TARGET_SIMD"
3810 {
3811 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3812 return
3813 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3814 }
3815 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3816 )
3817
3818 ;; vqdml[sa]l_n
3819
3820 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3821 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3822 (SBINQOPS:<VWIDE>
3823 (match_operand:<VWIDE> 1 "register_operand" "0")
3824 (ss_ashift:<VWIDE>
3825 (mult:<VWIDE>
3826 (sign_extend:<VWIDE>
3827 (match_operand:VD_HSI 2 "register_operand" "w"))
3828 (sign_extend:<VWIDE>
3829 (vec_duplicate:VD_HSI
3830 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3831 (const_int 1))))]
3832 "TARGET_SIMD"
3833 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3834 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3835 )
3836
3837 ;; sqdml[as]l2
3838
3839 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3840 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3841 (SBINQOPS:<VWIDE>
3842 (match_operand:<VWIDE> 1 "register_operand" "0")
3843 (ss_ashift:<VWIDE>
3844 (mult:<VWIDE>
3845 (sign_extend:<VWIDE>
3846 (vec_select:<VHALF>
3847 (match_operand:VQ_HSI 2 "register_operand" "w")
3848 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3849 (sign_extend:<VWIDE>
3850 (vec_select:<VHALF>
3851 (match_operand:VQ_HSI 3 "register_operand" "w")
3852 (match_dup 4))))
3853 (const_int 1))))]
3854 "TARGET_SIMD"
3855 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3856 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3857 )
3858
3859 (define_expand "aarch64_sqdmlal2<mode>"
3860 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3861 (match_operand:<VWIDE> 1 "register_operand" "w")
3862 (match_operand:VQ_HSI 2 "register_operand" "w")
3863 (match_operand:VQ_HSI 3 "register_operand" "w")]
3864 "TARGET_SIMD"
3865 {
3866 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3867 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3868 operands[2], operands[3], p));
3869 DONE;
3870 })
3871
3872 (define_expand "aarch64_sqdmlsl2<mode>"
3873 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3874 (match_operand:<VWIDE> 1 "register_operand" "w")
3875 (match_operand:VQ_HSI 2 "register_operand" "w")
3876 (match_operand:VQ_HSI 3 "register_operand" "w")]
3877 "TARGET_SIMD"
3878 {
3879 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3880 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3881 operands[2], operands[3], p));
3882 DONE;
3883 })
3884
3885 ;; vqdml[sa]l2_lane
3886
3887 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3888 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3889 (SBINQOPS:<VWIDE>
3890 (match_operand:<VWIDE> 1 "register_operand" "0")
3891 (ss_ashift:<VWIDE>
3892 (mult:<VWIDE>
3893 (sign_extend:<VWIDE>
3894 (vec_select:<VHALF>
3895 (match_operand:VQ_HSI 2 "register_operand" "w")
3896 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3897 (sign_extend:<VWIDE>
3898 (vec_duplicate:<VHALF>
3899 (vec_select:<VEL>
3900 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3901 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3902 ))))
3903 (const_int 1))))]
3904 "TARGET_SIMD"
3905 {
3906 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3907 return
3908 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3909 }
3910 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3911 )
3912
3913 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3914 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3915 (SBINQOPS:<VWIDE>
3916 (match_operand:<VWIDE> 1 "register_operand" "0")
3917 (ss_ashift:<VWIDE>
3918 (mult:<VWIDE>
3919 (sign_extend:<VWIDE>
3920 (vec_select:<VHALF>
3921 (match_operand:VQ_HSI 2 "register_operand" "w")
3922 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3923 (sign_extend:<VWIDE>
3924 (vec_duplicate:<VHALF>
3925 (vec_select:<VEL>
3926 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3927 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3928 ))))
3929 (const_int 1))))]
3930 "TARGET_SIMD"
3931 {
3932 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3933 return
3934 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3935 }
3936 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3937 )
3938
3939 (define_expand "aarch64_sqdmlal2_lane<mode>"
3940 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3941 (match_operand:<VWIDE> 1 "register_operand" "w")
3942 (match_operand:VQ_HSI 2 "register_operand" "w")
3943 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3944 (match_operand:SI 4 "immediate_operand" "i")]
3945 "TARGET_SIMD"
3946 {
3947 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3948 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3949 operands[2], operands[3],
3950 operands[4], p));
3951 DONE;
3952 })
3953
3954 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3955 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3956 (match_operand:<VWIDE> 1 "register_operand" "w")
3957 (match_operand:VQ_HSI 2 "register_operand" "w")
3958 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3959 (match_operand:SI 4 "immediate_operand" "i")]
3960 "TARGET_SIMD"
3961 {
3962 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3963 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3964 operands[2], operands[3],
3965 operands[4], p));
3966 DONE;
3967 })
3968
3969 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3970 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3971 (match_operand:<VWIDE> 1 "register_operand" "w")
3972 (match_operand:VQ_HSI 2 "register_operand" "w")
3973 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3974 (match_operand:SI 4 "immediate_operand" "i")]
3975 "TARGET_SIMD"
3976 {
3977 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3978 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3979 operands[2], operands[3],
3980 operands[4], p));
3981 DONE;
3982 })
3983
3984 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3985 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3986 (match_operand:<VWIDE> 1 "register_operand" "w")
3987 (match_operand:VQ_HSI 2 "register_operand" "w")
3988 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3989 (match_operand:SI 4 "immediate_operand" "i")]
3990 "TARGET_SIMD"
3991 {
3992 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3993 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3994 operands[2], operands[3],
3995 operands[4], p));
3996 DONE;
3997 })
3998
3999 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4000 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4001 (SBINQOPS:<VWIDE>
4002 (match_operand:<VWIDE> 1 "register_operand" "0")
4003 (ss_ashift:<VWIDE>
4004 (mult:<VWIDE>
4005 (sign_extend:<VWIDE>
4006 (vec_select:<VHALF>
4007 (match_operand:VQ_HSI 2 "register_operand" "w")
4008 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4009 (sign_extend:<VWIDE>
4010 (vec_duplicate:<VHALF>
4011 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4012 (const_int 1))))]
4013 "TARGET_SIMD"
4014 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4015 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4016 )
4017
4018 (define_expand "aarch64_sqdmlal2_n<mode>"
4019 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4020 (match_operand:<VWIDE> 1 "register_operand" "w")
4021 (match_operand:VQ_HSI 2 "register_operand" "w")
4022 (match_operand:<VEL> 3 "register_operand" "w")]
4023 "TARGET_SIMD"
4024 {
4025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4026 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4027 operands[2], operands[3],
4028 p));
4029 DONE;
4030 })
4031
4032 (define_expand "aarch64_sqdmlsl2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:<VWIDE> 1 "register_operand" "w")
4035 (match_operand:VQ_HSI 2 "register_operand" "w")
4036 (match_operand:<VEL> 3 "register_operand" "w")]
4037 "TARGET_SIMD"
4038 {
4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4040 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4041 operands[2], operands[3],
4042 p));
4043 DONE;
4044 })
4045
4046 ;; vqdmull
4047
4048 (define_insn "aarch64_sqdmull<mode>"
4049 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4050 (ss_ashift:<VWIDE>
4051 (mult:<VWIDE>
4052 (sign_extend:<VWIDE>
4053 (match_operand:VSD_HSI 1 "register_operand" "w"))
4054 (sign_extend:<VWIDE>
4055 (match_operand:VSD_HSI 2 "register_operand" "w")))
4056 (const_int 1)))]
4057 "TARGET_SIMD"
4058 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4059 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4060 )
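
;; Per-lane: d[i] = sat (2 * (wide) a[i] * (wide) b[i]).  Only
;; INT_MIN * INT_MIN saturates; e.g. for vqdmull_s16 (illustrative),
;; 0x8000 * 0x8000 would double to 2^31 and so clamps to 0x7fffffff.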
4061
4062 ;; vqdmull_lane
4063
4064 (define_insn "aarch64_sqdmull_lane<mode>"
4065 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4066 (ss_ashift:<VWIDE>
4067 (mult:<VWIDE>
4068 (sign_extend:<VWIDE>
4069 (match_operand:VD_HSI 1 "register_operand" "w"))
4070 (sign_extend:<VWIDE>
4071 (vec_duplicate:VD_HSI
4072 (vec_select:<VEL>
4073 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4074 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4075 ))
4076 (const_int 1)))]
4077 "TARGET_SIMD"
4078 {
4079 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4080 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4081 }
4082 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4083 )
4084
4085 (define_insn "aarch64_sqdmull_laneq<mode>"
4086 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4087 (ss_ashift:<VWIDE>
4088 (mult:<VWIDE>
4089 (sign_extend:<VWIDE>
4090 (match_operand:VD_HSI 1 "register_operand" "w"))
4091 (sign_extend:<VWIDE>
4092 (vec_duplicate:VD_HSI
4093 (vec_select:<VEL>
4094 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4095 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4096 ))
4097 (const_int 1)))]
4098 "TARGET_SIMD"
4099 {
4100 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4101 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4102 }
4103 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4104 )
4105
4106 (define_insn "aarch64_sqdmull_lane<mode>"
4107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4108 (ss_ashift:<VWIDE>
4109 (mult:<VWIDE>
4110 (sign_extend:<VWIDE>
4111 (match_operand:SD_HSI 1 "register_operand" "w"))
4112 (sign_extend:<VWIDE>
4113 (vec_select:<VEL>
4114 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4116 ))
4117 (const_int 1)))]
4118 "TARGET_SIMD"
4119 {
4120 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4121 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4122 }
4123 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4124 )
4125
4126 (define_insn "aarch64_sqdmull_laneq<mode>"
4127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4128 (ss_ashift:<VWIDE>
4129 (mult:<VWIDE>
4130 (sign_extend:<VWIDE>
4131 (match_operand:SD_HSI 1 "register_operand" "w"))
4132 (sign_extend:<VWIDE>
4133 (vec_select:<VEL>
4134 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4135 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4136 ))
4137 (const_int 1)))]
4138 "TARGET_SIMD"
4139 {
4140 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4141 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4142 }
4143 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4144 )
4145
4146 ;; vqdmull_n
4147
4148 (define_insn "aarch64_sqdmull_n<mode>"
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150 (ss_ashift:<VWIDE>
4151 (mult:<VWIDE>
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4156 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4157 )
4158 (const_int 1)))]
4159 "TARGET_SIMD"
4160 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4162 )
4163
4164 ;; vqdmull2
4165
4168 (define_insn "aarch64_sqdmull2<mode>_internal"
4169 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4170 (ss_ashift:<VWIDE>
4171 (mult:<VWIDE>
4172 (sign_extend:<VWIDE>
4173 (vec_select:<VHALF>
4174 (match_operand:VQ_HSI 1 "register_operand" "w")
4175 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4176 (sign_extend:<VWIDE>
4177 (vec_select:<VHALF>
4178 (match_operand:VQ_HSI 2 "register_operand" "w")
4179 (match_dup 3)))
4180 )
4181 (const_int 1)))]
4182 "TARGET_SIMD"
4183 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4184 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4185 )
4186
4187 (define_expand "aarch64_sqdmull2<mode>"
4188 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4189 (match_operand:VQ_HSI 1 "register_operand" "w")
4190 (match_operand:VQ_HSI 2 "register_operand" "w")]
4191 "TARGET_SIMD"
4192 {
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4195 operands[2], p));
4196 DONE;
4197 })
4198
4199 ;; vqdmull2_lane
4200
4201 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (ss_ashift:<VWIDE>
4204 (mult:<VWIDE>
4205 (sign_extend:<VWIDE>
4206 (vec_select:<VHALF>
4207 (match_operand:VQ_HSI 1 "register_operand" "w")
4208 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4209 (sign_extend:<VWIDE>
4210 (vec_duplicate:<VHALF>
4211 (vec_select:<VEL>
4212 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4213 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4214 ))
4215 (const_int 1)))]
4216 "TARGET_SIMD"
4217 {
4218 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4219 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4220 }
4221 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4222 )
4223
4224 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4226 (ss_ashift:<VWIDE>
4227 (mult:<VWIDE>
4228 (sign_extend:<VWIDE>
4229 (vec_select:<VHALF>
4230 (match_operand:VQ_HSI 1 "register_operand" "w")
4231 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4232 (sign_extend:<VWIDE>
4233 (vec_duplicate:<VHALF>
4234 (vec_select:<VEL>
4235 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4236 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4237 ))
4238 (const_int 1)))]
4239 "TARGET_SIMD"
4240 {
4241 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4242 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4243 }
4244 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4245 )
4246
4247 (define_expand "aarch64_sqdmull2_lane<mode>"
4248 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4249 (match_operand:VQ_HSI 1 "register_operand" "w")
4250 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4251 (match_operand:SI 3 "immediate_operand" "i")]
4252 "TARGET_SIMD"
4253 {
4254 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4255 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4256 operands[2], operands[3],
4257 p));
4258 DONE;
4259 })
4260
4261 (define_expand "aarch64_sqdmull2_laneq<mode>"
4262 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4263 (match_operand:VQ_HSI 1 "register_operand" "w")
4264 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4266 "TARGET_SIMD"
4267 {
4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4269 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4270 operands[2], operands[3],
4271 p));
4272 DONE;
4273 })
4274
4275 ;; vqdmull2_n
4276
4277 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4279 (ss_ashift:<VWIDE>
4280 (mult:<VWIDE>
4281 (sign_extend:<VWIDE>
4282 (vec_select:<VHALF>
4283 (match_operand:VQ_HSI 1 "register_operand" "w")
4284 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4285 (sign_extend:<VWIDE>
4286 (vec_duplicate:<VHALF>
4287 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4288 )
4289 (const_int 1)))]
4290 "TARGET_SIMD"
4291 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4292 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4293 )
4294
4295 (define_expand "aarch64_sqdmull2_n<mode>"
4296 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:<VEL> 2 "register_operand" "w")]
4299 "TARGET_SIMD"
4300 {
4301 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4303 operands[2], p));
4304 DONE;
4305 })
4306
4307 ;; vshl
4308
4309 (define_insn "aarch64_<sur>shl<mode>"
4310 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4311 (unspec:VSDQ_I_DI
4312 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4313 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4314 VSHL))]
4315 "TARGET_SIMD"
4316 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4317 [(set_attr "type" "neon_shift_reg<q>")]
4318 )
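
;; The shift amount is a per-lane register value: positive shifts left,
;; negative shifts right (arithmetic for sshl, logical for ushl), with
;; srshl/urshl rounding right shifts (illustratively, vshl_s32 with a
;; negative lane acts as an arithmetic right shift).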
4319
4321 ;; vqshl
4322
4323 (define_insn "aarch64_<sur>q<r>shl<mode>"
4324 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4325 (unspec:VSDQ_I
4326 [(match_operand:VSDQ_I 1 "register_operand" "w")
4327 (match_operand:VSDQ_I 2 "register_operand" "w")]
4328 VQSHL))]
4329 "TARGET_SIMD"
4330 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4331 [(set_attr "type" "neon_sat_shift_reg<q>")]
4332 )
4333
4334 ;; vshll_n
4335
4336 (define_insn "aarch64_<sur>shll_n<mode>"
4337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4338 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4339 (match_operand:SI 2
4340 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4341 VSHLL))]
4342 "TARGET_SIMD"
4343 {
4344 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4345 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4346 else
4347 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4348 }
4349 [(set_attr "type" "neon_shift_imm_long")]
4350 )
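
;; shll handles the shift-by-element-width case because the (u|s)shll
;; encodings only reach w - 1, while shll shifts by exactly w; together
;; they cover the full range (e.g. vshll_n_s8 with a shift of 8 emits
;; shll - illustrative intrinsic spelling).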
4351
4352 ;; vshll_high_n
4353
4354 (define_insn "aarch64_<sur>shll2_n<mode>"
4355 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4356 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4357 (match_operand:SI 2 "immediate_operand" "i")]
4358 VSHLL))]
4359 "TARGET_SIMD"
4360 {
4361 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4362 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4363 else
4364 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4365 }
4366 [(set_attr "type" "neon_shift_imm_long")]
4367 )
4368
4369 ;; vrshr_n
4370
4371 (define_insn "aarch64_<sur>shr_n<mode>"
4372 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4373 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4374 (match_operand:SI 2
4375 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4376 VRSHR_N))]
4377 "TARGET_SIMD"
4378 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4379 [(set_attr "type" "neon_sat_shift_imm<q>")]
4380 )
4381
4382 ;; v(r)sra_n
4383
4384 (define_insn "aarch64_<sur>sra_n<mode>"
4385 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4386 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4387 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4388 (match_operand:SI 3
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4390 VSRA))]
4391 "TARGET_SIMD"
4392 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4393 [(set_attr "type" "neon_shift_acc<q>")]
4394 )
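
;; Accumulating shifts: d[i] += a[i] >> n, arithmetic for ssra, logical
;; for usra; srsra/ursra round the shifted value before accumulating
;; (illustratively, vsra_n_s32 (acc, x, 3)).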
4395
4396 ;; vs<lr>i_n
4397
4398 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4402 (match_operand:SI 3
4403 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4404 VSLRI))]
4405 "TARGET_SIMD"
4406 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4407 [(set_attr "type" "neon_shift_imm<q>")]
4408 )
4409
4410 ;; vqshl(u)
4411
4412 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4413 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4414 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4415 (match_operand:SI 2
4416 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4417 VQSHL_N))]
4418 "TARGET_SIMD"
4419 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4420 [(set_attr "type" "neon_sat_shift_imm<q>")]
4421 )
4422
4424 ;; vq(r)shr(u)n_n
4425
4426 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4427 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4428 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4429 (match_operand:SI 2
4430 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4431 VQSHRN_N))]
4432 "TARGET_SIMD"
4433 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4434 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4435 )
4436
4438 ;; cm(eq|ge|gt|lt|le)
4439 ;; Note: we have constraints for both Dz and Z, as different expanders
4440 ;; have different ideas of what should be passed to this pattern.
4441
4442 (define_insn "aarch64_cm<optab><mode>"
4443 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4444 (neg:<V_INT_EQUIV>
4445 (COMPARISONS:<V_INT_EQUIV>
4446 (match_operand:VDQ_I 1 "register_operand" "w,w")
4447 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4448 )))]
4449 "TARGET_SIMD"
4450 "@
4451 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4452 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4453 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4454 )
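
;; These patterns model each lane's comparison as a 0/1 RTL result and
;; negate it, matching the all-ones (-1) lane mask the cm* instructions
;; actually produce; e.g. cmeq sets a lane to all ones when equal
;; (illustratively, vceqq_s32 returns uint32x4_t masks).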
4455
4456 (define_insn_and_split "aarch64_cm<optab>di"
4457 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4458 (neg:DI
4459 (COMPARISONS:DI
4460 (match_operand:DI 1 "register_operand" "w,w,r")
4461 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4462 )))
4463 (clobber (reg:CC CC_REGNUM))]
4464 "TARGET_SIMD"
4465 "#"
4466 "&& reload_completed"
4467 [(set (match_operand:DI 0 "register_operand")
4468 (neg:DI
4469 (COMPARISONS:DI
4470 (match_operand:DI 1 "register_operand")
4471 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4472 )))]
4473 {
4474 /* If we are in the general purpose register file,
4475 we split to a sequence of comparison and store. */
4476 if (GP_REGNUM_P (REGNO (operands[0]))
4477 && GP_REGNUM_P (REGNO (operands[1])))
4478 {
4479 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4480 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4481 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4482 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4483 DONE;
4484 }
4485 /* Otherwise, we expand to a similar pattern which does not
4486 clobber CC_REGNUM. */
4487 }
4488 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4489 )
4490
4491 (define_insn "*aarch64_cm<optab>di"
4492 [(set (match_operand:DI 0 "register_operand" "=w,w")
4493 (neg:DI
4494 (COMPARISONS:DI
4495 (match_operand:DI 1 "register_operand" "w,w")
4496 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4497 )))]
4498 "TARGET_SIMD && reload_completed"
4499 "@
4500 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4501 cm<optab>\t%d0, %d1, #0"
4502 [(set_attr "type" "neon_compare, neon_compare_zero")]
4503 )
4504
4505 ;; cm(hs|hi)
4506
4507 (define_insn "aarch64_cm<optab><mode>"
4508 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4509 (neg:<V_INT_EQUIV>
4510 (UCOMPARISONS:<V_INT_EQUIV>
4511 (match_operand:VDQ_I 1 "register_operand" "w")
4512 (match_operand:VDQ_I 2 "register_operand" "w")
4513 )))]
4514 "TARGET_SIMD"
4515 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4516 [(set_attr "type" "neon_compare<q>")]
4517 )
4518
4519 (define_insn_and_split "aarch64_cm<optab>di"
4520 [(set (match_operand:DI 0 "register_operand" "=w,r")
4521 (neg:DI
4522 (UCOMPARISONS:DI
4523 (match_operand:DI 1 "register_operand" "w,r")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4525 )))
4526 (clobber (reg:CC CC_REGNUM))]
4527 "TARGET_SIMD"
4528 "#"
4529 "&& reload_completed"
4530 [(set (match_operand:DI 0 "register_operand")
4531 (neg:DI
4532 (UCOMPARISONS:DI
4533 (match_operand:DI 1 "register_operand")
4534 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4535 )))]
4536 {
4537 /* If we are in the general purpose register file,
4538 we split to a sequence of comparison and store. */
4539 if (GP_REGNUM_P (REGNO (operands[0]))
4540 && GP_REGNUM_P (REGNO (operands[1])))
4541 {
4542 machine_mode mode = CCmode;
4543 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4544 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4545 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4546 DONE;
4547 }
4548 /* Otherwise, we expand to a similar pattern which does not
4549 clobber CC_REGNUM. */
4550 }
4551 [(set_attr "type" "neon_compare,multiple")]
4552 )
4553
4554 (define_insn "*aarch64_cm<optab>di"
4555 [(set (match_operand:DI 0 "register_operand" "=w")
4556 (neg:DI
4557 (UCOMPARISONS:DI
4558 (match_operand:DI 1 "register_operand" "w")
4559 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4560 )))]
4561 "TARGET_SIMD && reload_completed"
4562 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4563 [(set_attr "type" "neon_compare")]
4564 )
4565
4566 ;; cmtst
4567
4568 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4569 ;; we don't have any insns using ne, and aarch64_vcond outputs
4570 ;; not (neg (eq (and x y) 0))
4571 ;; which is rewritten by simplify_rtx as
4572 ;; plus (eq (and x y) 0) -1.
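;; The two are equivalent: in two's complement not (x) = -x - 1, so
;; not (neg (y)) = -(-y) - 1 = y - 1, i.e. plus (y, -1).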
4573
4574 (define_insn "aarch64_cmtst<mode>"
4575 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4576 (plus:<V_INT_EQUIV>
4577 (eq:<V_INT_EQUIV>
4578 (and:VDQ_I
4579 (match_operand:VDQ_I 1 "register_operand" "w")
4580 (match_operand:VDQ_I 2 "register_operand" "w"))
4581 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4582 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4583 ]
4584 "TARGET_SIMD"
4585 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4586 [(set_attr "type" "neon_tst<q>")]
4587 )
4588
4589 (define_insn_and_split "aarch64_cmtstdi"
4590 [(set (match_operand:DI 0 "register_operand" "=w,r")
4591 (neg:DI
4592 (ne:DI
4593 (and:DI
4594 (match_operand:DI 1 "register_operand" "w,r")
4595 (match_operand:DI 2 "register_operand" "w,r"))
4596 (const_int 0))))
4597 (clobber (reg:CC CC_REGNUM))]
4598 "TARGET_SIMD"
4599 "#"
4600 "&& reload_completed"
4601 [(set (match_operand:DI 0 "register_operand")
4602 (neg:DI
4603 (ne:DI
4604 (and:DI
4605 (match_operand:DI 1 "register_operand")
4606 (match_operand:DI 2 "register_operand"))
4607 (const_int 0))))]
4608 {
4609 /* If we are in the general purpose register file,
4610 we split to a sequence of comparison and store. */
4611 if (GP_REGNUM_P (REGNO (operands[0]))
4612 && GP_REGNUM_P (REGNO (operands[1])))
4613 {
4614 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4615 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4616 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4617 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4618 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4619 DONE;
4620 }
4621 /* Otherwise, we expand to a similar pattern which does not
4622 clobber CC_REGNUM. */
4623 }
4624 [(set_attr "type" "neon_tst,multiple")]
4625 )
4626
4627 (define_insn "*aarch64_cmtstdi"
4628 [(set (match_operand:DI 0 "register_operand" "=w")
4629 (neg:DI
4630 (ne:DI
4631 (and:DI
4632 (match_operand:DI 1 "register_operand" "w")
4633 (match_operand:DI 2 "register_operand" "w"))
4634 (const_int 0))))]
4635 "TARGET_SIMD"
4636 "cmtst\t%d0, %d1, %d2"
4637 [(set_attr "type" "neon_tst")]
4638 )
4639
4640 ;; fcm(eq|ge|gt|le|lt)
4641
4642 (define_insn "aarch64_cm<optab><mode>"
4643 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4644 (neg:<V_INT_EQUIV>
4645 (COMPARISONS:<V_INT_EQUIV>
4646 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4647 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4648 )))]
4649 "TARGET_SIMD"
4650 "@
4651 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4652 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4653 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4654 )
4655
4656 ;; fac(ge|gt)
4657 ;; Note we can also handle what would be fac(le|lt) by
4658 ;; generating fac(ge|gt).
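;; For example, facle (a, b), i.e. |a| <= |b|, is facge (b, a) with the
;; operands swapped; ACLE's vcale_f32 is commonly described this way
;; (illustrative mapping).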
4659
4660 (define_insn "aarch64_fac<optab><mode>"
4661 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4662 (neg:<V_INT_EQUIV>
4663 (FAC_COMPARISONS:<V_INT_EQUIV>
4664 (abs:VHSDF_HSDF
4665 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4666 (abs:VHSDF_HSDF
4667 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4668 )))]
4669 "TARGET_SIMD"
4670 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4671 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4672 )
4673
4674 ;; addp
4675
4676 (define_insn "aarch64_addp<mode>"
4677 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4678 (unspec:VD_BHSI
4679 [(match_operand:VD_BHSI 1 "register_operand" "w")
4680 (match_operand:VD_BHSI 2 "register_operand" "w")]
4681 UNSPEC_ADDP))]
4682 "TARGET_SIMD"
4683 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4684 [(set_attr "type" "neon_reduc_add<q>")]
4685 )
4686
4687 (define_insn "aarch64_addpdi"
4688 [(set (match_operand:DI 0 "register_operand" "=w")
4689 (unspec:DI
4690 [(match_operand:V2DI 1 "register_operand" "w")]
4691 UNSPEC_ADDP))]
4692 "TARGET_SIMD"
4693 "addp\t%d0, %1.2d"
4694 [(set_attr "type" "neon_reduc_add")]
4695 )
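
;; Pairwise add: adjacent element pairs of the concatenated inputs are
;; summed, so (illustratively) vpadd_s32 (a, b) returns
;; { a[0] + a[1], b[0] + b[1] }; the DI form sums the two halves of a
;; V2DI register.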
4696
4697 ;; sqrt
4698
4699 (define_expand "sqrt<mode>2"
4700 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4701 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4702 "TARGET_SIMD"
4703 {
4704 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4705 DONE;
4706 })
4707
4708 (define_insn "*sqrt<mode>2"
4709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4710 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4711 "TARGET_SIMD"
4712 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4713 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4714 )
4715
4716 ;; Patterns for vector struct loads and stores.
4717
4718 (define_insn "aarch64_simd_ld2<mode>"
4719 [(set (match_operand:OI 0 "register_operand" "=w")
4720 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4721 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4722 UNSPEC_LD2))]
4723 "TARGET_SIMD"
4724 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725 [(set_attr "type" "neon_load2_2reg<q>")]
4726 )
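
;; LD2 de-interleaves: even-indexed memory elements fill the first
;; register of the pair and odd-indexed elements the second, so
;; (illustratively) vld2q_s32 (p) yields { p[0], p[2], p[4], p[6] } and
;; { p[1], p[3], p[5], p[7] }.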
4727
4728 (define_insn "aarch64_simd_ld2r<mode>"
4729 [(set (match_operand:OI 0 "register_operand" "=w")
4730 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4732 UNSPEC_LD2_DUP))]
4733 "TARGET_SIMD"
4734 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4735 [(set_attr "type" "neon_load2_all_lanes<q>")]
4736 )
4737
4738 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4739 [(set (match_operand:OI 0 "register_operand" "=w")
4740 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4741 (match_operand:OI 2 "register_operand" "0")
4742 (match_operand:SI 3 "immediate_operand" "i")
4743 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4744 UNSPEC_LD2_LANE))]
4745 "TARGET_SIMD"
4746 {
4747 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4748 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4749 }
4750 [(set_attr "type" "neon_load2_one_lane")]
4751 )
4752
4753 (define_expand "vec_load_lanesoi<mode>"
4754 [(set (match_operand:OI 0 "register_operand" "=w")
4755 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4756 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4757 UNSPEC_LD2))]
4758 "TARGET_SIMD"
4759 {
4760 if (BYTES_BIG_ENDIAN)
4761 {
4762 rtx tmp = gen_reg_rtx (OImode);
4763 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4764 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4765 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4766 }
4767 else
4768 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4769 DONE;
4770 })
4771
4772 (define_insn "aarch64_simd_st2<mode>"
4773 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4774 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4775 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4776 UNSPEC_ST2))]
4777 "TARGET_SIMD"
4778 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4779 [(set_attr "type" "neon_store2_2reg<q>")]
4780 )
4781
4782 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4783 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4784 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4785 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4786 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4787 (match_operand:SI 2 "immediate_operand" "i")]
4788 UNSPEC_ST2_LANE))]
4789 "TARGET_SIMD"
4790 {
4791 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4792 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4793 }
4794 [(set_attr "type" "neon_store2_one_lane<q>")]
4795 )
4796
4797 (define_expand "vec_store_lanesoi<mode>"
4798 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4800 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4801 UNSPEC_ST2))]
4802 "TARGET_SIMD"
4803 {
4804 if (BYTES_BIG_ENDIAN)
4805 {
4806 rtx tmp = gen_reg_rtx (OImode);
4807 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4808 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4809 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4810 }
4811 else
4812 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4813 DONE;
4814 })
4815
4816 (define_insn "aarch64_simd_ld3<mode>"
4817 [(set (match_operand:CI 0 "register_operand" "=w")
4818 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4819 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4820 UNSPEC_LD3))]
4821 "TARGET_SIMD"
4822 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823 [(set_attr "type" "neon_load3_3reg<q>")]
4824 )
4825
4826 (define_insn "aarch64_simd_ld3r<mode>"
4827 [(set (match_operand:CI 0 "register_operand" "=w")
4828 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4830 UNSPEC_LD3_DUP))]
4831 "TARGET_SIMD"
4832 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4833 [(set_attr "type" "neon_load3_all_lanes<q>")]
4834 )
4835
4836 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4837 [(set (match_operand:CI 0 "register_operand" "=w")
4838 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4839 (match_operand:CI 2 "register_operand" "0")
4840 (match_operand:SI 3 "immediate_operand" "i")
4841 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4842 UNSPEC_LD3_LANE))]
4843 "TARGET_SIMD"
4844 {
4845 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4846 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4847 }
4848 [(set_attr "type" "neon_load3_one_lane")]
4849 )
4850
4851 (define_expand "vec_load_lanesci<mode>"
4852 [(set (match_operand:CI 0 "register_operand" "=w")
4853 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4854 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4855 UNSPEC_LD3))]
4856 "TARGET_SIMD"
4857 {
4858 if (BYTES_BIG_ENDIAN)
4859 {
4860 rtx tmp = gen_reg_rtx (CImode);
4861 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4862 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4863 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4864 }
4865 else
4866 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4867 DONE;
4868 })
4869
4870 (define_insn "aarch64_simd_st3<mode>"
4871 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4872 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4873 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4874 UNSPEC_ST3))]
4875 "TARGET_SIMD"
4876 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4877 [(set_attr "type" "neon_store3_3reg<q>")]
4878 )
4879
4880 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4881 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4882 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4884 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4885 (match_operand:SI 2 "immediate_operand" "i")]
4886 UNSPEC_ST3_LANE))]
4887 "TARGET_SIMD"
4888 {
4889 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4890 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4891 }
4892 [(set_attr "type" "neon_store3_one_lane<q>")]
4893 )
4894
4895 (define_expand "vec_store_lanesci<mode>"
4896 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4898 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4899 UNSPEC_ST3))]
4900 "TARGET_SIMD"
4901 {
4902 if (BYTES_BIG_ENDIAN)
4903 {
4904 rtx tmp = gen_reg_rtx (CImode);
4905 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4906 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4907 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4908 }
4909 else
4910 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4911 DONE;
4912 })
4913
4914 (define_insn "aarch64_simd_ld4<mode>"
4915 [(set (match_operand:XI 0 "register_operand" "=w")
4916 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4917 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4918 UNSPEC_LD4))]
4919 "TARGET_SIMD"
4920 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921 [(set_attr "type" "neon_load4_4reg<q>")]
4922 )
4923
4924 (define_insn "aarch64_simd_ld4r<mode>"
4925 [(set (match_operand:XI 0 "register_operand" "=w")
4926 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4928 UNSPEC_LD4_DUP))]
4929 "TARGET_SIMD"
4930 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4931 [(set_attr "type" "neon_load4_all_lanes<q>")]
4932 )
4933
4934 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4935 [(set (match_operand:XI 0 "register_operand" "=w")
4936 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4937 (match_operand:XI 2 "register_operand" "0")
4938 (match_operand:SI 3 "immediate_operand" "i")
4939 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4940 UNSPEC_LD4_LANE))]
4941 "TARGET_SIMD"
4942 {
4943 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4944 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4945 }
4946 [(set_attr "type" "neon_load4_one_lane")]
4947 )
4948
4949 (define_expand "vec_load_lanesxi<mode>"
4950 [(set (match_operand:XI 0 "register_operand" "=w")
4951 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4952 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4953 UNSPEC_LD4))]
4954 "TARGET_SIMD"
4955 {
4956 if (BYTES_BIG_ENDIAN)
4957 {
4958 rtx tmp = gen_reg_rtx (XImode);
4959 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4960 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4961 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4962 }
4963 else
4964 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4965 DONE;
4966 })
4967
4968 (define_insn "aarch64_simd_st4<mode>"
4969 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4970 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4971 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4972 UNSPEC_ST4))]
4973 "TARGET_SIMD"
4974 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4975 [(set_attr "type" "neon_store4_4reg<q>")]
4976 )
4977
4978 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4979 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4980 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4982 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4983 (match_operand:SI 2 "immediate_operand" "i")]
4984 UNSPEC_ST4_LANE))]
4985 "TARGET_SIMD"
4986 {
4987 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4988 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4989 }
4990 [(set_attr "type" "neon_store4_one_lane<q>")]
4991 )
4992
4993 (define_expand "vec_store_lanesxi<mode>"
4994 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4996 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4997 UNSPEC_ST4))]
4998 "TARGET_SIMD"
4999 {
5000 if (BYTES_BIG_ENDIAN)
5001 {
5002 rtx tmp = gen_reg_rtx (XImode);
5003 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5004 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5005 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5006 }
5007 else
5008 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5009 DONE;
5010 })
5011
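;; Big-endian lane reversal of a register list is done with one TBL per
;; 128-bit register in the list, using the byte-shuffle mask produced by
;; aarch64_reverse_mask; the pattern below splits into those TBLs after
;; reload.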
5012 (define_insn_and_split "aarch64_rev_reglist<mode>"
5013 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5014 (unspec:VSTRUCT
5015 [(match_operand:VSTRUCT 1 "register_operand" "w")
5016 (match_operand:V16QI 2 "register_operand" "w")]
5017 UNSPEC_REV_REGLIST))]
5018 "TARGET_SIMD"
5019 "#"
5020 "&& reload_completed"
5021 [(const_int 0)]
5022 {
5023 int i;
5024 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5025 for (i = 0; i < nregs; i++)
5026 {
5027 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5028 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5029 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5030 }
5031 DONE;
5032 }
5033 [(set_attr "type" "neon_tbl1_q")
5034 (set_attr "length" "<insn_count>")]
5035 )
5036
5037 ;; Reload patterns for AdvSIMD register list operands.
5038
5039 (define_expand "mov<mode>"
5040 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5041 (match_operand:VSTRUCT 1 "general_operand" ""))]
5042 "TARGET_SIMD"
5043 {
5044 if (can_create_pseudo_p ())
5045 {
5046 if (GET_CODE (operands[0]) != REG)
5047 operands[1] = force_reg (<MODE>mode, operands[1]);
5048 }
5049 })
5050
5051 (define_insn "*aarch64_mov<mode>"
5052 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5053 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5054 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5055 && (register_operand (operands[0], <MODE>mode)
5056 || register_operand (operands[1], <MODE>mode))"
5057 "@
5058 #
5059 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5060 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5061 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5062 neon_load<nregs>_<nregs>reg_q")
5063 (set_attr "length" "<insn_count>,4,4")]
5064 )
5065
5066 (define_insn "aarch64_be_ld1<mode>"
5067 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5068 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5069 "aarch64_simd_struct_operand" "Utv")]
5070 UNSPEC_LD1))]
5071 "TARGET_SIMD"
5072 "ld1\\t{%0<Vmtype>}, %1"
5073 [(set_attr "type" "neon_load1_1reg<q>")]
5074 )
5075
5076 (define_insn "aarch64_be_st1<mode>"
5077 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5078 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5079 UNSPEC_ST1))]
5080 "TARGET_SIMD"
5081 "st1\\t{%1<Vmtype>}, %0"
5082 [(set_attr "type" "neon_store1_1reg<q>")]
5083 )
5084
5085 (define_insn "*aarch64_be_movoi"
5086 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5087 (match_operand:OI 1 "general_operand" " w,w,m"))]
5088 "TARGET_SIMD && BYTES_BIG_ENDIAN
5089 && (register_operand (operands[0], OImode)
5090 || register_operand (operands[1], OImode))"
5091 "@
5092 #
5093 stp\\t%q1, %R1, %0
5094 ldp\\t%q0, %R0, %1"
5095 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5096 (set_attr "length" "8,4,4")]
5097 )
5098
5099 (define_insn "*aarch64_be_movci"
5100 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5101 (match_operand:CI 1 "general_operand" " w,w,o"))]
5102 "TARGET_SIMD && BYTES_BIG_ENDIAN
5103 && (register_operand (operands[0], CImode)
5104 || register_operand (operands[1], CImode))"
5105 "#"
5106 [(set_attr "type" "multiple")
5107 (set_attr "length" "12,4,4")]
5108 )
5109
5110 (define_insn "*aarch64_be_movxi"
5111 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5112 (match_operand:XI 1 "general_operand" " w,w,o"))]
5113 "TARGET_SIMD && BYTES_BIG_ENDIAN
5114 && (register_operand (operands[0], XImode)
5115 || register_operand (operands[1], XImode))"
5116 "#"
5117 [(set_attr "type" "multiple")
5118 (set_attr "length" "16,4,4")]
5119 )
5120
5121 (define_split
5122 [(set (match_operand:OI 0 "register_operand")
5123 (match_operand:OI 1 "register_operand"))]
5124 "TARGET_SIMD && reload_completed"
5125 [(const_int 0)]
5126 {
5127 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5128 DONE;
5129 })
5130
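;; There is no 48-byte load/store, so for big-endian memory moves a CI
;; value is split below into a 32-byte OI followed by a 16-byte TI, the
;; latter moved through its V16QI lowpart.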
5131 (define_split
5132 [(set (match_operand:CI 0 "nonimmediate_operand")
5133 (match_operand:CI 1 "general_operand"))]
5134 "TARGET_SIMD && reload_completed"
5135 [(const_int 0)]
5136 {
5137 if (register_operand (operands[0], CImode)
5138 && register_operand (operands[1], CImode))
5139 {
5140 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5141 DONE;
5142 }
5143 else if (BYTES_BIG_ENDIAN)
5144 {
5145 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5146 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5147 emit_move_insn (gen_lowpart (V16QImode,
5148 simplify_gen_subreg (TImode, operands[0],
5149 CImode, 32)),
5150 gen_lowpart (V16QImode,
5151 simplify_gen_subreg (TImode, operands[1],
5152 CImode, 32)));
5153 DONE;
5154 }
5155 else
5156 FAIL;
5157 })
5158
5159 (define_split
5160 [(set (match_operand:XI 0 "nonimmediate_operand")
5161 (match_operand:XI 1 "general_operand"))]
5162 "TARGET_SIMD && reload_completed"
5163 [(const_int 0)]
5164 {
5165 if (register_operand (operands[0], XImode)
5166 && register_operand (operands[1], XImode))
5167 {
5168 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5169 DONE;
5170 }
5171 else if (BYTES_BIG_ENDIAN)
5172 {
5173 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5174 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5175 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5176 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5177 DONE;
5178 }
5179 else
5180 FAIL;
5181 })
5182
5183 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5184 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5185 (match_operand:DI 1 "register_operand" "r")
5186 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5187 "TARGET_SIMD"
5188 {
5189 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5190 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5191 * <VSTRUCT:nregs>);
5192
5193 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5194 mem));
5195 DONE;
5196 })
5197
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199 [(set (match_operand:OI 0 "register_operand" "=w")
5200 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202 UNSPEC_LD2_DREG))]
5203 "TARGET_SIMD"
5204 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5205 [(set_attr "type" "neon_load2_2reg<q>")]
5206 )
5207
5208 (define_insn "aarch64_ld2<mode>_dreg"
5209 [(set (match_operand:OI 0 "register_operand" "=w")
5210 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5212 UNSPEC_LD2_DREG))]
5213 "TARGET_SIMD"
5214 "ld1\\t{%S0.1d - %T0.1d}, %1"
5215 [(set_attr "type" "neon_load1_2reg<q>")]
5216 )
5217
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219 [(set (match_operand:CI 0 "register_operand" "=w")
5220 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5222 UNSPEC_LD3_DREG))]
5223 "TARGET_SIMD"
5224 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5225 [(set_attr "type" "neon_load3_3reg<q>")]
5226 )
5227
5228 (define_insn "aarch64_ld3<mode>_dreg"
5229 [(set (match_operand:CI 0 "register_operand" "=w")
5230 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5232 UNSPEC_LD3_DREG))]
5233 "TARGET_SIMD"
5234 "ld1\\t{%S0.1d - %U0.1d}, %1"
5235 [(set_attr "type" "neon_load1_3reg<q>")]
5236 )
5237
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239 [(set (match_operand:XI 0 "register_operand" "=w")
5240 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242 UNSPEC_LD4_DREG))]
5243 "TARGET_SIMD"
5244 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245 [(set_attr "type" "neon_load4_4reg<q>")]
5246 )
5247
5248 (define_insn "aarch64_ld4<mode>_dreg"
5249 [(set (match_operand:XI 0 "register_operand" "=w")
5250 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 UNSPEC_LD4_DREG))]
5253 "TARGET_SIMD"
5254 "ld1\\t{%S0.1d - %V0.1d}, %1"
5255 [(set_attr "type" "neon_load1_4reg<q>")]
5256 )
5257
5258 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5259 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5260 (match_operand:DI 1 "register_operand" "r")
5261 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5262 "TARGET_SIMD"
5263 {
5264 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5265 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5266
5267 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5268 DONE;
5269 })
5270
5271 (define_expand "aarch64_ld1<VALL_F16:mode>"
5272 [(match_operand:VALL_F16 0 "register_operand")
5273 (match_operand:DI 1 "register_operand")]
5274 "TARGET_SIMD"
5275 {
5276 machine_mode mode = <VALL_F16:MODE>mode;
5277 rtx mem = gen_rtx_MEM (mode, operands[1]);
5278
5279 if (BYTES_BIG_ENDIAN)
5280 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5281 else
5282 emit_move_insn (operands[0], mem);
5283 DONE;
5284 })
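
;; An intrinsic-level sketch (illustration only): vld1q_s32 from arm_neon.h
;; reaches the expander above; big-endian takes the explicit LD1 form so
;; that lane numbering follows array order:
;;
;;   #include <arm_neon.h>
;;   int32x4_t load (const int32_t *p)
;;   {
;;     return vld1q_s32 (p);   /* ld1 {v0.4s}, [x0] */
;;   }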
5285
5286 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5287 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5288 (match_operand:DI 1 "register_operand" "r")
5289 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5290 "TARGET_SIMD"
5291 {
5292 machine_mode mode = <VSTRUCT:MODE>mode;
5293 rtx mem = gen_rtx_MEM (mode, operands[1]);
5294
5295 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5296 DONE;
5297 })
5298
5299 (define_expand "aarch64_ld1x2<VQ:mode>"
5300 [(match_operand:OI 0 "register_operand" "=w")
5301 (match_operand:DI 1 "register_operand" "r")
5302 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5303 "TARGET_SIMD"
5304 {
5305 machine_mode mode = OImode;
5306 rtx mem = gen_rtx_MEM (mode, operands[1]);
5307
5308 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5309 DONE;
5310 })
5311
5312 (define_expand "aarch64_ld1x2<VDC:mode>"
5313 [(match_operand:OI 0 "register_operand" "=w")
5314 (match_operand:DI 1 "register_operand" "r")
5315 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5316 "TARGET_SIMD"
5317 {
5318 machine_mode mode = OImode;
5319 rtx mem = gen_rtx_MEM (mode, operands[1]);
5320
5321 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5322 DONE;
5323 })
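
;; Likewise for the two-register LD1 forms (a sketch, assuming the
;; arm_neon.h _x2 intrinsics):
;;
;;   #include <arm_neon.h>
;;   int32x4x2_t load2 (const int32_t *p)
;;   {
;;     return vld1q_s32_x2 (p);   /* ld1 {v0.4s - v1.4s}, [x0] */
;;   }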
5324
5325
5326 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5327 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5328 (match_operand:DI 1 "register_operand" "r")
5329 (match_operand:VSTRUCT 2 "register_operand" "0")
5330 (match_operand:SI 3 "immediate_operand" "i")
5331 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5332 "TARGET_SIMD"
5333 {
5334 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5335 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5336 * <VSTRUCT:nregs>);
5337
5338 aarch64_simd_lane_bounds (operands[3], 0,
5339 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5340 NULL);
5341 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5342 operands[0], mem, operands[2], operands[3]));
5343 DONE;
5344 })
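
;; Lane loads read one element into each register of the list and leave
;; the remaining lanes untouched; a sketch with the usual arm_neon.h names:
;;
;;   #include <arm_neon.h>
;;   int16x8x4_t load_lane2 (const int16_t *p, int16x8x4_t acc)
;;   {
;;     return vld4q_lane_s16 (p, acc, 2);   /* ld4 {v0.h - v3.h}[2], [x0] */
;;   }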
5345
5346 ;; Expanders for builtins to extract vector registers from large
5347 ;; opaque integer modes.
5348
5349 ;; D-register list.
5350
5351 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5352 [(match_operand:VDC 0 "register_operand" "=w")
5353 (match_operand:VSTRUCT 1 "register_operand" "w")
5354 (match_operand:SI 2 "immediate_operand" "i")]
5355 "TARGET_SIMD"
5356 {
5357 int part = INTVAL (operands[2]);
5358 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5359 int offset = part * 16;
5360
5361 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5362 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5363 DONE;
5364 })
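
;; A rough sketch of how arm_neon.h uses these entry points (treat the
;; exact builtin spellings below as illustrative):
;;
;;   __builtin_aarch64_simd_oi __o
;;     = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) p);
;;   int8x8_t v0 = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
;;   int8x8_t v1 = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);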
5365
5366 ;; Q-register list.
5367
5368 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5369 [(match_operand:VQ 0 "register_operand" "=w")
5370 (match_operand:VSTRUCT 1 "register_operand" "w")
5371 (match_operand:SI 2 "immediate_operand" "i")]
5372 "TARGET_SIMD"
5373 {
5374 int part = INTVAL (operands[2]);
5375 int offset = part * 16;
5376
5377 emit_move_insn (operands[0],
5378 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5379 DONE;
5380 })
5381
5382 ;; Permuted-store expanders for neon intrinsics.
5383
5384 ;; Permute instructions
5385
5386 ;; vec_perm support
5387
5388 (define_expand "vec_perm<mode>"
5389 [(match_operand:VB 0 "register_operand")
5390 (match_operand:VB 1 "register_operand")
5391 (match_operand:VB 2 "register_operand")
5392 (match_operand:VB 3 "register_operand")]
5393 "TARGET_SIMD"
5394 {
5395 aarch64_expand_vec_perm (operands[0], operands[1],
5396 operands[2], operands[3], <nunits>);
5397 DONE;
5398 })
5399
5400 (define_insn "aarch64_tbl1<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5403 (match_operand:VB 2 "register_operand" "w")]
5404 UNSPEC_TBL))]
5405 "TARGET_SIMD"
5406 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5407 [(set_attr "type" "neon_tbl1<q>")]
5408 )
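
;; Single-table TBL at the intrinsic level (illustration only); byte
;; indices outside the table yield zero, which is what distinguishes TBL
;; from the TBX forms below:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t perm (uint8x16_t table, uint8x16_t idx)
;;   {
;;     return vqtbl1q_u8 (table, idx);   /* tbl v0.16b, {v0.16b}, v1.16b */
;;   }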
5409
5410 ;; Two source registers.
5411
5412 (define_insn "aarch64_tbl2v16qi"
5413 [(set (match_operand:V16QI 0 "register_operand" "=w")
5414 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5415 (match_operand:V16QI 2 "register_operand" "w")]
5416 UNSPEC_TBL))]
5417 "TARGET_SIMD"
5418 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5419 [(set_attr "type" "neon_tbl2_q")]
5420 )
5421
5422 (define_insn "aarch64_tbl3<mode>"
5423 [(set (match_operand:VB 0 "register_operand" "=w")
5424 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5425 (match_operand:VB 2 "register_operand" "w")]
5426 UNSPEC_TBL))]
5427 "TARGET_SIMD"
5428 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5429 [(set_attr "type" "neon_tbl3")]
5430 )
5431
5432 (define_insn "aarch64_tbx4<mode>"
5433 [(set (match_operand:VB 0 "register_operand" "=w")
5434 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5435 (match_operand:OI 2 "register_operand" "w")
5436 (match_operand:VB 3 "register_operand" "w")]
5437 UNSPEC_TBX))]
5438 "TARGET_SIMD"
5439 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5440 [(set_attr "type" "neon_tbl4")]
5441 )
5442
5443 ;; Three source registers.
5444
5445 (define_insn "aarch64_qtbl3<mode>"
5446 [(set (match_operand:VB 0 "register_operand" "=w")
5447 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5448 (match_operand:VB 2 "register_operand" "w")]
5449 UNSPEC_TBL))]
5450 "TARGET_SIMD"
5451 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5452 [(set_attr "type" "neon_tbl3")]
5453 )
5454
5455 (define_insn "aarch64_qtbx3<mode>"
5456 [(set (match_operand:VB 0 "register_operand" "=w")
5457 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5458 (match_operand:CI 2 "register_operand" "w")
5459 (match_operand:VB 3 "register_operand" "w")]
5460 UNSPEC_TBX))]
5461 "TARGET_SIMD"
5462 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5463 [(set_attr "type" "neon_tbl3")]
5464 )
5465
5466 ;; Four source registers.
5467
5468 (define_insn "aarch64_qtbl4<mode>"
5469 [(set (match_operand:VB 0 "register_operand" "=w")
5470 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5471 (match_operand:VB 2 "register_operand" "w")]
5472 UNSPEC_TBL))]
5473 "TARGET_SIMD"
5474 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5475 [(set_attr "type" "neon_tbl4")]
5476 )
5477
5478 (define_insn "aarch64_qtbx4<mode>"
5479 [(set (match_operand:VB 0 "register_operand" "=w")
5480 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5481 (match_operand:XI 2 "register_operand" "w")
5482 (match_operand:VB 3 "register_operand" "w")]
5483 UNSPEC_TBX))]
5484 "TARGET_SIMD"
5485 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5486 [(set_attr "type" "neon_tbl4")]
5487 )
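
;; TBX keeps the destination byte when an index is out of range; the
;; four-table extension form at the intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t perm4 (uint8x16_t fallback, uint8x16x4_t t, uint8x16_t idx)
;;   {
;;     return vqtbx4q_u8 (fallback, t, idx);
;;   }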
5488
5489 (define_insn_and_split "aarch64_combinev16qi"
5490 [(set (match_operand:OI 0 "register_operand" "=w")
5491 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5492 (match_operand:V16QI 2 "register_operand" "w")]
5493 UNSPEC_CONCAT))]
5494 "TARGET_SIMD"
5495 "#"
5496 "&& reload_completed"
5497 [(const_int 0)]
5498 {
5499 aarch64_split_combinev16qi (operands);
5500 DONE;
5501 }
5502 [(set_attr "type" "multiple")]
5503 )
5504
5505 ;; This instruction's pattern is generated directly by
5506 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5507 ;; need corresponding changes there.
5508 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5509 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5510 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5511 (match_operand:VALL_F16 2 "register_operand" "w")]
5512 PERMUTE))]
5513 "TARGET_SIMD"
5514 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5515 [(set_attr "type" "neon_permute<q>")]
5516 )
5517
5518 ;; This instruction's pattern is generated directly by
5519 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5520 ;; need corresponding changes there. Note that the immediate (third)
5521 ;; operand is a lane index, not a byte index.
5522 (define_insn "aarch64_ext<mode>"
5523 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5524 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5525 (match_operand:VALL_F16 2 "register_operand" "w")
5526 (match_operand:SI 3 "immediate_operand" "i")]
5527 UNSPEC_EXT))]
5528 "TARGET_SIMD"
5529 {
5530 operands[3] = GEN_INT (INTVAL (operands[3])
5531 * GET_MODE_UNIT_SIZE (<MODE>mode));
5532 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5533 }
5534 [(set_attr "type" "neon_ext<q>")]
5535 )
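
;; For example, lane index 1 on V4SI becomes the byte immediate #4; at the
;; intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   int32x4_t rot1 (int32x4_t a, int32x4_t b)
;;   {
;;     return vextq_s32 (a, b, 1);   /* ext v0.16b, v0.16b, v1.16b, #4 */
;;   }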
5536
5537 ;; This instruction's pattern is generated directly by
5538 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5539 ;; need corresponding changes there.
5540 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5541 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5542 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5543 REVERSE))]
5544 "TARGET_SIMD"
5545 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5546 [(set_attr "type" "neon_rev<q>")]
5547 )
5548
5549 (define_insn "aarch64_st2<mode>_dreg"
5550 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5551 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5552 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5553 UNSPEC_ST2))]
5554 "TARGET_SIMD"
5555 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5556 [(set_attr "type" "neon_store2_2reg")]
5557 )
5558
5559 (define_insn "aarch64_st2<mode>_dreg"
5560 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5561 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5562 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5563 UNSPEC_ST2))]
5564 "TARGET_SIMD"
5565 "st1\\t{%S1.1d - %T1.1d}, %0"
5566 [(set_attr "type" "neon_store1_2reg")]
5567 )
5568
5569 (define_insn "aarch64_st3<mode>_dreg"
5570 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5571 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5572 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5573 UNSPEC_ST3))]
5574 "TARGET_SIMD"
5575 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5576 [(set_attr "type" "neon_store3_3reg")]
5577 )
5578
5579 (define_insn "aarch64_st3<mode>_dreg"
5580 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5581 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5582 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5583 UNSPEC_ST3))]
5584 "TARGET_SIMD"
5585 "st1\\t{%S1.1d - %U1.1d}, %0"
5586 [(set_attr "type" "neon_store1_3reg")]
5587 )
5588
5589 (define_insn "aarch64_st4<mode>_dreg"
5590 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5591 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5592 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5593 UNSPEC_ST4))]
5594 "TARGET_SIMD"
5595 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5596 [(set_attr "type" "neon_store4_4reg")]
5597 )
5598
5599 (define_insn "aarch64_st4<mode>_dreg"
5600 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5601 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5602 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5603 UNSPEC_ST4))]
5604 "TARGET_SIMD"
5605 "st1\\t{%S1.1d - %V1.1d}, %0"
5606 [(set_attr "type" "neon_store1_4reg")]
5607 )
5608
5609 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5610 [(match_operand:DI 0 "register_operand" "r")
5611 (match_operand:VSTRUCT 1 "register_operand" "w")
5612 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5613 "TARGET_SIMD"
5614 {
5615 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5616 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5617
5618 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5619 DONE;
5620 })
5621
5622 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5623 [(match_operand:DI 0 "register_operand" "r")
5624 (match_operand:VSTRUCT 1 "register_operand" "w")
5625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5626 "TARGET_SIMD"
5627 {
5628 machine_mode mode = <VSTRUCT:MODE>mode;
5629 rtx mem = gen_rtx_MEM (mode, operands[0]);
5630
5631 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5632 DONE;
5633 })
5634
5635 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5636 [(match_operand:DI 0 "register_operand" "r")
5637 (match_operand:VSTRUCT 1 "register_operand" "w")
5638 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5639 (match_operand:SI 2 "immediate_operand")]
5640 "TARGET_SIMD"
5641 {
5642 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5643 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5644 * <VSTRUCT:nregs>);
5645
5646 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5647 mem, operands[1], operands[2]));
5648 DONE;
5649 })
5650
5651 (define_expand "aarch64_st1<VALL_F16:mode>"
5652 [(match_operand:DI 0 "register_operand")
5653 (match_operand:VALL_F16 1 "register_operand")]
5654 "TARGET_SIMD"
5655 {
5656 machine_mode mode = <VALL_F16:MODE>mode;
5657 rtx mem = gen_rtx_MEM (mode, operands[0]);
5658
5659 if (BYTES_BIG_ENDIAN)
5660 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5661 else
5662 emit_move_insn (mem, operands[1]);
5663 DONE;
5664 })
5665
5666 ;; Expander for builtins to insert vector registers into large
5667 ;; opaque integer modes.
5668
5669 ;; Q-register list. We don't need a D-reg inserter, as we zero-extend
5670 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
5671
5672 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5673 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5674 (match_operand:VSTRUCT 1 "register_operand" "0")
5675 (match_operand:VQ 2 "register_operand" "w")
5676 (match_operand:SI 3 "immediate_operand" "i")]
5677 "TARGET_SIMD"
5678 {
5679 int part = INTVAL (operands[3]);
5680 int offset = part * 16;
5681
5682 emit_move_insn (operands[0], operands[1]);
5683 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5684 operands[2]);
5685 DONE;
5686 })
5687
5688 ;; Standard pattern name vec_init<mode><Vel>.
5689
5690 (define_expand "vec_init<mode><Vel>"
5691 [(match_operand:VALL_F16 0 "register_operand" "")
5692 (match_operand 1 "" "")]
5693 "TARGET_SIMD"
5694 {
5695 aarch64_expand_vector_init (operands[0], operands[1]);
5696 DONE;
5697 })
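
;; vec_init is what a vector built from scalars expands through, e.g. with
;; the GCC vector extension (illustration only):
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si make (int a, int b, int c, int d)
;;   {
;;     return (v4si) { a, b, c, d };
;;   }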
5698
5699 (define_insn "*aarch64_simd_ld1r<mode>"
5700 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5701 (vec_duplicate:VALL_F16
5702 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5703 "TARGET_SIMD"
5704 "ld1r\\t{%0.<Vtype>}, %1"
5705 [(set_attr "type" "neon_load1_all_lanes")]
5706 )
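
;; Load-replicate at the intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   int32x4_t splat (const int32_t *p)
;;   {
;;     return vld1q_dup_s32 (p);   /* ld1r {v0.4s}, [x0] */
;;   }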
5707
5708 (define_insn "aarch64_simd_ld1<mode>_x2"
5709 [(set (match_operand:OI 0 "register_operand" "=w")
5710 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5711 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5712 UNSPEC_LD1))]
5713 "TARGET_SIMD"
5714 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5715 [(set_attr "type" "neon_load1_2reg<q>")]
5716 )
5717
5718 (define_insn "aarch64_simd_ld1<mode>_x2"
5719 [(set (match_operand:OI 0 "register_operand" "=w")
5720 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5721 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722 UNSPEC_LD1))]
5723 "TARGET_SIMD"
5724 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5725 [(set_attr "type" "neon_load1_2reg<q>")]
5726 )
5727
5728
5729 (define_insn "aarch64_frecpe<mode>"
5730 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5731 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5732 UNSPEC_FRECPE))]
5733 "TARGET_SIMD"
5734 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5735 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5736 )
5737
5738 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5739 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5740 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5741 FRECP))]
5742 "TARGET_SIMD"
5743 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5744 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5745 )
5746
5747 (define_insn "aarch64_frecps<mode>"
5748 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5749 (unspec:VHSDF_HSDF
5750 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5751 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5752 UNSPEC_FRECPS))]
5753 "TARGET_SIMD"
5754 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5755 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5756 )
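
;; FRECPE returns an initial reciprocal estimate and FRECPS the
;; Newton-Raphson correction factor (2 - a*x), so a refined reciprocal
;; alternates the two (a sketch, illustration only):
;;
;;   #include <arm_neon.h>
;;   float32x4_t recip (float32x4_t a)
;;   {
;;     float32x4_t x = vrecpeq_f32 (a);           /* frecpe */
;;     x = vmulq_f32 (x, vrecpsq_f32 (a, x));     /* first N-R step */
;;     return vmulq_f32 (x, vrecpsq_f32 (a, x));  /* second N-R step */
;;   }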
5757
5758 (define_insn "aarch64_urecpe<mode>"
5759 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5760 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5761 UNSPEC_URECPE))]
5762 "TARGET_SIMD"
5763 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5764 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5765
5766 ;; Standard pattern name vec_extract<mode><Vel>.
5767
5768 (define_expand "vec_extract<mode><Vel>"
5769 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5770 (match_operand:VALL_F16 1 "register_operand" "")
5771 (match_operand:SI 2 "immediate_operand" "")]
5772 "TARGET_SIMD"
5773 {
5774 emit_insn
5775 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5776 DONE;
5777 })
5778
5779 ;; aes
5780
5781 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5782 [(set (match_operand:V16QI 0 "register_operand" "=w")
5783 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5784 (match_operand:V16QI 2 "register_operand" "w")]
5785 CRYPTO_AES))]
5786 "TARGET_SIMD && TARGET_CRYPTO"
5787 "aes<aes_op>\\t%0.16b, %2.16b"
5788 [(set_attr "type" "crypto_aese")]
5789 )
5790
5791 ;; When AES/AESMC fusion is enabled we want the register allocation to
5792 ;; look like:
5793 ;; AESE Vn, _
5794 ;; AESMC Vn, Vn
5795 ;; So prefer to tie operand 1 to operand 0 when fusing.
5796
5797 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5798 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5799 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5800 CRYPTO_AESMC))]
5801 "TARGET_SIMD && TARGET_CRYPTO"
5802 "aes<aesmc_op>\\t%0.16b, %1.16b"
5803 [(set_attr "type" "crypto_aesmc")
5804 (set_attr_alternative "enabled"
5805 [(if_then_else (match_test
5806 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5807 (const_string "yes")
5808 (const_string "no"))
5809 (const_string "yes")])]
5810 )
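
;; The fusable pair at the intrinsic level (a sketch): keeping the AESE
;; result in the same register feeds AESMC directly, matching the shape
;; described above:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t aes_round (uint8x16_t state, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (state, key));   /* aese + aesmc */
;;   }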
5811
5812 ;; sha1
5813
5814 (define_insn "aarch64_crypto_sha1hsi"
5815 [(set (match_operand:SI 0 "register_operand" "=w")
5816 (unspec:SI [(match_operand:SI 1
5817 "register_operand" "w")]
5818 UNSPEC_SHA1H))]
5819 "TARGET_SIMD && TARGET_CRYPTO"
5820 "sha1h\\t%s0, %s1"
5821 [(set_attr "type" "crypto_sha1_fast")]
5822 )
5823
5824 (define_insn "aarch64_crypto_sha1hv4si"
5825 [(set (match_operand:SI 0 "register_operand" "=w")
5826 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5827 (parallel [(const_int 0)]))]
5828 UNSPEC_SHA1H))]
5829 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5830 "sha1h\\t%s0, %s1"
5831 [(set_attr "type" "crypto_sha1_fast")]
5832 )
5833
5834 (define_insn "aarch64_be_crypto_sha1hv4si"
5835 [(set (match_operand:SI 0 "register_operand" "=w")
5836 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5837 (parallel [(const_int 3)]))]
5838 UNSPEC_SHA1H))]
5839 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5840 "sha1h\\t%s0, %s1"
5841 [(set_attr "type" "crypto_sha1_fast")]
5842 )
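
;; The two sha1hv4si patterns above differ only in the vec_select index:
;; on big-endian, GCC lane 3 of a V4SI names the element that lane 0 names
;; on little-endian, so both read the architectural element 0.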
5843
5844 (define_insn "aarch64_crypto_sha1su1v4si"
5845 [(set (match_operand:V4SI 0 "register_operand" "=w")
5846 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5847 (match_operand:V4SI 2 "register_operand" "w")]
5848 UNSPEC_SHA1SU1))]
5849 "TARGET_SIMD && TARGET_CRYPTO"
5850 "sha1su1\\t%0.4s, %2.4s"
5851 [(set_attr "type" "crypto_sha1_fast")]
5852 )
5853
5854 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5855 [(set (match_operand:V4SI 0 "register_operand" "=w")
5856 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5857 (match_operand:SI 2 "register_operand" "w")
5858 (match_operand:V4SI 3 "register_operand" "w")]
5859 CRYPTO_SHA1))]
5860 "TARGET_SIMD && TARGET_CRYPTO"
5861 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5862 [(set_attr "type" "crypto_sha1_slow")]
5863 )
5864
5865 (define_insn "aarch64_crypto_sha1su0v4si"
5866 [(set (match_operand:V4SI 0 "register_operand" "=w")
5867 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5868 (match_operand:V4SI 2 "register_operand" "w")
5869 (match_operand:V4SI 3 "register_operand" "w")]
5870 UNSPEC_SHA1SU0))]
5871 "TARGET_SIMD && TARGET_CRYPTO"
5872 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5873 [(set_attr "type" "crypto_sha1_xor")]
5874 )
5875
5876 ;; sha256
5877
5878 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5879 [(set (match_operand:V4SI 0 "register_operand" "=w")
5880 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5881 (match_operand:V4SI 2 "register_operand" "w")
5882 (match_operand:V4SI 3 "register_operand" "w")]
5883 CRYPTO_SHA256))]
5884 "TARGET_SIMD && TARGET_CRYPTO"
5885 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5886 [(set_attr "type" "crypto_sha256_slow")]
5887 )
5888
5889 (define_insn "aarch64_crypto_sha256su0v4si"
5890 [(set (match_operand:V4SI 0 "register_operand" "=w")
5891 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5892 (match_operand:V4SI 2 "register_operand" "w")]
5893 UNSPEC_SHA256SU0))]
5894 "TARGET_SIMD &&TARGET_CRYPTO"
5895 "sha256su0\\t%0.4s, %2.4s"
5896 [(set_attr "type" "crypto_sha256_fast")]
5897 )
5898
5899 (define_insn "aarch64_crypto_sha256su1v4si"
5900 [(set (match_operand:V4SI 0 "register_operand" "=w")
5901 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5902 (match_operand:V4SI 2 "register_operand" "w")
5903 (match_operand:V4SI 3 "register_operand" "w")]
5904 UNSPEC_SHA256SU1))]
5905 "TARGET_SIMD &&TARGET_CRYPTO"
5906 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5907 [(set_attr "type" "crypto_sha256_slow")]
5908 )
5909
5910 ;; pmull
5911
5912 (define_insn "aarch64_crypto_pmulldi"
5913 [(set (match_operand:TI 0 "register_operand" "=w")
5914 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5915 (match_operand:DI 2 "register_operand" "w")]
5916 UNSPEC_PMULL))]
5917 "TARGET_SIMD && TARGET_CRYPTO"
5918 "pmull\\t%0.1q, %1.1d, %2.1d"
5919 [(set_attr "type" "crypto_pmull")]
5920 )
5921
5922 (define_insn "aarch64_crypto_pmullv2di"
5923 [(set (match_operand:TI 0 "register_operand" "=w")
5924 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5925 (match_operand:V2DI 2 "register_operand" "w")]
5926 UNSPEC_PMULL2))]
5927 "TARGET_SIMD && TARGET_CRYPTO"
5928 "pmull2\\t%0.1q, %1.2d, %2.2d"
5929 [(set_attr "type" "crypto_pmull")]
5930 )
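
;; The 64x64->128-bit polynomial multiply at the intrinsic level (a
;; sketch), the primitive behind carry-less hashes such as GHASH:
;;
;;   #include <arm_neon.h>
;;   poly128_t clmul (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);        /* pmull */
;;   }
;;   poly128_t clmul_hi (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);   /* pmull2 */
;;   }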