;; ]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
;; [AArch64] Pass number of units to aarch64_simd_vect_par_cnst_half
;; [thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Standard vector move expander for all AdvSIMD modes including FP16
;; vectors (VALL_F16).  Legitimizes the source so the *aarch64_simd_mov
;; insn patterns can match: only a zero immediate may be stored directly
;; to memory (as xzr), anything else is forced into a register first.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((GET_MODE_SIZE (<MODE>mode) == 16
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || GET_MODE_SIZE (<MODE>mode) == 8)))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
;; Misaligned vector move.  AdvSIMD loads/stores tolerate misalignment,
;; so this only has to legitimize operands, never fail.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
;; Duplicate a scalar into every lane of a vector.  Integer variant:
;; the source may be a SIMD register (DUP element 0) or, at a cost
;; ("?r"), a general register (DUP from GP).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64 
;; Floating-point variant: the scalar always lives in a SIMD register,
;; so only the element-duplicate form is needed.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
;; Broadcast one selected lane of a vector into all lanes of a vector of
;; the same mode.  The lane number is remapped for big-endian via
;; aarch64_endian_lane_rtx before printing.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88 
;; Same operation, but the source vector has the opposite width
;; (<VSWAP_WIDTH>), e.g. duplicating a lane of a Q-reg into a D-reg
;; vector.  The lane index is remapped in the source vector's mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
;; 64-bit (D-register) vector move.  Alternatives cover: load, store of
;; zero via xzr, store, SIMD reg-reg move, SIMD->GP, GP->SIMD, GP-GP,
;; and a move-immediate.  The condition requires at least one side to be
;; a register (or the source to be zero) so mem-to-mem never matches.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1],
124 <MODE>mode, 64);
125 default: gcc_unreachable ();
126 }
127 }
128 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
129 neon_logic<q>, neon_to_gp<q>, f_mcr,\
130 mov_reg, neon_move<q>")]
131 )
132
;; 128-bit (Q-register) vector move.  Storing zero uses "stp xzr, xzr"
;; (hence the Umq pair-capable memory constraint for that alternative).
;; GP-register alternatives (cases 4-6) emit "#" and are split after
;; reload into DImode moves; their length is 8 accordingly.
133 (define_insn "*aarch64_simd_mov<VQ:mode>"
134 [(set (match_operand:VQ 0 "nonimmediate_operand"
135 "=w, Umq, m, w, ?r, ?w, ?r, w")
136 (match_operand:VQ 1 "general_operand"
137 "m, Dz, w, w, w, r, r, Dn"))]
138 "TARGET_SIMD
139 && (register_operand (operands[0], <MODE>mode)
140 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 {
142 switch (which_alternative)
143 {
144 case 0:
145 return "ldr\t%q0, %1";
146 case 1:
147 return "stp\txzr, xzr, %0";
148 case 2:
149 return "str\t%q1, %0";
150 case 3:
151 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
152 case 4:
153 case 5:
154 case 6:
155 return "#";
156 case 7:
157 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
158 default:
159 gcc_unreachable ();
160 }
161 }
162 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
163 neon_logic<q>, multiple, multiple,\
164 multiple, neon_move<q>")
165 (set_attr "length" "4,4,4,4,8,8,8,4")]
166 )
167
168 ;; When storing lane zero we can use the normal STR and its more permissive
169 ;; addressing modes.
170 
;; Matches only when the selected lane is architectural lane 0
;; (after endian remapping), where a scalar STR of the low element
;; is equivalent to a lane store.
171 (define_insn "aarch64_store_lane0<mode>"
172 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
173 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
174 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 "TARGET_SIMD
176 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
177 "str\\t%<Vetype>1, %0"
178 [(set_attr "type" "neon_store1_1reg<q>")]
179 )
180
;; LDP of two D-register vectors.  The condition verifies the second
;; load's address is exactly the first address plus the mode size, i.e.
;; the two slots are adjacent, so a single LDP covers both.
181 (define_insn "load_pair<mode>"
182 [(set (match_operand:VD 0 "register_operand" "=w")
183 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
184 (set (match_operand:VD 2 "register_operand" "=w")
185 (match_operand:VD 3 "memory_operand" "m"))]
186 "TARGET_SIMD
187 && rtx_equal_p (XEXP (operands[3], 0),
188 plus_constant (Pmode,
189 XEXP (operands[1], 0),
190 GET_MODE_SIZE (<MODE>mode)))"
191 "ldp\\t%d0, %d2, %1"
192 [(set_attr "type" "neon_ldp")]
193 )
194 
;; STP counterpart of load_pair: two adjacent D-register stores fused
;; into one STP, with the same adjacency check on the addresses.
195 (define_insn "store_pair<mode>"
196 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
197 (match_operand:VD 1 "register_operand" "w"))
198 (set (match_operand:VD 2 "memory_operand" "=m")
199 (match_operand:VD 3 "register_operand" "w"))]
200 "TARGET_SIMD
201 && rtx_equal_p (XEXP (operands[2], 0),
202 plus_constant (Pmode,
203 XEXP (operands[0], 0),
204 GET_MODE_SIZE (<MODE>mode)))"
205 "stp\\t%d1, %d3, %0"
206 [(set_attr "type" "neon_stp")]
207 )
208
;; After reload, split a 128-bit GP-to-GP vector copy into two DImode
;; register moves (a Q value in GP registers occupies a pair).
209 (define_split
210 [(set (match_operand:VQ 0 "register_operand" "")
211 (match_operand:VQ 1 "register_operand" ""))]
212 "TARGET_SIMD && reload_completed
213 && GP_REGNUM_P (REGNO (operands[0]))
214 && GP_REGNUM_P (REGNO (operands[1]))"
215 [(const_int 0)]
216 {
217 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
218 DONE;
219 })
220 
;; After reload, split a 128-bit move that crosses the FP/GP register
;; files (either direction) via aarch64_split_simd_move.
221 (define_split
222 [(set (match_operand:VQ 0 "register_operand" "")
223 (match_operand:VQ 1 "register_operand" ""))]
224 "TARGET_SIMD && reload_completed
225 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
226 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
227 [(const_int 0)]
228 {
229 aarch64_split_simd_move (operands[0], operands[1]);
230 DONE;
231 })
232
;; Split a 128-bit move between the GP and SIMD register files into two
;; 64-bit half moves.  GP -> SIMD uses move_lo_quad/move_hi_quad; the
;; other direction extracts the low and high halves with vec_selects
;; built by aarch64_simd_vect_par_cnst_half.
233 (define_expand "aarch64_split_simd_mov<mode>"
234 [(set (match_operand:VQ 0)
235 (match_operand:VQ 1))]
236 "TARGET_SIMD"
237 {
238 rtx dst = operands[0];
239 rtx src = operands[1];
240 
241 if (GP_REGNUM_P (REGNO (src)))
242 {
243 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
244 rtx src_high_part = gen_highpart (<VHALF>mode, src);
245 
246 emit_insn
247 (gen_move_lo_quad_<mode> (dst, src_low_part));
248 emit_insn
249 (gen_move_hi_quad_<mode> (dst, src_high_part));
250 }
251 
252 else
253 {
254 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
255 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
256 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
257 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
258 
259 emit_insn
260 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
261 emit_insn
262 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
263 }
264 DONE;
265 }
266 )
267
;; Move the low 64-bit half of a Q-register vector to a GP register
;; (UMOV of d[0]).  Only valid after reload, when register classes
;; are fixed; used by aarch64_split_simd_mov.
268 (define_insn "aarch64_simd_mov_from_<mode>low"
269 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
270 (vec_select:<VHALF>
271 (match_operand:VQ 1 "register_operand" "w")
272 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
273 "TARGET_SIMD && reload_completed"
274 "umov\t%0, %1.d[0]"
275 [(set_attr "type" "neon_to_gp<q>")
276 (set_attr "length" "4")
277 ])
278 
;; As above, but for the high half (UMOV of d[1]).
279 (define_insn "aarch64_simd_mov_from_<mode>high"
280 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
281 (vec_select:<VHALF>
282 (match_operand:VQ 1 "register_operand" "w")
283 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
284 "TARGET_SIMD && reload_completed"
285 "umov\t%0, %1.d[1]"
286 [(set_attr "type" "neon_to_gp<q>")
287 (set_attr "length" "4")
288 ])
289
;; ORN: operand 1 is the inverted input, so the template prints
;; operand 2 first to match the instruction's (a | ~b) form.
290 (define_insn "orn<mode>3"
291 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
292 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
293 (match_operand:VDQ_I 2 "register_operand" "w")))]
294 "TARGET_SIMD"
295 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
296 [(set_attr "type" "neon_logic<q>")]
297 )
298 
;; BIC: likewise, operand order in the template is swapped to match
;; the instruction's (a & ~b) form.
299 (define_insn "bic<mode>3"
300 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
301 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
302 (match_operand:VDQ_I 2 "register_operand" "w")))]
303 "TARGET_SIMD"
304 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
305 [(set_attr "type" "neon_logic<q>")]
306 )
307 
;; Vector integer addition.
308 (define_insn "add<mode>3"
309 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
310 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
311 (match_operand:VDQ_I 2 "register_operand" "w")))]
312 "TARGET_SIMD"
313 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
314 [(set_attr "type" "neon_add<q>")]
315 )
316 
;; Vector integer subtraction.
317 (define_insn "sub<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
323 [(set_attr "type" "neon_sub<q>")]
324 )
325 
;; Vector integer multiply; VDQ_BHSI because MUL has no 64-bit
;; element form.
326 (define_insn "mul<mode>3"
327 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
328 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
329 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
332 [(set_attr "type" "neon_mul_<Vetype><q>")]
333 )
334
;; Byte-swap each element via the REV family (suffix chosen by
;; element width through <Vrevsuff>).
335 (define_insn "bswap<mode>2"
336 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
337 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
338 "TARGET_SIMD"
339 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
340 [(set_attr "type" "neon_rev<q>")]
341 )
342 
;; Reverse the bits within each byte (byte-element vectors only).
343 (define_insn "aarch64_rbit<mode>"
344 [(set (match_operand:VB 0 "register_operand" "=w")
345 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
346 UNSPEC_RBIT))]
347 "TARGET_SIMD"
348 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
349 [(set_attr "type" "neon_rbit")]
350 )
351
;; Count trailing zeros, synthesized as ctz(x) = clz(bit_reverse(x)):
;; byte-swap the word elements, bit-reverse the bytes (via a QI-vector
;; subreg view), then count leading zeros.
352 (define_expand "ctz<mode>2"
353 [(set (match_operand:VS 0 "register_operand")
354 (ctz:VS (match_operand:VS 1 "register_operand")))]
355 "TARGET_SIMD"
356 {
357 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
358 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
359 <MODE>mode, 0);
360 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
361 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
362 DONE;
363 }
364 )
365
;; xorsign(x, y) = x with its sign bit XORed with y's sign bit.
;; Implemented in the integer domain: mask y down to its sign bits
;; (AND with ~0 << (elt_bits - 1)), XOR into x, and view the result
;; back in the FP mode.
366 (define_expand "xorsign<mode>3"
367 [(match_operand:VHSDF 0 "register_operand")
368 (match_operand:VHSDF 1 "register_operand")
369 (match_operand:VHSDF 2 "register_operand")]
370 "TARGET_SIMD"
371 {
372 
373 machine_mode imode = <V_INT_EQUIV>mode;
374 rtx v_bitmask = gen_reg_rtx (imode);
375 rtx op1x = gen_reg_rtx (imode);
376 rtx op2x = gen_reg_rtx (imode);
377 
378 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
379 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
380 
381 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
382 
383 emit_move_insn (v_bitmask,
384 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
385 HOST_WIDE_INT_M1U << bits));
386 
387 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
388 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
389 emit_move_insn (operands[0],
390 lowpart_subreg (<MODE>mode, op1x, imode));
391 DONE;
392 }
393 )
394
395 ;; These instructions map to the __builtins for the Dot Product operations.
;; Accumulating dot product: operand 1 is tied to the destination ("0")
;; because [US]DOT accumulates into its destination register.
396 (define_insn "aarch64_<sur>dot<vsi2qi>"
397 [(set (match_operand:VS 0 "register_operand" "=w")
398 (plus:VS (match_operand:VS 1 "register_operand" "0")
399 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
400 (match_operand:<VSI2QI> 3 "register_operand" "w")]
401 DOTPROD)))]
402 "TARGET_DOTPROD"
403 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
404 [(set_attr "type" "neon_dot")]
405 )
406
407 ;; These expands map to the Dot Product optab the vectorizer checks for.
408 ;; The auto-vectorizer expects a dot product builtin that also does an
409 ;; accumulation into the provided register.
410 ;; Given the following pattern
411 ;;
412 ;; for (i=0; i<len; i++) {
413 ;; c = a[i] * b[i];
414 ;; r += c;
415 ;; }
416 ;; return r;
417 ;;
418 ;; This can be auto-vectorized to
419 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
420 ;;
421 ;; given enough iterations. However the vectorizer can keep unrolling the loop
422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
424 ;; ...
425 ;;
426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
427 (define_expand "<sur>dot_prod<vsi2qi>"
428 [(set (match_operand:VS 0 "register_operand")
429 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
430 (match_operand:<VSI2QI> 2 "register_operand")]
431 DOTPROD)
432 (match_operand:VS 3 "register_operand")))]
433 "TARGET_DOTPROD"
434 {
;; Accumulate into operand 3 (the vectorizer-provided accumulator),
;; then copy the accumulated value into the destination.
435 emit_insn (
436 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
437 operands[2]));
438 emit_insn (gen_rtx_SET (operands[0], operands[3]));
439 DONE;
440 })
441
442 ;; These instructions map to the __builtins for the Dot Product
443 ;; indexed operations.
;; Lane-indexed dot product, lane source in a D-register (V8QI viewed
;; as four 32-bit groups).  The lane index is endian-remapped before
;; printing.
444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
445 [(set (match_operand:VS 0 "register_operand" "=w")
446 (plus:VS (match_operand:VS 1 "register_operand" "0")
447 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
448 (match_operand:V8QI 3 "register_operand" "<h_con>")
449 (match_operand:SI 4 "immediate_operand" "i")]
450 DOTPROD)))]
451 "TARGET_DOTPROD"
452 {
453 operands[4]
454 = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
455 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
456 }
457 [(set_attr "type" "neon_dot")]
458 )
459 
;; As above, but the lane source is a Q-register (V16QI).
460 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
461 [(set (match_operand:VS 0 "register_operand" "=w")
462 (plus:VS (match_operand:VS 1 "register_operand" "0")
463 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
464 (match_operand:V16QI 3 "register_operand" "<h_con>")
465 (match_operand:SI 4 "immediate_operand" "i")]
466 DOTPROD)))]
467 "TARGET_DOTPROD"
468 {
469 operands[4]
470 = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
471 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
472 }
473 [(set_attr "type" "neon_dot")]
474 )
475
;; copysign(x, y): select y's sign bit and x's remaining bits using a
;; BSL with a sign-bit mask (~0 << (elt_bits - 1)) as the selector.
476 (define_expand "copysign<mode>3"
477 [(match_operand:VHSDF 0 "register_operand")
478 (match_operand:VHSDF 1 "register_operand")
479 (match_operand:VHSDF 2 "register_operand")]
480 "TARGET_FLOAT && TARGET_SIMD"
481 {
482 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
483 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
484 
485 emit_move_insn (v_bitmask,
486 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
487 HOST_WIDE_INT_M1U << bits));
488 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
489 operands[2], operands[1]));
490 DONE;
491 }
492 )
493
;; Combiner pattern: multiply a vector by a broadcast lane of another
;; vector of the same mode -> the by-element [F]MUL form.
494 (define_insn "*aarch64_mul3_elt<mode>"
495 [(set (match_operand:VMUL 0 "register_operand" "=w")
496 (mult:VMUL
497 (vec_duplicate:VMUL
498 (vec_select:<VEL>
499 (match_operand:VMUL 1 "register_operand" "<h_con>")
500 (parallel [(match_operand:SI 2 "immediate_operand")])))
501 (match_operand:VMUL 3 "register_operand" "w")))]
502 "TARGET_SIMD"
503 {
504 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
505 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
506 }
507 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
508 )
509 
;; Same, but the lane comes from the opposite-width vector mode
;; (<VSWAP_WIDTH>); the lane index is remapped in that mode.
510 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
511 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
512 (mult:VMUL_CHANGE_NLANES
513 (vec_duplicate:VMUL_CHANGE_NLANES
514 (vec_select:<VEL>
515 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
516 (parallel [(match_operand:SI 2 "immediate_operand")])))
517 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
518 "TARGET_SIMD"
519 {
520 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
521 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
522 }
523 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
524 )
525 
;; Multiply by a duplicated scalar register: uses element [0] of the
;; scalar's SIMD register, so no lane remapping is needed.
526 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
527 [(set (match_operand:VMUL 0 "register_operand" "=w")
528 (mult:VMUL
529 (vec_duplicate:VMUL
530 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
531 (match_operand:VMUL 2 "register_operand" "w")))]
532 "TARGET_SIMD"
533 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
534 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
535 )
536
;; FRSQRTE: reciprocal square-root estimate (vector and scalar FP modes).
537 (define_insn "aarch64_rsqrte<mode>"
538 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
539 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
540 UNSPEC_RSQRTE))]
541 "TARGET_SIMD"
542 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
543 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
544 
;; FRSQRTS: Newton-series step used to refine an FRSQRTE estimate.
545 (define_insn "aarch64_rsqrts<mode>"
546 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
547 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
548 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
549 UNSPEC_RSQRTS))]
550 "TARGET_SIMD"
551 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
552 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
553
;; rsqrt optab entry point: expands to the iterative approximation
;; sequence emitted by aarch64_emit_approx_sqrt (recip = true).
554 (define_expand "rsqrt<mode>2"
555 [(set (match_operand:VALLF 0 "register_operand" "=w")
556 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
557 UNSPEC_RSQRT))]
558 "TARGET_SIMD"
559 {
560 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
561 DONE;
562 })
563
;; Scalar DF = (lane of V2DF) * DF, matched to the by-element FMUL.
564 (define_insn "*aarch64_mul3_elt_to_64v2df"
565 [(set (match_operand:DF 0 "register_operand" "=w")
566 (mult:DF
567 (vec_select:DF
568 (match_operand:V2DF 1 "register_operand" "w")
569 (parallel [(match_operand:SI 2 "immediate_operand")]))
570 (match_operand:DF 3 "register_operand" "w")))]
571 "TARGET_SIMD"
572 {
573 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
574 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
575 }
576 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
577 )
578
;; Vector integer negation.
579 (define_insn "neg<mode>2"
580 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
581 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
582 "TARGET_SIMD"
583 "neg\t%0.<Vtype>, %1.<Vtype>"
584 [(set_attr "type" "neon_neg<q>")]
585 )
586 
;; Vector integer absolute value.
587 (define_insn "abs<mode>2"
588 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
589 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
590 "TARGET_SIMD"
591 "abs\t%0.<Vtype>, %1.<Vtype>"
592 [(set_attr "type" "neon_abs<q>")]
593 )
594
595 ;; The intrinsic version of integer ABS must not be allowed to
596 ;; combine with any operation with an integrated ABS step, such
597 ;; as SABD.
;; Hence the unspec wrapper: it hides the abs from the combiner while
;; still emitting the plain ABS instruction.
598 (define_insn "aarch64_abs<mode>"
599 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
600 (unspec:VSDQ_I_DI
601 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
602 UNSPEC_ABS))]
603 "TARGET_SIMD"
604 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
605 [(set_attr "type" "neon_abs<q>")]
606 )
607
;; SABD: signed absolute difference, abs (a - b).
608 (define_insn "abd<mode>_3"
609 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
610 (abs:VDQ_BHSI (minus:VDQ_BHSI
611 (match_operand:VDQ_BHSI 1 "register_operand" "w")
612 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
613 "TARGET_SIMD"
614 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
615 [(set_attr "type" "neon_abd<q>")]
616 )
617 
;; SABA: absolute difference accumulated into operand 3 (tied to the
;; destination via the "0" constraint).
618 (define_insn "aba<mode>_3"
619 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
620 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
621 (match_operand:VDQ_BHSI 1 "register_operand" "w")
622 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
623 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
624 "TARGET_SIMD"
625 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
626 [(set_attr "type" "neon_arith_acc<q>")]
627 )
628
;; FABD: floating-point absolute difference, abs (a - b), for vector
;; and scalar FP modes.
629 (define_insn "fabd<mode>3"
630 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
631 (abs:VHSDF_HSDF
632 (minus:VHSDF_HSDF
633 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
634 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
635 "TARGET_SIMD"
636 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
637 [(set_attr "type" "neon_fp_abd_<stype><q>")]
638 )
639
640 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 1 ties operand 1 to the destination ("0") and encodes
;; the inverted immediate as a BIC move-immediate.
641 (define_insn "and<mode>3"
642 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
643 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
644 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
645 "TARGET_SIMD"
646 {
647 switch (which_alternative)
648 {
649 case 0:
650 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
651 case 1:
652 return aarch64_output_simd_mov_immediate (operands[2],
653 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
654 default:
655 gcc_unreachable ();
656 }
657 }
658 [(set_attr "type" "neon_logic<q>")]
659 )
660 
661 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Mirror of and<mode>3: the immediate alternative uses the ORR
;; move-immediate encoding.
662 (define_insn "ior<mode>3"
663 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
664 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
665 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
666 "TARGET_SIMD"
667 {
668 switch (which_alternative)
669 {
670 case 0:
671 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
672 case 1:
673 return aarch64_output_simd_mov_immediate (operands[2],
674 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
675 default:
676 gcc_unreachable ();
677 }
678 }
679 [(set_attr "type" "neon_logic<q>")]
680 )
681
;; Vector exclusive-or (EOR).
682 (define_insn "xor<mode>3"
683 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
684 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
685 (match_operand:VDQ_I 2 "register_operand" "w")))]
686 "TARGET_SIMD"
687 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
688 [(set_attr "type" "neon_logic<q>")]
689 )
690 
;; Vector bitwise complement (NOT).
691 (define_insn "one_cmpl<mode>2"
692 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
693 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
694 "TARGET_SIMD"
695 "not\t%0.<Vbtype>, %1.<Vbtype>"
696 [(set_attr "type" "neon_logic<q>")]
697 )
698
;; Insert a scalar into one vector lane.  Operand 2 arrives as a
;; one-hot vec_merge mask; the prep code converts it to a lane number
;; (endian-remapped) and rebuilds the mask.  Alternatives: scalar from
;; a GP register (INS from Wn), from lane 0 of a SIMD register (INS
;; element), or straight from memory (LD1 to one lane).
699 (define_insn "aarch64_simd_vec_set<mode>"
700 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
701 (vec_merge:VDQ_BHSI
702 (vec_duplicate:VDQ_BHSI
703 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
704 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
705 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
706 "TARGET_SIMD"
707 {
708 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
709 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
710 switch (which_alternative)
711 {
712 case 0:
713 return "ins\\t%0.<Vetype>[%p2], %w1";
714 case 1:
715 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
716 case 2:
717 return "ld1\\t{%0.<Vetype>}[%p2], %1";
718 default:
719 gcc_unreachable ();
720 }
721 }
722 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
723 )
724
;; Copy one lane of operand 3 into one lane of operand 0 (INS
;; element-to-element).  Operand 2 is the one-hot destination-lane
;; mask, operand 4 the source lane; both are endian-remapped.
725 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
726 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
727 (vec_merge:VALL_F16
728 (vec_duplicate:VALL_F16
729 (vec_select:<VEL>
730 (match_operand:VALL_F16 3 "register_operand" "w")
731 (parallel
732 [(match_operand:SI 4 "immediate_operand" "i")])))
733 (match_operand:VALL_F16 1 "register_operand" "0")
734 (match_operand:SI 2 "immediate_operand" "i")))]
735 "TARGET_SIMD"
736 {
737 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
738 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
739 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
740 
741 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
742 }
743 [(set_attr "type" "neon_ins<q>")]
744 )
745 
;; Cross-width variant: the source lane comes from the opposite-width
;; vector mode (<VSWAP_WIDTH>), and its index is remapped in that mode.
746 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
747 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
748 (vec_merge:VALL_F16_NO_V2Q
749 (vec_duplicate:VALL_F16_NO_V2Q
750 (vec_select:<VEL>
751 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
752 (parallel
753 [(match_operand:SI 4 "immediate_operand" "i")])))
754 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
755 (match_operand:SI 2 "immediate_operand" "i")))]
756 "TARGET_SIMD"
757 {
758 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
759 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
760 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
761 INTVAL (operands[4])));
762 
763 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
764 }
765 [(set_attr "type" "neon_ins<q>")]
766 )
767
;; USHR: logical right shift by immediate (vector of duplicated
;; shift counts, constrained by Dr).
768 (define_insn "aarch64_simd_lshr<mode>"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
772 "TARGET_SIMD"
773 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
774 [(set_attr "type" "neon_shift_imm<q>")]
775 )
776 
;; SSHR: arithmetic right shift by immediate.
777 (define_insn "aarch64_simd_ashr<mode>"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
780 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
781 "TARGET_SIMD"
782 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
783 [(set_attr "type" "neon_shift_imm<q>")]
784 )
785 
;; SHL: left shift by immediate (Dl left-shift immediate constraint).
786 (define_insn "aarch64_simd_imm_shl<mode>"
787 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
788 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
789 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
790 "TARGET_SIMD"
791 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
792 [(set_attr "type" "neon_shift_imm<q>")]
793 )
794
;; SSHL used for RTL ashift by a register: left shifts correspond to
;; positive per-lane shift counts.
795 (define_insn "aarch64_simd_reg_sshl<mode>"
796 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
797 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
798 (match_operand:VDQ_I 2 "register_operand" "w")))]
799 "TARGET_SIMD"
800 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
801 [(set_attr "type" "neon_shift_reg<q>")]
802 )
803 
;; USHL by register, kept as an unspec; the lshr/vlshr expanders feed
;; it a negated shift count to get a logical right shift.
804 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
805 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
806 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
807 (match_operand:VDQ_I 2 "register_operand" "w")]
808 UNSPEC_ASHIFT_UNSIGNED))]
809 "TARGET_SIMD"
810 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
811 [(set_attr "type" "neon_shift_reg<q>")]
812 )
813 
;; SSHL by register as an unspec; used with a negated count by the
;; ashr/vashr expanders for arithmetic right shifts.
814 (define_insn "aarch64_simd_reg_shl<mode>_signed"
815 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
816 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
817 (match_operand:VDQ_I 2 "register_operand" "w")]
818 UNSPEC_ASHIFT_SIGNED))]
819 "TARGET_SIMD"
820 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
821 [(set_attr "type" "neon_shift_reg<q>")]
822 )
823
;; Left shift expander.  In-range constant counts (0 .. bits-1) use the
;; SHL immediate form; anything else is duplicated into a vector and
;; shifted with SSHL by register.
824 (define_expand "ashl<mode>3"
825 [(match_operand:VDQ_I 0 "register_operand" "")
826 (match_operand:VDQ_I 1 "register_operand" "")
827 (match_operand:SI 2 "general_operand" "")]
828 "TARGET_SIMD"
829 {
830 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
831 int shift_amount;
832 
833 if (CONST_INT_P (operands[2]))
834 {
835 shift_amount = INTVAL (operands[2]);
836 if (shift_amount >= 0 && shift_amount < bit_width)
837 {
838 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
839 shift_amount);
840 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
841 operands[1],
842 tmp));
843 DONE;
844 }
845 else
846 {
847 operands[2] = force_reg (SImode, operands[2]);
848 }
849 }
850 else if (MEM_P (operands[2]))
851 {
852 operands[2] = force_reg (SImode, operands[2]);
853 }
854 
855 if (REG_P (operands[2]))
856 {
857 rtx tmp = gen_reg_rtx (<MODE>mode);
858 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
859 convert_to_mode (<VEL>mode,
860 operands[2],
861 0)));
862 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
863 tmp));
864 DONE;
865 }
866 else
867 FAIL;
868 }
869 )
870
;; Logical right shift expander.  In-range constant counts
;; (1 .. bits, matching the USHR immediate encoding) use the
;; immediate form; otherwise the count is negated and fed to USHL
;; by register (negative USHL count = right shift).
871 (define_expand "lshr<mode>3"
872 [(match_operand:VDQ_I 0 "register_operand" "")
873 (match_operand:VDQ_I 1 "register_operand" "")
874 (match_operand:SI 2 "general_operand" "")]
875 "TARGET_SIMD"
876 {
877 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
878 int shift_amount;
879 
880 if (CONST_INT_P (operands[2]))
881 {
882 shift_amount = INTVAL (operands[2]);
883 if (shift_amount > 0 && shift_amount <= bit_width)
884 {
885 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
886 shift_amount);
887 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
888 operands[1],
889 tmp));
890 DONE;
891 }
892 else
893 operands[2] = force_reg (SImode, operands[2]);
894 }
895 else if (MEM_P (operands[2]))
896 {
897 operands[2] = force_reg (SImode, operands[2]);
898 }
899 
900 if (REG_P (operands[2]))
901 {
902 rtx tmp = gen_reg_rtx (SImode);
903 rtx tmp1 = gen_reg_rtx (<MODE>mode);
904 emit_insn (gen_negsi2 (tmp, operands[2]));
905 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
906 convert_to_mode (<VEL>mode,
907 tmp, 0)));
908 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
909 operands[1],
910 tmp1));
911 DONE;
912 }
913 else
914 FAIL;
915 }
916 )
917
;; Arithmetic right shift expander.  Same shape as lshr<mode>3, but
;; uses SSHR for immediates and negated-count SSHL by register
;; otherwise.
918 (define_expand "ashr<mode>3"
919 [(match_operand:VDQ_I 0 "register_operand" "")
920 (match_operand:VDQ_I 1 "register_operand" "")
921 (match_operand:SI 2 "general_operand" "")]
922 "TARGET_SIMD"
923 {
924 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
925 int shift_amount;
926 
927 if (CONST_INT_P (operands[2]))
928 {
929 shift_amount = INTVAL (operands[2]);
930 if (shift_amount > 0 && shift_amount <= bit_width)
931 {
932 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
933 shift_amount);
934 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
935 operands[1],
936 tmp));
937 DONE;
938 }
939 else
940 operands[2] = force_reg (SImode, operands[2]);
941 }
942 else if (MEM_P (operands[2]))
943 {
944 operands[2] = force_reg (SImode, operands[2]);
945 }
946 
947 if (REG_P (operands[2]))
948 {
949 rtx tmp = gen_reg_rtx (SImode);
950 rtx tmp1 = gen_reg_rtx (<MODE>mode);
951 emit_insn (gen_negsi2 (tmp, operands[2]));
952 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
953 convert_to_mode (<VEL>mode,
954 tmp, 0)));
955 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
956 operands[1],
957 tmp1));
958 DONE;
959 }
960 else
961 FAIL;
962 }
963 )
964
;; Vector-by-vector left shift: delegate directly to the signed SSHL insn,
;; which takes a per-lane shift-count vector.
965 (define_expand "vashl<mode>3"
966 [(match_operand:VDQ_I 0 "register_operand" "")
967 (match_operand:VDQ_I 1 "register_operand" "")
968 (match_operand:VDQ_I 2 "register_operand" "")]
969 "TARGET_SIMD"
970 {
971 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
972 operands[2]));
973 DONE;
974 })
975
976 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
977 ;; Negating individual lanes most certainly offsets the
978 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the count vector and use
;; the signed register shift-left insn (SSHL shifts right for negative
;; counts).
979 (define_expand "vashr<mode>3"
980 [(match_operand:VDQ_BHSI 0 "register_operand" "")
981 (match_operand:VDQ_BHSI 1 "register_operand" "")
982 (match_operand:VDQ_BHSI 2 "register_operand" "")]
983 "TARGET_SIMD"
984 {
985 rtx neg = gen_reg_rtx (<MODE>mode);
986 emit (gen_neg<mode>2 (neg, operands[2]));
987 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
988 neg));
989 DONE;
990 })
991
992 ;; DI vector shift
;; Scalar DI arithmetic shift right via the standard ashrdi3 pattern, with
;; the shift-by-64 case folded to shift-by-63 (same result for asr).
993 (define_expand "aarch64_ashr_simddi"
994 [(match_operand:DI 0 "register_operand" "=w")
995 (match_operand:DI 1 "register_operand" "w")
996 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
997 "TARGET_SIMD"
998 {
999 /* An arithmetic shift right by 64 fills the result with copies of the sign
1000 bit, just like asr by 63 - however the standard pattern does not handle
1001 a shift by 64. */
1002 if (INTVAL (operands[2]) == 64)
1003 operands[2] = GEN_INT (63);
1004 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1005 DONE;
1006 }
1007 )
1008
;; Vector-by-vector logical right shift: negate the count vector and use
;; the unsigned register shift-left insn (USHL shifts right for negative
;; counts).  Mirrors vashr<mode>3 above.
1009 (define_expand "vlshr<mode>3"
1010 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1011 (match_operand:VDQ_BHSI 1 "register_operand" "")
1012 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1013 "TARGET_SIMD"
1014 {
1015 rtx neg = gen_reg_rtx (<MODE>mode);
1016 emit (gen_neg<mode>2 (neg, operands[2]));
1017 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1018 neg));
1019 DONE;
1020 })
1021
;; Scalar DI logical shift right; a shift by 64 yields zero, which the
;; standard lshrdi3 pattern cannot express, so emit a zero move instead.
1022 (define_expand "aarch64_lshr_simddi"
1023 [(match_operand:DI 0 "register_operand" "=w")
1024 (match_operand:DI 1 "register_operand" "w")
1025 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1026 "TARGET_SIMD"
1027 {
1028 if (INTVAL (operands[2]) == 64)
1029 emit_move_insn (operands[0], const0_rtx);
1030 else
1031 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1032 DONE;
1033 }
1034 )
1035
;; Standard-name vec_set for integer vectors: convert the lane index in
;; operand 2 into the one-hot mask expected by the vec_merge-based insn.
1036 (define_expand "vec_set<mode>"
1037 [(match_operand:VDQ_BHSI 0 "register_operand")
1038 (match_operand:<VEL> 1 "register_operand")
1039 (match_operand:SI 2 "immediate_operand")]
1040 "TARGET_SIMD"
1041 {
1042 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1043 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1044 GEN_INT (elem), operands[0]));
1045 DONE;
1046 }
1047 )
1048
1049 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper), kept as an unspec.
;; On big-endian the architectural "right" is a shl; on little-endian ushr.
1050 (define_insn "vec_shr_<mode>"
1051 [(set (match_operand:VD 0 "register_operand" "=w")
1052 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1053 (match_operand:SI 2 "immediate_operand" "i")]
1054 UNSPEC_VEC_SHR))]
1055 "TARGET_SIMD"
1056 {
1057 if (BYTES_BIG_ENDIAN)
1058 return "shl %d0, %d1, %2";
1059 else
1060 return "ushr %d0, %d1, %2";
1061 }
1062 [(set_attr "type" "neon_shift_imm")]
1063 )
1064
;; Insert a DI value into one lane of a V2DI vector.  Operand 2 is a one-hot
;; mask; it is converted to the endian-adjusted lane number for printing
;; (%p2 prints the log2 of the mask).  Alternative 0 inserts from a GP
;; register, alternative 1 from lane 0 of a SIMD register.
1065 (define_insn "aarch64_simd_vec_setv2di"
1066 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1067 (vec_merge:V2DI
1068 (vec_duplicate:V2DI
1069 (match_operand:DI 1 "register_operand" "r,w"))
1070 (match_operand:V2DI 3 "register_operand" "0,0")
1071 (match_operand:SI 2 "immediate_operand" "i,i")))]
1072 "TARGET_SIMD"
1073 {
1074 int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1075 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1076 switch (which_alternative)
1077 {
1078 case 0:
1079 return "ins\\t%0.d[%p2], %1";
1080 case 1:
1081 return "ins\\t%0.d[%p2], %1.d[0]";
1082 default:
1083 gcc_unreachable ();
1084 }
1085 }
1086 [(set_attr "type" "neon_from_gp, neon_ins_q")]
1087 )
1088
;; Standard-name vec_set for V2DI: lane index -> one-hot mask, then reuse
;; the insn above with operand 0 also supplying the merged-in old value.
1089 (define_expand "vec_setv2di"
1090 [(match_operand:V2DI 0 "register_operand")
1091 (match_operand:DI 1 "register_operand")
1092 (match_operand:SI 2 "immediate_operand")]
1093 "TARGET_SIMD"
1094 {
1095 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1096 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1097 GEN_INT (elem), operands[0]));
1098 DONE;
1099 }
1100 )
1101
;; Insert an FP scalar (from lane 0 of a SIMD register) into one lane of an
;; FP/FP16 vector; one-hot mask in operand 2 is endian-adjusted as above.
1102 (define_insn "aarch64_simd_vec_set<mode>"
1103 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1104 (vec_merge:VDQF_F16
1105 (vec_duplicate:VDQF_F16
1106 (match_operand:<VEL> 1 "register_operand" "w"))
1107 (match_operand:VDQF_F16 3 "register_operand" "0")
1108 (match_operand:SI 2 "immediate_operand" "i")))]
1109 "TARGET_SIMD"
1110 {
1111 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1112
1113 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1114 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1115 }
1116 [(set_attr "type" "neon_ins<q>")]
1117 )
1118
;; Standard-name vec_set for FP/FP16 vectors, same mask conversion.
1119 (define_expand "vec_set<mode>"
1120 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1121 (match_operand:<VEL> 1 "register_operand" "w")
1122 (match_operand:SI 2 "immediate_operand" "")]
1123 "TARGET_SIMD"
1124 {
1125 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1126 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1127 GEN_INT (elem), operands[0]));
1128 DONE;
1129 }
1130 )
1131
1132
;; Multiply-accumulate: op0 = op1 + op2 * op3 (integer lanes).
1133 (define_insn "aarch64_mla<mode>"
1134 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1135 (plus:VDQ_BHSI (mult:VDQ_BHSI
1136 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1137 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1138 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1139 "TARGET_SIMD"
1140 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1141 [(set_attr "type" "neon_mla_<Vetype><q>")]
1142 )
1143
;; MLA with one multiplicand broadcast from a selected lane of operand 1;
;; the lane index is converted to the endian-adjusted form before printing.
1144 (define_insn "*aarch64_mla_elt<mode>"
1145 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1146 (plus:VDQHS
1147 (mult:VDQHS
1148 (vec_duplicate:VDQHS
1149 (vec_select:<VEL>
1150 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1151 (parallel [(match_operand:SI 2 "immediate_operand")])))
1152 (match_operand:VDQHS 3 "register_operand" "w"))
1153 (match_operand:VDQHS 4 "register_operand" "0")))]
1154 "TARGET_SIMD"
1155 {
1156 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1157 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1158 }
1159 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1160 )
1161
;; Same as above but the lane source has the opposite vector width
;; (<VSWAP_WIDTH>), e.g. a Q-reg lane feeding a D-reg MLA or vice versa.
1162 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1163 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1164 (plus:VDQHS
1165 (mult:VDQHS
1166 (vec_duplicate:VDQHS
1167 (vec_select:<VEL>
1168 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1169 (parallel [(match_operand:SI 2 "immediate_operand")])))
1170 (match_operand:VDQHS 3 "register_operand" "w"))
1171 (match_operand:VDQHS 4 "register_operand" "0")))]
1172 "TARGET_SIMD"
1173 {
1174 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1175 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1176 }
1177 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1178 )
1179
;; MLA where the scalar multiplicand is a plain vec_duplicate of a SIMD
;; scalar register; maps to the by-element form using lane 0.
1180 (define_insn "*aarch64_mla_elt_merge<mode>"
1181 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1182 (plus:VDQHS
1183 (mult:VDQHS (vec_duplicate:VDQHS
1184 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1185 (match_operand:VDQHS 2 "register_operand" "w"))
1186 (match_operand:VDQHS 3 "register_operand" "0")))]
1187 "TARGET_SIMD"
1188 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1189 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1190 )
1191
;; Multiply-subtract: op0 = op1 - op2 * op3 (integer lanes).
1192 (define_insn "aarch64_mls<mode>"
1193 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1194 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1195 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1196 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1197 "TARGET_SIMD"
1198 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1199 [(set_attr "type" "neon_mla_<Vetype><q>")]
1200 )
1201
;; MLS with one multiplicand broadcast from a selected lane of operand 1.
1202 (define_insn "*aarch64_mls_elt<mode>"
1203 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1204 (minus:VDQHS
1205 (match_operand:VDQHS 4 "register_operand" "0")
1206 (mult:VDQHS
1207 (vec_duplicate:VDQHS
1208 (vec_select:<VEL>
1209 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1210 (parallel [(match_operand:SI 2 "immediate_operand")])))
1211 (match_operand:VDQHS 3 "register_operand" "w"))))]
1212 "TARGET_SIMD"
1213 {
1214 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1215 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1216 }
1217 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1218 )
1219
;; MLS by-element where the lane source has the opposite vector width.
1220 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1221 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1222 (minus:VDQHS
1223 (match_operand:VDQHS 4 "register_operand" "0")
1224 (mult:VDQHS
1225 (vec_duplicate:VDQHS
1226 (vec_select:<VEL>
1227 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1228 (parallel [(match_operand:SI 2 "immediate_operand")])))
1229 (match_operand:VDQHS 3 "register_operand" "w"))))]
1230 "TARGET_SIMD"
1231 {
1232 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1233 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1234 }
1235 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1236 )
1237
;; MLS where the scalar multiplicand is a vec_duplicate of a SIMD scalar
;; register; maps to the by-element form using lane 0.
1238 (define_insn "*aarch64_mls_elt_merge<mode>"
1239 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1240 (minus:VDQHS
1241 (match_operand:VDQHS 1 "register_operand" "0")
1242 (mult:VDQHS (vec_duplicate:VDQHS
1243 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1244 (match_operand:VDQHS 3 "register_operand" "w"))))]
1245 "TARGET_SIMD"
1246 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1247 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1248 )
1249
1250 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min for B/H/S integer vectors.
1251 (define_insn "<su><maxmin><mode>3"
1252 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1253 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1254 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1255 "TARGET_SIMD"
1256 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1257 [(set_attr "type" "neon_minmax<q>")]
1258 )
1259
;; V2DI has no direct max/min instruction: synthesize it as a compare
;; followed by a vcond select between the two inputs.
1260 (define_expand "<su><maxmin>v2di3"
1261 [(set (match_operand:V2DI 0 "register_operand" "")
1262 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1263 (match_operand:V2DI 2 "register_operand" "")))]
1264 "TARGET_SIMD"
1265 {
1266 enum rtx_code cmp_operator;
1267 rtx cmp_fmt;
1268
1269 switch (<CODE>)
1270 {
1271 case UMIN:
1272 cmp_operator = LTU;
1273 break;
1274 case SMIN:
1275 cmp_operator = LT;
1276 break;
1277 case UMAX:
1278 cmp_operator = GTU;
1279 break;
1280 case SMAX:
1281 cmp_operator = GT;
1282 break;
1283 default:
1284 gcc_unreachable ();
1285 }
1286
1287 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1288 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1289 operands[2], cmp_fmt, operands[1], operands[2]));
1290 DONE;
1291 })
1292
1293 ;; Pairwise Integer Max/Min operations.
1294 (define_insn "aarch64_<maxmin_uns>p<mode>"
1295 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1296 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1297 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1298 MAXMINV))]
1299 "TARGET_SIMD"
1300 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1301 [(set_attr "type" "neon_minmax<q>")]
1302 )
1303
1304 ;; Pairwise FP Max/Min operations.
1305 (define_insn "aarch64_<maxmin_uns>p<mode>"
1306 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1307 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1308 (match_operand:VHSDF 2 "register_operand" "w")]
1309 FMAXMINV))]
1310 "TARGET_SIMD"
1311 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1312 [(set_attr "type" "neon_minmax<q>")]
1313 )
1314
1315 ;; vec_concat gives a new vector with the low elements from operand 1, and
1316 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1317 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1318 ;; What that means, is that the RTL descriptions of the below patterns
1319 ;; need to change depending on endianness.
1320
1321 ;; Move to the low architectural bits of the register.
1322 ;; On little-endian this is { operand, zeroes }
1323 ;; On big-endian this is { zeroes, operand }
1324
;; Little-endian variant for modes with more than two elements: the zero
;; half is written as vec_duplicate of 0.
1325 (define_insn "move_lo_quad_internal_<mode>"
1326 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1327 (vec_concat:VQ_NO2E
1328 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1329 (vec_duplicate:<VHALF> (const_int 0))))]
1330 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1331 "@
1332 dup\\t%d0, %1.d[0]
1333 fmov\\t%d0, %1
1334 dup\\t%d0, %1"
1335 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1336 (set_attr "simd" "yes,*,yes")
1337 (set_attr "fp" "*,yes,*")
1338 (set_attr "length" "4")]
1339 )
1340
;; Little-endian variant for two-element modes: the zero half is a bare
;; (const_int 0) since <VHALF> is scalar.
1341 (define_insn "move_lo_quad_internal_<mode>"
1342 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1343 (vec_concat:VQ_2E
1344 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1345 (const_int 0)))]
1346 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1347 "@
1348 dup\\t%d0, %1.d[0]
1349 fmov\\t%d0, %1
1350 dup\\t%d0, %1"
1351 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1352 (set_attr "simd" "yes,*,yes")
1353 (set_attr "fp" "*,yes,*")
1354 (set_attr "length" "4")]
1355 )
1356
;; Big-endian variant, >2 elements: zero half comes first in the concat.
1357 (define_insn "move_lo_quad_internal_be_<mode>"
1358 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1359 (vec_concat:VQ_NO2E
1360 (vec_duplicate:<VHALF> (const_int 0))
1361 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1362 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1363 "@
1364 dup\\t%d0, %1.d[0]
1365 fmov\\t%d0, %1
1366 dup\\t%d0, %1"
1367 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1368 (set_attr "simd" "yes,*,yes")
1369 (set_attr "fp" "*,yes,*")
1370 (set_attr "length" "4")]
1371 )
1372
;; Big-endian variant, two-element modes.
1373 (define_insn "move_lo_quad_internal_be_<mode>"
1374 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1375 (vec_concat:VQ_2E
1376 (const_int 0)
1377 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1378 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1379 "@
1380 dup\\t%d0, %1.d[0]
1381 fmov\\t%d0, %1
1382 dup\\t%d0, %1"
1383 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1384 (set_attr "simd" "yes,*,yes")
1385 (set_attr "fp" "*,yes,*")
1386 (set_attr "length" "4")]
1387 )
1388
;; Dispatch to the endianness-appropriate internal pattern.
1389 (define_expand "move_lo_quad_<mode>"
1390 [(match_operand:VQ 0 "register_operand")
1391 (match_operand:VQ 1 "register_operand")]
1392 "TARGET_SIMD"
1393 {
1394 if (BYTES_BIG_ENDIAN)
1395 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1396 else
1397 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1398 DONE;
1399 }
1400 )
1401
1402 ;; Move operand1 to the high architectural bits of the register, keeping
1403 ;; the low architectural bits of operand2.
1404 ;; For little-endian this is { operand2, operand1 }
1405 ;; For big-endian this is { operand1, operand2 }
1406
;; Little-endian: the kept low half is a vec_select of operand 0 itself
;; (hence the "+w" read-write constraint); operand 2 is the lo-half
;; parallel selector supplied by the expander below.
1407 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1408 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1409 (vec_concat:VQ
1410 (vec_select:<VHALF>
1411 (match_dup 0)
1412 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1413 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1414 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1415 "@
1416 ins\\t%0.d[1], %1.d[0]
1417 ins\\t%0.d[1], %1"
1418 [(set_attr "type" "neon_ins")]
1419 )
1420
;; Big-endian: same operation with the concat halves swapped.
1421 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1422 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1423 (vec_concat:VQ
1424 (match_operand:<VHALF> 1 "register_operand" "w,r")
1425 (vec_select:<VHALF>
1426 (match_dup 0)
1427 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1428 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1429 "@
1430 ins\\t%0.d[1], %1.d[0]
1431 ins\\t%0.d[1], %1"
1432 [(set_attr "type" "neon_ins")]
1433 )
1434
;; Build the lo-half lane-selector parallel and dispatch on endianness.
1435 (define_expand "move_hi_quad_<mode>"
1436 [(match_operand:VQ 0 "register_operand" "")
1437 (match_operand:<VHALF> 1 "register_operand" "")]
1438 "TARGET_SIMD"
1439 {
1440 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1441 if (BYTES_BIG_ENDIAN)
1442 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1443 operands[1], p));
1444 else
1445 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1446 operands[1], p));
1447 DONE;
1448 })
1449
1450 ;; Narrowing operations.
1451
1452 ;; For doubles.
;; Truncate each element of a quad vector to half width (XTN).
1453 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1454 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1455 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1456 "TARGET_SIMD"
1457 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1458 [(set_attr "type" "neon_shift_imm_narrow_q")]
1459 )
1460
;; Pack two D-reg vectors into one narrowed D-reg result: assemble both
;; inputs into a temporary Q register (endian-aware lo/hi placement), then
;; narrow it with a single XTN.
1461 (define_expand "vec_pack_trunc_<mode>"
1462 [(match_operand:<VNARROWD> 0 "register_operand" "")
1463 (match_operand:VDN 1 "register_operand" "")
1464 (match_operand:VDN 2 "register_operand" "")]
1465 "TARGET_SIMD"
1466 {
1467 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1468 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1469 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1470
1471 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1472 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1473 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1474 DONE;
1475 })
1476
1477 ;; For quads.
1478
;; Narrow two Q-reg vectors into one Q-reg result with XTN + XTN2; the
;; input order is swapped on big-endian.  Earlyclobber ("=&w") because the
;; first XTN writes %0 before %2 (or %1) is fully read.
1479 (define_insn "vec_pack_trunc_<mode>"
1480 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1481 (vec_concat:<VNARROWQ2>
1482 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1483 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1484 "TARGET_SIMD"
1485 {
1486 if (BYTES_BIG_ENDIAN)
1487 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1488 else
1489 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1490 }
1491 [(set_attr "type" "multiple")
1492 (set_attr "length" "8")]
1493 )
1494
1495 ;; Widening operations.
1496
;; Sign-/zero-extend the low half of a quad vector to full width
;; (SSHLL/USHLL by 0).
1497 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1498 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1499 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1500 (match_operand:VQW 1 "register_operand" "w")
1501 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1502 )))]
1503 "TARGET_SIMD"
1504 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1505 [(set_attr "type" "neon_shift_imm_long")]
1506 )
1507
;; Same for the high half (SSHLL2/USHLL2 by 0).
1508 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1509 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1510 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1511 (match_operand:VQW 1 "register_operand" "w")
1512 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1513 )))]
1514 "TARGET_SIMD"
1515 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1516 [(set_attr "type" "neon_shift_imm_long")]
1517 )
1518
;; Standard-name expander: build the hi-half selector and emit the insn.
1519 (define_expand "vec_unpack<su>_hi_<mode>"
1520 [(match_operand:<VWIDE> 0 "register_operand" "")
1521 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1522 "TARGET_SIMD"
1523 {
1524 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1525 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1526 operands[1], p));
1527 DONE;
1528 }
1529 )
1530
;; Standard-name expander: build the lo-half selector and emit the insn.
1531 (define_expand "vec_unpack<su>_lo_<mode>"
1532 [(match_operand:<VWIDE> 0 "register_operand" "")
1533 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1534 "TARGET_SIMD"
1535 {
1536 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1537 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1538 operands[1], p));
1539 DONE;
1540 }
1541 )
1542
1543 ;; Widening arithmetic.
1544
;; Widening multiply-accumulate on the low halves: SMLAL/UMLAL.
1545 (define_insn "*aarch64_<su>mlal_lo<mode>"
1546 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1547 (plus:<VWIDE>
1548 (mult:<VWIDE>
1549 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1550 (match_operand:VQW 2 "register_operand" "w")
1551 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1552 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1553 (match_operand:VQW 4 "register_operand" "w")
1554 (match_dup 3))))
1555 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1556 "TARGET_SIMD"
1557 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1558 [(set_attr "type" "neon_mla_<Vetype>_long")]
1559 )
1560
;; Widening multiply-accumulate on the high halves: SMLAL2/UMLAL2.
1561 (define_insn "*aarch64_<su>mlal_hi<mode>"
1562 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1563 (plus:<VWIDE>
1564 (mult:<VWIDE>
1565 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1566 (match_operand:VQW 2 "register_operand" "w")
1567 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1568 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569 (match_operand:VQW 4 "register_operand" "w")
1570 (match_dup 3))))
1571 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1572 "TARGET_SIMD"
1573 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1574 [(set_attr "type" "neon_mla_<Vetype>_long")]
1575 )
1576
;; Widening multiply-subtract on the low halves: SMLSL/UMLSL.
1577 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1578 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1579 (minus:<VWIDE>
1580 (match_operand:<VWIDE> 1 "register_operand" "0")
1581 (mult:<VWIDE>
1582 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1583 (match_operand:VQW 2 "register_operand" "w")
1584 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1585 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1586 (match_operand:VQW 4 "register_operand" "w")
1587 (match_dup 3))))))]
1588 "TARGET_SIMD"
1589 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1590 [(set_attr "type" "neon_mla_<Vetype>_long")]
1591 )
1592
;; Widening multiply-subtract on the high halves: SMLSL2/UMLSL2.
1593 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1594 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1595 (minus:<VWIDE>
1596 (match_operand:<VWIDE> 1 "register_operand" "0")
1597 (mult:<VWIDE>
1598 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599 (match_operand:VQW 2 "register_operand" "w")
1600 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1601 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602 (match_operand:VQW 4 "register_operand" "w")
1603 (match_dup 3))))))]
1604 "TARGET_SIMD"
1605 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1606 [(set_attr "type" "neon_mla_<Vetype>_long")]
1607 )
1608
;; Widening multiply-accumulate of whole D-reg vectors.
1609 (define_insn "*aarch64_<su>mlal<mode>"
1610 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1611 (plus:<VWIDE>
1612 (mult:<VWIDE>
1613 (ANY_EXTEND:<VWIDE>
1614 (match_operand:VD_BHSI 1 "register_operand" "w"))
1615 (ANY_EXTEND:<VWIDE>
1616 (match_operand:VD_BHSI 2 "register_operand" "w")))
1617 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1618 "TARGET_SIMD"
1619 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1620 [(set_attr "type" "neon_mla_<Vetype>_long")]
1621 )
1622
;; Widening multiply-subtract of whole D-reg vectors.
1623 (define_insn "*aarch64_<su>mlsl<mode>"
1624 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1625 (minus:<VWIDE>
1626 (match_operand:<VWIDE> 1 "register_operand" "0")
1627 (mult:<VWIDE>
1628 (ANY_EXTEND:<VWIDE>
1629 (match_operand:VD_BHSI 2 "register_operand" "w"))
1630 (ANY_EXTEND:<VWIDE>
1631 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1632 "TARGET_SIMD"
1633 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1634 [(set_attr "type" "neon_mla_<Vetype>_long")]
1635 )
1636
;; Widening multiply of the low halves: SMULL/UMULL.
1637 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1638 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1639 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1640 (match_operand:VQW 1 "register_operand" "w")
1641 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 2 "register_operand" "w")
1644 (match_dup 3)))))]
1645 "TARGET_SIMD"
1646 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1647 [(set_attr "type" "neon_mul_<Vetype>_long")]
1648 )
1649
;; Standard-name expander for the lo-half widening multiply.
1650 (define_expand "vec_widen_<su>mult_lo_<mode>"
1651 [(match_operand:<VWIDE> 0 "register_operand" "")
1652 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1654 "TARGET_SIMD"
1655 {
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1657 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1658 operands[1],
1659 operands[2], p));
1660 DONE;
1661 }
1662 )
1663
;; Widening multiply of the high halves: SMULL2/UMULL2.
1664 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1665 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1666 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1667 (match_operand:VQW 1 "register_operand" "w")
1668 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1669 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1670 (match_operand:VQW 2 "register_operand" "w")
1671 (match_dup 3)))))]
1672 "TARGET_SIMD"
1673 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1674 [(set_attr "type" "neon_mul_<Vetype>_long")]
1675 )
1676
;; Standard-name expander for the hi-half widening multiply.
1677 (define_expand "vec_widen_<su>mult_hi_<mode>"
1678 [(match_operand:<VWIDE> 0 "register_operand" "")
1679 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1680 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1681 "TARGET_SIMD"
1682 {
1683 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1684 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1685 operands[1],
1686 operands[2], p));
1687 DONE;
1688
1689 }
1690 )
1691
1692 ;; FP vector operations.
1693 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1694 ;; double-precision (64-bit) floating-point data types and arithmetic as
1695 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1696 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1697 ;;
1698 ;; Floating-point operations can raise an exception. Vectorizing such
1699 ;; operations are safe because of reasons explained below.
1700 ;;
1701 ;; ARMv8 permits an extension to enable trapped floating-point
1702 ;; exception handling, however this is an optional feature. In the
1703 ;; event of a floating-point exception being raised by vectorised
1704 ;; code then:
1705 ;; 1. If trapped floating-point exceptions are available, then a trap
1706 ;; will be taken when any lane raises an enabled exception. A trap
1707 ;; handler may determine which lane raised the exception.
1708 ;; 2. Alternatively a sticky exception flag is set in the
1709 ;; floating-point status register (FPSR). Software may explicitly
1710 ;; test the exception flags, in which case the tests will either
1711 ;; prevent vectorisation, allowing precise identification of the
1712 ;; failing operation, or if tested outside of vectorisable regions
1713 ;; then the specific operation and lane are not of interest.
1714
1715 ;; FP arithmetic operations.
1716
;; Element-wise FP add (FADD).
1717 (define_insn "add<mode>3"
1718 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720 (match_operand:VHSDF 2 "register_operand" "w")))]
1721 "TARGET_SIMD"
1722 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 )
1725
;; Element-wise FP subtract (FSUB).
1726 (define_insn "sub<mode>3"
1727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729 (match_operand:VHSDF 2 "register_operand" "w")))]
1730 "TARGET_SIMD"
1731 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1733 )
1734
;; Element-wise FP multiply (FMUL).
1735 (define_insn "mul<mode>3"
1736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738 (match_operand:VHSDF 2 "register_operand" "w")))]
1739 "TARGET_SIMD"
1740 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1741 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1742 )
1743
;; FP divide: try the reciprocal-approximation sequence first; on failure
;; fall through to the *div<mode>3 insn below (FDIV).
1744 (define_expand "div<mode>3"
1745 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1746 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1747 (match_operand:VHSDF 2 "register_operand" "w")))]
1748 "TARGET_SIMD"
1749 {
1750 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1751 DONE;
1752
1753 operands[1] = force_reg (<MODE>mode, operands[1]);
1754 })
1755
1756 (define_insn "*div<mode>3"
1757 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1759 (match_operand:VHSDF 2 "register_operand" "w")))]
1760 "TARGET_SIMD"
1761 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1762 [(set_attr "type" "neon_fp_div_<stype><q>")]
1763 )
1764
;; Element-wise FP negate (FNEG).
1765 (define_insn "neg<mode>2"
1766 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1767 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1768 "TARGET_SIMD"
1769 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1770 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1771 )
1772
;; Element-wise FP absolute value (FABS).
1773 (define_insn "abs<mode>2"
1774 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1775 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1776 "TARGET_SIMD"
1777 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1778 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1779 )
1780
;; Fused multiply-add: op0 = op1 * op2 + op3 (FMLA, accumulator tied to
;; the destination via the "0" constraint).
1781 (define_insn "fma<mode>4"
1782 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1783 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1784 (match_operand:VHSDF 2 "register_operand" "w")
1785 (match_operand:VHSDF 3 "register_operand" "0")))]
1786 "TARGET_SIMD"
1787 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1788 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1789 )
1790
;; FMLA with one multiplicand broadcast from a selected lane of operand 1;
;; lane index is converted to the endian-adjusted form before printing.
1791 (define_insn "*aarch64_fma4_elt<mode>"
1792 [(set (match_operand:VDQF 0 "register_operand" "=w")
1793 (fma:VDQF
1794 (vec_duplicate:VDQF
1795 (vec_select:<VEL>
1796 (match_operand:VDQF 1 "register_operand" "<h_con>")
1797 (parallel [(match_operand:SI 2 "immediate_operand")])))
1798 (match_operand:VDQF 3 "register_operand" "w")
1799 (match_operand:VDQF 4 "register_operand" "0")))]
1800 "TARGET_SIMD"
1801 {
1802 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1803 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1804 }
1805 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1806 )
1807
;; FMLA by-element where the lane source has the opposite vector width.
1808 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1809 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1810 (fma:VDQSF
1811 (vec_duplicate:VDQSF
1812 (vec_select:<VEL>
1813 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1814 (parallel [(match_operand:SI 2 "immediate_operand")])))
1815 (match_operand:VDQSF 3 "register_operand" "w")
1816 (match_operand:VDQSF 4 "register_operand" "0")))]
1817 "TARGET_SIMD"
1818 {
1819 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1820 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1821 }
1822 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1823 )
1824
;; FMLA with the scalar multiplicand duplicated from a SIMD scalar
;; register; uses lane 0 of that register.
1825 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1826 [(set (match_operand:VMUL 0 "register_operand" "=w")
1827 (fma:VMUL
1828 (vec_duplicate:VMUL
1829 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1830 (match_operand:VMUL 2 "register_operand" "w")
1831 (match_operand:VMUL 3 "register_operand" "0")))]
1832 "TARGET_SIMD"
1833 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1834 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1835 )
1836
;; Scalar DF fma taking one operand from a V2DF lane (FMLA by element).
1837 (define_insn "*aarch64_fma4_elt_to_64v2df"
1838 [(set (match_operand:DF 0 "register_operand" "=w")
1839 (fma:DF
1840 (vec_select:DF
1841 (match_operand:V2DF 1 "register_operand" "w")
1842 (parallel [(match_operand:SI 2 "immediate_operand")]))
1843 (match_operand:DF 3 "register_operand" "w")
1844 (match_operand:DF 4 "register_operand" "0")))]
1845 "TARGET_SIMD"
1846 {
1847 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1848 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1849 }
1850 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1851 )
1852
1853 (define_insn "fnma<mode>4"
1854 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1855 (fma:VHSDF
1856 (match_operand:VHSDF 1 "register_operand" "w")
1857 (neg:VHSDF
1858 (match_operand:VHSDF 2 "register_operand" "w"))
1859 (match_operand:VHSDF 3 "register_operand" "0")))]
1860 "TARGET_SIMD"
1861 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1862 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1863 )
1864
;; By-element FMLS variants.  In each pattern the selected lane index in
;; operands[2] is rewritten with aarch64_endian_lane_rtx so the printed
;; lane matches the architectural lane on big-endian targets.

;; op0 = op4 - op3 * (dup (op1[lane])), same-width multiplier vector.
1865 (define_insn "*aarch64_fnma4_elt<mode>"
1866 [(set (match_operand:VDQF 0 "register_operand" "=w")
1867 (fma:VDQF
1868 (neg:VDQF
1869 (match_operand:VDQF 3 "register_operand" "w"))
1870 (vec_duplicate:VDQF
1871 (vec_select:<VEL>
1872 (match_operand:VDQF 1 "register_operand" "<h_con>")
1873 (parallel [(match_operand:SI 2 "immediate_operand")])))
1874 (match_operand:VDQF 4 "register_operand" "0")))]
1875 "TARGET_SIMD"
1876 {
1877 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1878 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1879 }
1880 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1881 )
1882
;; As above, but the lane comes from the opposite-width vector mode
;; (V2SF lane feeding a V4SF operation and vice versa).
1883 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1884 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1885 (fma:VDQSF
1886 (neg:VDQSF
1887 (match_operand:VDQSF 3 "register_operand" "w"))
1888 (vec_duplicate:VDQSF
1889 (vec_select:<VEL>
1890 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1891 (parallel [(match_operand:SI 2 "immediate_operand")])))
1892 (match_operand:VDQSF 4 "register_operand" "0")))]
1893 "TARGET_SIMD"
1894 {
1895 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1896 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1897 }
1898 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1899 )
1900
;; op0 = op3 - op2 * (dup op1): scalar multiplier, printed as lane [0].
1901 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1902 [(set (match_operand:VMUL 0 "register_operand" "=w")
1903 (fma:VMUL
1904 (neg:VMUL
1905 (match_operand:VMUL 2 "register_operand" "w"))
1906 (vec_duplicate:VMUL
1907 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1908 (match_operand:VMUL 3 "register_operand" "0")))]
1909 "TARGET_SIMD"
1910 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1911 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1912 )
1913
;; Scalar DF FMLS using one lane of a V2DF multiplicand.
1914 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1915 [(set (match_operand:DF 0 "register_operand" "=w")
1916 (fma:DF
1917 (vec_select:DF
1918 (match_operand:V2DF 1 "register_operand" "w")
1919 (parallel [(match_operand:SI 2 "immediate_operand")]))
1920 (neg:DF
1921 (match_operand:DF 3 "register_operand" "w"))
1922 (match_operand:DF 4 "register_operand" "0")))]
1923 "TARGET_SIMD"
1924 {
1925 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1926 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1927 }
1928 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1929 )
1930
1931 ;; Vector versions of the floating-point frint patterns.
1932 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One insn per FRINT unspec; the iterator supplies the rounding-mode
;; suffix (frintz, frinta, frintm, ...).
1933 (define_insn "<frint_pattern><mode>2"
1934 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1935 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1936 FRINT))]
1937 "TARGET_SIMD"
1938 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1939 [(set_attr "type" "neon_fp_round_<stype><q>")]
1940 )
1941
1942 ;; Vector versions of the fcvt standard patterns.
1943 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP -> integer with an explicit rounding mode folded into one FCVT<mode>
;; instruction (signed or unsigned via FIXUORS).
1944 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1945 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1946 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1947 [(match_operand:VHSDF 1 "register_operand" "w")]
1948 FCVT)))]
1949 "TARGET_SIMD"
1950 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1951 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1952 )
1953
1954 ;; HF Scalar variants of related SIMD instructions.
;; These require the ARMv8.2-A FP16 scalar instructions (TARGET_SIMD_F16INST).
1955 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1956 [(set (match_operand:HI 0 "register_operand" "=w")
1957 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1958 FCVT)))]
1959 "TARGET_SIMD_F16INST"
1960 "fcvt<frint_suffix><su>\t%h0, %h1"
1961 [(set_attr "type" "neon_fp_to_int_s")]
1962 )
1963
;; Truncating HF -> HI conversion (round toward zero).
1964 (define_insn "<optab>_trunchfhi2"
1965 [(set (match_operand:HI 0 "register_operand" "=w")
1966 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1967 "TARGET_SIMD_F16INST"
1968 "fcvtz<su>\t%h0, %h1"
1969 [(set_attr "type" "neon_fp_to_int_s")]
1970 )
1971
;; HI -> HF conversion, signed (scvtf) or unsigned (ucvtf).
1972 (define_insn "<optab>hihf2"
1973 [(set (match_operand:HF 0 "register_operand" "=w")
1974 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1975 "TARGET_SIMD_F16INST"
1976 "<su_optab>cvtf\t%h0, %h1"
1977 [(set_attr "type" "neon_int_to_fp_s")]
1978 )
1979
;; Combine pattern: (fix (x * 2^n)) becomes a single fixed-point convert
;; FCVTZ with #fbits, provided 1 <= n <= element bit-size.  operands[2]
;; must be a vector constant that is a power of two (aarch64_fp_vec_pow2).
1980 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1981 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1982 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1983 [(mult:VDQF
1984 (match_operand:VDQF 1 "register_operand" "w")
1985 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1986 UNSPEC_FRINTZ)))]
1987 "TARGET_SIMD
1988 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1989 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1990 {
;; The immediate fbits count is only known here, so format the template
;; at output time instead of using a fixed string.
1991 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1992 char buf[64];
1993 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1994 output_asm_insn (buf, operands);
1995 return "";
1996 }
1997 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1998 )
1999
;; Standard-name expanders for FP -> integer truncation; the RTL they
;; generate is matched by the l<fcvt_pattern> insn above.
2000 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2001 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2002 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2003 [(match_operand:VHSDF 1 "register_operand")]
2004 UNSPEC_FRINTZ)))]
2005 "TARGET_SIMD"
2006 {})
2007
2008 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2009 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2010 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2011 [(match_operand:VHSDF 1 "register_operand")]
2012 UNSPEC_FRINTZ)))]
2013 "TARGET_SIMD"
2014 {})
2015
;; ftrunc: round toward zero, FP -> FP (frintz).
2016 (define_expand "ftrunc<VHSDF:mode>2"
2017 [(set (match_operand:VHSDF 0 "register_operand")
2018 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2019 UNSPEC_FRINTZ))]
2020 "TARGET_SIMD"
2021 {})
2022
;; Integer -> FP conversion, signed (scvtf) or unsigned (ucvtf).
2023 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2024 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2025 (FLOATUORS:VHSDF
2026 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2027 "TARGET_SIMD"
2028 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2029 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2030 )
2031
2032 ;; Conversions between vectors of floats and doubles.
2033 ;; Contains a mix of patterns to match standard pattern names
2034 ;; and those for intrinsics.
2035
2036 ;; Float widening operations.
2037
;; Widen the low half of a 128-bit FP vector (FCVTL).  operands[2] is a
;; parallel selecting the low-half lanes (vect_par_cnst_lo_half).
2038 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2039 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2040 (float_extend:<VWIDE> (vec_select:<VHALF>
2041 (match_operand:VQ_HSF 1 "register_operand" "w")
2042 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2043 )))]
2044 "TARGET_SIMD"
2045 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2046 [(set_attr "type" "neon_fp_cvt_widen_s")]
2047 )
2048
2049 ;; Convert between fixed-point and floating-point (vector modes)
2050
;; FP -> fixed-point with #fbits in operands[2].
2051 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2052 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2053 (unspec:<VHSDF:FCVT_TARGET>
2054 [(match_operand:VHSDF 1 "register_operand" "w")
2055 (match_operand:SI 2 "immediate_operand" "i")]
2056 FCVT_F2FIXED))]
2057 "TARGET_SIMD"
2058 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2059 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2060 )
2061
;; Fixed-point -> FP with #fbits in operands[2].
2062 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2063 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2064 (unspec:<VDQ_HSDI:FCVT_TARGET>
2065 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2066 (match_operand:SI 2 "immediate_operand" "i")]
2067 FCVT_FIXED2F))]
2068 "TARGET_SIMD"
2069 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2070 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2071 )
2072
2073 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2074 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2075 ;; the meaning of HI and LO changes depending on the target endianness.
2076 ;; While elsewhere we map the higher numbered elements of a vector to
2077 ;; the lower architectural lanes of the vector, for these patterns we want
2078 ;; to always treat "hi" as referring to the higher architectural lanes.
2079 ;; Consequently, while the patterns below look inconsistent with our
2080 ;; other big-endian patterns their behavior is as required.
2081
;; Standard-name expander: build the low-half lane parallel ('false')
;; and emit the FCVTL insn pattern above.
2082 (define_expand "vec_unpacks_lo_<mode>"
2083 [(match_operand:<VWIDE> 0 "register_operand" "")
2084 (match_operand:VQ_HSF 1 "register_operand" "")]
2085 "TARGET_SIMD"
2086 {
2087 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2088 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2089 operands[1], p));
2090 DONE;
2091 }
2092 )
2093
;; Widen the high half of a 128-bit FP vector (FCVTL2); operands[2] is a
;; parallel selecting the high-half lanes (vect_par_cnst_hi_half).
2094 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2095 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2096 (float_extend:<VWIDE> (vec_select:<VHALF>
2097 (match_operand:VQ_HSF 1 "register_operand" "w")
2098 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2099 )))]
2100 "TARGET_SIMD"
2101 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2102 [(set_attr "type" "neon_fp_cvt_widen_s")]
2103 )
2104
;; Standard-name expander for the high-half widening conversion: build
;; the high-half lane parallel ('true') and emit the FCVTL2 insn.
;; Fix: this previously called gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; whose vect_par_cnst_lo_half predicate rejects the hi-half parallel
;; built here; the hi expander must emit the _hi_ insn, mirroring
;; vec_unpacks_lo_<mode> above.
2105 (define_expand "vec_unpacks_hi_<mode>"
2106 [(match_operand:<VWIDE> 0 "register_operand" "")
2107 (match_operand:VQ_HSF 1 "register_operand" "")]
2108 "TARGET_SIMD"
2109 {
2110 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2111 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2112 operands[1], p));
2113 DONE;
2114 }
2115 )
;; Widen a 64-bit FP vector to its double-width form (FCVTL).
2116 (define_insn "aarch64_float_extend_lo_<Vwide>"
2117 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2118 (float_extend:<VWIDE>
2119 (match_operand:VDF 1 "register_operand" "w")))]
2120 "TARGET_SIMD"
2121 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2122 [(set_attr "type" "neon_fp_cvt_widen_s")]
2123 )
2124
2125 ;; Float narrowing operations.
2126
;; Narrow a double-width FP vector into a 64-bit result (FCVTN).
2127 (define_insn "aarch64_float_truncate_lo_<mode>"
2128 [(set (match_operand:VDF 0 "register_operand" "=w")
2129 (float_truncate:VDF
2130 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2131 "TARGET_SIMD"
2132 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2133 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2134 )
2135
;; FCVTN2: narrow into the high half while keeping the low half (op1).
;; Little-endian form: kept half is the first vec_concat element.
2136 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2137 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2138 (vec_concat:<VDBL>
2139 (match_operand:VDF 1 "register_operand" "0")
2140 (float_truncate:VDF
2141 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2142 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2143 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2144 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2145 )
2146
;; Big-endian form: the vec_concat halves are swapped to keep RTL lane
;; numbering consistent with memory order.
2147 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2148 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2149 (vec_concat:<VDBL>
2150 (float_truncate:VDF
2151 (match_operand:<VWIDE> 2 "register_operand" "w"))
2152 (match_operand:VDF 1 "register_operand" "0")))]
2153 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2154 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2155 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2156 )
2157
;; Dispatch to the _le or _be insn according to target endianness.
2158 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2159 [(match_operand:<VDBL> 0 "register_operand" "=w")
2160 (match_operand:VDF 1 "register_operand" "0")
2161 (match_operand:<VWIDE> 2 "register_operand" "w")]
2162 "TARGET_SIMD"
2163 {
2164 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2165 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2166 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2167 emit_insn (gen (operands[0], operands[1], operands[2]));
2168 DONE;
2169 }
2170 )
2171
;; Pack two V2DF vectors into one V4SF: FCVTN into the scratch low half,
;; then FCVTN2 into the high half.  The lo/hi operand swap keeps lane
;; ordering correct on big-endian targets.
2172 (define_expand "vec_pack_trunc_v2df"
2173 [(set (match_operand:V4SF 0 "register_operand")
2174 (vec_concat:V4SF
2175 (float_truncate:V2SF
2176 (match_operand:V2DF 1 "register_operand"))
2177 (float_truncate:V2SF
2178 (match_operand:V2DF 2 "register_operand"))
2179 ))]
2180 "TARGET_SIMD"
2181 {
2182 rtx tmp = gen_reg_rtx (V2SFmode);
2183 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2184 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2185
2186 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2187 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2188 tmp, operands[hi]));
2189 DONE;
2190 }
2191 )
2192
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two scalars
;; (move_lo_quad/move_hi_quad), then narrow it with FCVTN.
2193 (define_expand "vec_pack_trunc_df"
2194 [(set (match_operand:V2SF 0 "register_operand")
2195 (vec_concat:V2SF
2196 (float_truncate:SF
2197 (match_operand:DF 1 "register_operand"))
2198 (float_truncate:SF
2199 (match_operand:DF 2 "register_operand"))
2200 ))]
2201 "TARGET_SIMD"
2202 {
2203 rtx tmp = gen_reg_rtx (V2SFmode);
2204 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2205 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2206
2207 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2208 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2209 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2210 DONE;
2211 }
2212 )
2213
2214 ;; FP Max/Min
2215 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2216 ;; expression like:
2217 ;; a = (b < c) ? b : c;
2218 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2219 ;; either explicitly or indirectly via -ffast-math.
2220 ;;
2221 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2222 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2223 ;; operand will be returned when both operands are zero (i.e. they may not
2224 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2225 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2226 ;; NaNs.
2227
;; smax/smin standard patterns, implemented with the NaN-propagating
;; FMAXNM/FMINNM forms (see the comment block above for why these are
;; only generated under no-NaNs assumptions).
2228 (define_insn "<su><maxmin><mode>3"
2229 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2230 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2231 (match_operand:VHSDF 2 "register_operand" "w")))]
2232 "TARGET_SIMD"
2233 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2234 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2235 )
2236
2237 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2238 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2239 ;; which implement the IEEE fmax ()/fmin () functions.
2240 (define_insn "<maxmin_uns><mode>3"
2241 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2242 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2243 (match_operand:VHSDF 2 "register_operand" "w")]
2244 FMAXMIN_UNS))]
2245 "TARGET_SIMD"
2246 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2247 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2248 )
2249
2250 ;; 'across lanes' add.
2251
;; Integer sum-across-lanes: ADDV into a scratch vector, then extract
;; the (endian-corrected) lane 0 as the scalar result.
2252 (define_expand "reduc_plus_scal_<mode>"
2253 [(match_operand:<VEL> 0 "register_operand" "=w")
2254 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2255 UNSPEC_ADDV)]
2256 "TARGET_SIMD"
2257 {
2258 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2259 rtx scratch = gen_reg_rtx (<MODE>mode);
2260 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2261 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2262 DONE;
2263 }
2264 )
2265
;; Pairwise FP add (FADDP), used as the building block for FP reductions.
2266 (define_insn "aarch64_faddp<mode>"
2267 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2268 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2269 (match_operand:VHSDF 2 "register_operand" "w")]
2270 UNSPEC_FADDV))]
2271 "TARGET_SIMD"
2272 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2273 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2274 )
2275
;; ADDV/ADDP across-lanes integer add; result lands in element 0.
2276 (define_insn "aarch64_reduc_plus_internal<mode>"
2277 [(set (match_operand:VDQV 0 "register_operand" "=w")
2278 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2279 UNSPEC_ADDV))]
2280 "TARGET_SIMD"
2281 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2282 [(set_attr "type" "neon_reduc_add<q>")]
2283 )
2284
;; V2SI has no ADDV form; a single ADDP of the vector with itself sums
;; the two lanes.
2285 (define_insn "aarch64_reduc_plus_internalv2si"
2286 [(set (match_operand:V2SI 0 "register_operand" "=w")
2287 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2288 UNSPEC_ADDV))]
2289 "TARGET_SIMD"
2290 "addp\\t%0.2s, %1.2s, %1.2s"
2291 [(set_attr "type" "neon_reduc_add")]
2292 )
2293
;; Two-element FP vectors reduce with the scalar-result FADDP form.
2294 (define_insn "reduc_plus_scal_<mode>"
2295 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2296 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2297 UNSPEC_FADDV))]
2298 "TARGET_SIMD"
2299 "faddp\\t%<Vetype>0, %1.<Vtype>"
2300 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2301 )
2302
;; V4SF sum: two rounds of pairwise FADDP leave the total in every lane
;; pair position; extract endian-corrected lane 0.
2303 (define_expand "reduc_plus_scal_v4sf"
2304 [(set (match_operand:SF 0 "register_operand")
2305 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2306 UNSPEC_FADDV))]
2307 "TARGET_SIMD"
2308 {
2309 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2310 rtx scratch = gen_reg_rtx (V4SFmode);
2311 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2312 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2313 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2314 DONE;
2315 })
2316
;; Count leading redundant sign bits (CLS).
2317 (define_insn "clrsb<mode>2"
2318 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2319 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2320 "TARGET_SIMD"
2321 "cls\\t%0.<Vtype>, %1.<Vtype>"
2322 [(set_attr "type" "neon_cls<q>")]
2323 )
2324
;; Count leading zeros per element (CLZ).
2325 (define_insn "clz<mode>2"
2326 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2327 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2328 "TARGET_SIMD"
2329 "clz\\t%0.<Vtype>, %1.<Vtype>"
2330 [(set_attr "type" "neon_cls<q>")]
2331 )
2332
;; Population count per byte (CNT); only byte vectors (VB) are supported.
2333 (define_insn "popcount<mode>2"
2334 [(set (match_operand:VB 0 "register_operand" "=w")
2335 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2336 "TARGET_SIMD"
2337 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2338 [(set_attr "type" "neon_cnt<q>")]
2339 )
2340
2341 ;; 'across lanes' max and min ops.
2342
2343 ;; Template for outputting a scalar, so we can create __builtins which can be
2344 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min across lanes: reduce into a scratch vector, then extract
;; the endian-corrected lane 0.
2345 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2346 [(match_operand:<VEL> 0 "register_operand")
2347 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2348 FMAXMINV)]
2349 "TARGET_SIMD"
2350 {
2351 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2352 rtx scratch = gen_reg_rtx (<MODE>mode);
2353 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2354 operands[1]));
2355 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2356 DONE;
2357 }
2358 )
2359
2360 ;; Likewise for integer cases, signed and unsigned.
2361 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2362 [(match_operand:<VEL> 0 "register_operand")
2363 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2364 MAXMINV)]
2365 "TARGET_SIMD"
2366 {
2367 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2368 rtx scratch = gen_reg_rtx (<MODE>mode);
2369 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2370 operands[1]));
2371 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2372 DONE;
2373 }
2374 )
2375
;; Integer across-lanes max/min (SMAXV/UMAXV/SMINV/UMINV).
2376 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2377 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2378 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2379 MAXMINV))]
2380 "TARGET_SIMD"
2381 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2382 [(set_attr "type" "neon_reduc_minmax<q>")]
2383 )
2384
;; V2SI has no across-lanes form; a pairwise op of the vector with
;; itself reduces the two lanes.
2385 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2386 [(set (match_operand:V2SI 0 "register_operand" "=w")
2387 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2388 MAXMINV))]
2389 "TARGET_SIMD"
2390 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2391 [(set_attr "type" "neon_reduc_minmax")]
2392 )
2393
;; FP across-lanes max/min (FMAXV/FMINV and the NM variants).
2394 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2395 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2396 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2397 FMAXMINV))]
2398 "TARGET_SIMD"
2399 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2400 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2401 )
2402
2403 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2404 ;; allocation.
2405 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2406 ;; to select.
2407 ;;
2408 ;; Thus our BSL is of the form:
2409 ;; op0 = bsl (mask, op2, op3)
2410 ;; We can use any of:
2411 ;;
2412 ;; if (op0 = mask)
2413 ;; bsl mask, op1, op2
2414 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2415 ;; bit op0, op2, mask
2416 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2417 ;; bif op0, op1, mask
2418 ;;
2419 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2420 ;; Some forms of straight-line code may generate the equivalent form
2421 ;; in *aarch64_simd_bsl<mode>_alt.
2422
;; Bit-select expressed canonically as ((op2 ^ op3) & mask) ^ op3; the
;; three alternatives tie the destination to mask/op2/op3 and pick
;; BSL/BIT/BIF accordingly (see the comment block above).
2423 (define_insn "aarch64_simd_bsl<mode>_internal"
2424 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2425 (xor:VSDQ_I_DI
2426 (and:VSDQ_I_DI
2427 (xor:VSDQ_I_DI
2428 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2429 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2430 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2431 (match_dup:<V_INT_EQUIV> 3)
2432 ))]
2433 "TARGET_SIMD"
2434 "@
2435 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2436 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2437 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2438 [(set_attr "type" "neon_bsl<q>")]
2439 )
2440
2441 ;; We need this form in addition to the above pattern to match the case
2442 ;; when combine tries merging three insns such that the second operand of
2443 ;; the outer XOR matches the second operand of the inner XOR rather than
2444 ;; the first. The two are equivalent but since recog doesn't try all
2445 ;; permutations of commutative operations, we have to have a separate pattern.
2446
;; Same select, with the outer xor folded against op2 instead of op3.
2447 (define_insn "*aarch64_simd_bsl<mode>_alt"
2448 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2449 (xor:VSDQ_I_DI
2450 (and:VSDQ_I_DI
2451 (xor:VSDQ_I_DI
2452 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2453 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2454 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2455 (match_dup:VSDQ_I_DI 2)))]
2456 "TARGET_SIMD"
2457 "@
2458 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2459 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2460 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2461 [(set_attr "type" "neon_bsl<q>")]
2462 )
2463
;; Public bit-select expander: op0 = (mask & op2) | (~mask & op3).
;; The internal insn only exists for integer modes, so FP operands are
;; viewed through their integer-equivalent mode and the result converted
;; back at the end.
2464 (define_expand "aarch64_simd_bsl<mode>"
2465 [(match_operand:VALLDIF 0 "register_operand")
2466 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2467 (match_operand:VALLDIF 2 "register_operand")
2468 (match_operand:VALLDIF 3 "register_operand")]
2469 "TARGET_SIMD"
2470 {
2471 /* We can't alias operands together if they have different modes. */
2472 rtx tmp = operands[0];
2473 if (FLOAT_MODE_P (<MODE>mode))
2474 {
2475 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2476 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
;; FP result needs an integer-mode scratch; copied back below.
2477 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2478 }
2479 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2480 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2481 operands[1],
2482 operands[2],
2483 operands[3]));
2484 if (tmp != operands[0])
2485 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2486
2487 DONE;
2488 })
2489
;; vcond_mask: op0 = op3 ? op1 : op2, with op3 an integer mask vector.
;; Special-cases mask passthrough (sel -1/0) and mask inversion (sel 0/-1)
;; before falling back to a BSL.
2490 (define_expand "vcond_mask_<mode><v_int_equiv>"
2491 [(match_operand:VALLDI 0 "register_operand")
2492 (match_operand:VALLDI 1 "nonmemory_operand")
2493 (match_operand:VALLDI 2 "nonmemory_operand")
2494 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2495 "TARGET_SIMD"
2496 {
2497 /* If we have (a = (P) ? -1 : 0);
2498 Then we can simply move the generated mask (result must be int). */
2499 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2500 && operands[2] == CONST0_RTX (<MODE>mode))
2501 emit_move_insn (operands[0], operands[3]);
2502 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2503 else if (operands[1] == CONST0_RTX (<MODE>mode)
2504 && operands[2] == CONSTM1_RTX (<MODE>mode))
2505 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2506 else
2507 {
;; General case: legitimize both selectees, then bit-select with the mask.
2508 if (!REG_P (operands[1]))
2509 operands[1] = force_reg (<MODE>mode, operands[1]);
2510 if (!REG_P (operands[2]))
2511 operands[2] = force_reg (<MODE>mode, operands[2]);
2512 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2513 operands[1], operands[2]));
2514 }
2515
2516 DONE;
2517 })
2518
2519 ;; Patterns comparing two vectors to produce a mask.
2520
;; Integer vector comparison producing an all-ones/all-zeros mask.
;; First switch: decide whether operand 3 may stay as the zero constant
;; (the CM* instructions have #0 forms); otherwise force to a register.
;; Second switch: emit the CM* insn, swapping operands to express
;; LTU/LEU via GTU/GEU, and lowering NE as NOT (EQ).
2521 (define_expand "vec_cmp<mode><mode>"
2522 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2523 (match_operator 1 "comparison_operator"
2524 [(match_operand:VSDQ_I_DI 2 "register_operand")
2525 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2526 "TARGET_SIMD"
2527 {
2528 rtx mask = operands[0];
2529 enum rtx_code code = GET_CODE (operands[1]);
2530
2531 switch (code)
2532 {
2533 case NE:
2534 case LE:
2535 case LT:
2536 case GE:
2537 case GT:
2538 case EQ:
;; Signed comparisons against zero map to the CM*-with-#0 encodings.
2539 if (operands[3] == CONST0_RTX (<MODE>mode))
2540 break;
2541
2542 /* Fall through. */
2543 default:
2544 if (!REG_P (operands[3]))
2545 operands[3] = force_reg (<MODE>mode, operands[3]);
2546
2547 break;
2548 }
2549
2550 switch (code)
2551 {
2552 case LT:
2553 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2554 break;
2555
2556 case GE:
2557 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2558 break;
2559
2560 case LE:
2561 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2562 break;
2563
2564 case GT:
2565 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2566 break;
2567
;; No CMLTU/CMLEU exist: swap the operands and use CMGTU/CMGEU.
2568 case LTU:
2569 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2570 break;
2571
2572 case GEU:
2573 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2574 break;
2575
2576 case LEU:
2577 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2578 break;
2579
2580 case GTU:
2581 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2582 break;
2583
2584 case NE:
2585 /* Handle NE as !EQ. */
2586 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2587 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2588 break;
2589
2590 case EQ:
2591 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2592 break;
2593
2594 default:
2595 gcc_unreachable ();
2596 }
2597
2598 DONE;
2599 })
2600
;; FP vector comparison producing an integer mask.  FCM* return false
;; for unordered lanes, so the unordered-tolerant codes (UN*, NE) are
;; implemented by emitting the inverse ordered comparison and negating
;; the mask; UNEQ/ORDERED/UNORDERED are built from FCMGT/FCMGE pairs.
2601 (define_expand "vec_cmp<mode><v_int_equiv>"
2602 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2603 (match_operator 1 "comparison_operator"
2604 [(match_operand:VDQF 2 "register_operand")
2605 (match_operand:VDQF 3 "nonmemory_operand")]))]
2606 "TARGET_SIMD"
2607 {
2608 int use_zero_form = 0;
2609 enum rtx_code code = GET_CODE (operands[1]);
2610 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2611
2612 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2613
;; Decide whether the FCM*-with-#0.0 encodings apply; otherwise operand 3
;; must live in a register.
2614 switch (code)
2615 {
2616 case LE:
2617 case LT:
2618 case GE:
2619 case GT:
2620 case EQ:
2621 if (operands[3] == CONST0_RTX (<MODE>mode))
2622 {
2623 use_zero_form = 1;
2624 break;
2625 }
2626 /* Fall through. */
2627 default:
2628 if (!REG_P (operands[3]))
2629 operands[3] = force_reg (<MODE>mode, operands[3]);
2630
2631 break;
2632 }
2633
;; Pick the single FCM generator each code reduces to; the operand swaps
;; turn LT/LE (and their UN inverses) into GT/GE forms.
2634 switch (code)
2635 {
2636 case LT:
2637 if (use_zero_form)
2638 {
2639 comparison = gen_aarch64_cmlt<mode>;
2640 break;
2641 }
2642 /* Fall through. */
2643 case UNGE:
2644 std::swap (operands[2], operands[3]);
2645 /* Fall through. */
2646 case UNLE:
2647 case GT:
2648 comparison = gen_aarch64_cmgt<mode>;
2649 break;
2650 case LE:
2651 if (use_zero_form)
2652 {
2653 comparison = gen_aarch64_cmle<mode>;
2654 break;
2655 }
2656 /* Fall through. */
2657 case UNGT:
2658 std::swap (operands[2], operands[3]);
2659 /* Fall through. */
2660 case UNLT:
2661 case GE:
2662 comparison = gen_aarch64_cmge<mode>;
2663 break;
2664 case NE:
2665 case EQ:
2666 comparison = gen_aarch64_cmeq<mode>;
2667 break;
2668 case UNEQ:
2669 case ORDERED:
2670 case UNORDERED:
;; These need two FCM instructions; handled directly below.
2671 break;
2672 default:
2673 gcc_unreachable ();
2674 }
2675
2676 switch (code)
2677 {
2678 case UNGE:
2679 case UNGT:
2680 case UNLE:
2681 case UNLT:
2682 case NE:
2683 /* FCM returns false for lanes which are unordered, so if we use
2684 the inverse of the comparison we actually want to emit, then
2685 invert the result, we will end up with the correct result.
2686 Note that a NE NaN and NaN NE b are true for all a, b.
2687
2688 Our transformations are:
2689 a UNGE b -> !(b GT a)
2690 a UNGT b -> !(b GE a)
2691 a UNLE b -> !(a GT b)
2692 a UNLT b -> !(a GE b)
2693 a NE b -> !(a EQ b) */
2694 gcc_assert (comparison != NULL);
2695 emit_insn (comparison (operands[0], operands[2], operands[3]));
2696 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2697 break;
2698
2699 case LT:
2700 case LE:
2701 case GT:
2702 case GE:
2703 case EQ:
2704 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2705 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2706 a GE b -> a GE b
2707 a GT b -> a GT b
2708 a LE b -> b GE a
2709 a LT b -> b GT a
2710 a EQ b -> a EQ b */
2711 gcc_assert (comparison != NULL);
2712 emit_insn (comparison (operands[0], operands[2], operands[3]));
2713 break;
2714
2715 case UNEQ:
2716 /* We first check (a > b || b > a) which is !UNEQ, inverting
2717 this result will then give us (a == b || a UNORDERED b). */
2718 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2719 operands[2], operands[3]));
2720 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2721 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2722 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2723 break;
2724
2725 case UNORDERED:
2726 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2727 UNORDERED as !ORDERED. */
2728 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2729 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2730 operands[3], operands[2]));
2731 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2732 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2733 break;
2734
2735 case ORDERED:
2736 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2737 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2738 operands[3], operands[2]));
2739 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2740 break;
2741
2742 default:
2743 gcc_unreachable ();
2744 }
2745
2746 DONE;
2747 })
2748
;; Unsigned integer vector comparison: identical lowering to the signed
;; expander, which already dispatches on the (unsigned) rtx code in
;; operands[1], so simply forward to it.
2749 (define_expand "vec_cmpu<mode><mode>"
2750 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2751 (match_operator 1 "comparison_operator"
2752 [(match_operand:VSDQ_I_DI 2 "register_operand")
2753 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2754 "TARGET_SIMD"
2755 {
2756 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2757 operands[2], operands[3]));
2758 DONE;
2759 })
2760
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2.  Lowered as a vec_cmp to
;; build the mask followed by vcond_mask to select.
2761 (define_expand "vcond<mode><mode>"
2762 [(set (match_operand:VALLDI 0 "register_operand")
2763 (if_then_else:VALLDI
2764 (match_operator 3 "comparison_operator"
2765 [(match_operand:VALLDI 4 "register_operand")
2766 (match_operand:VALLDI 5 "nonmemory_operand")])
2767 (match_operand:VALLDI 1 "nonmemory_operand")
2768 (match_operand:VALLDI 2 "nonmemory_operand")))]
2769 "TARGET_SIMD"
2770 {
2771 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2772 enum rtx_code code = GET_CODE (operands[3]);
2773
2774 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2775 it as well as switch operands 1/2 in order to avoid the additional
2776 NOT instruction. */
2777 if (code == NE)
2778 {
2779 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2780 operands[4], operands[5]);
2781 std::swap (operands[1], operands[2]);
2782 }
2783 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2784 operands[4], operands[5]));
2785 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2786 operands[2], mask));
2787
2788 DONE;
2789 })
2790
;; Mixed-mode vcond: the comparison mode (VDQF_COND) differs from the
;; mode of the selected values (<V_cmp_mixed>); otherwise identical to
;; the same-mode expander above.
2791 (define_expand "vcond<v_cmp_mixed><mode>"
2792 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2793 (if_then_else:<V_cmp_mixed>
2794 (match_operator 3 "comparison_operator"
2795 [(match_operand:VDQF_COND 4 "register_operand")
2796 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2797 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2798 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2799 "TARGET_SIMD"
2800 {
2801 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2802 enum rtx_code code = GET_CODE (operands[3]);
2803
2804 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2805 it as well as switch operands 1/2 in order to avoid the additional
2806 NOT instruction. */
2807 if (code == NE)
2808 {
2809 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2810 operands[4], operands[5]);
2811 std::swap (operands[1], operands[2]);
2812 }
2813 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2814 operands[4], operands[5]));
2815 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2816 operands[0], operands[1],
2817 operands[2], mask));
2818
2819 DONE;
2820 })
2821
;; Unsigned vector conditional select.  The mask is built with the
;; integer vec_cmp expander (operand 3 carries the unsigned comparison
;; code); the select itself is signedness-agnostic.
2822 (define_expand "vcondu<mode><mode>"
2823 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2824 (if_then_else:VSDQ_I_DI
2825 (match_operator 3 "comparison_operator"
2826 [(match_operand:VSDQ_I_DI 4 "register_operand")
2827 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2828 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2829 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2830 "TARGET_SIMD"
2831 {
2832 rtx mask = gen_reg_rtx (<MODE>mode);
2833 enum rtx_code code = GET_CODE (operands[3]);
2834
2835 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2836 it as well as switch operands 1/2 in order to avoid the additional
2837 NOT instruction.  */
2838 if (code == NE)
2839 {
2840 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2841 operands[4], operands[5]);
2842 std::swap (operands[1], operands[2]);
2843 }
2844 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2845 operands[4], operands[5]));
2846 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2847 operands[2], mask));
2848 DONE;
2849 })
2850
;; vcondu with float data (VDQF) selected by an integer comparison in
;; the corresponding <V_cmp_mixed> integer mode.
2851 (define_expand "vcondu<mode><v_cmp_mixed>"
2852 [(set (match_operand:VDQF 0 "register_operand")
2853 (if_then_else:VDQF
2854 (match_operator 3 "comparison_operator"
2855 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2856 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2857 (match_operand:VDQF 1 "nonmemory_operand")
2858 (match_operand:VDQF 2 "nonmemory_operand")))]
2859 "TARGET_SIMD"
2860 {
2861 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2862 enum rtx_code code = GET_CODE (operands[3]);
2863
2864 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2865 it as well as switch operands 1/2 in order to avoid the additional
2866 NOT instruction.  */
2867 if (code == NE)
2868 {
2869 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2870 operands[4], operands[5]);
2871 std::swap (operands[1], operands[2]);
2872 }
2873 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2874 mask, operands[3],
2875 operands[4], operands[5]));
2876 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877 operands[2], mask));
2878 DONE;
2879 })
2880
2881 ;; Patterns for AArch64 SIMD Intrinsics.
2882
2883 ;; Lane extraction with sign extension to general purpose register.
;; The lane number in operand 2 uses GCC (architecture-independent)
;; numbering; it is flipped for big-endian at output time via
;; aarch64_endian_lane_rtx.
2884 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2885 [(set (match_operand:GPI 0 "register_operand" "=r")
2886 (sign_extend:GPI
2887 (vec_select:<VEL>
2888 (match_operand:VDQQH 1 "register_operand" "w")
2889 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2890 "TARGET_SIMD"
2891 {
2892 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2893 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2894 }
2895 [(set_attr "type" "neon_to_gp<q>")]
2896 )
2897
;; Lane extraction with zero extension to a 32-bit GP register (UMOV).
2898 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2899 [(set (match_operand:SI 0 "register_operand" "=r")
2900 (zero_extend:SI
2901 (vec_select:<VEL>
2902 (match_operand:VDQQH 1 "register_operand" "w")
2903 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2904 "TARGET_SIMD"
2905 {
2906 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2907 return "umov\\t%w0, %1.<Vetype>[%2]";
2908 }
2909 [(set_attr "type" "neon_to_gp<q>")]
2910 )
2911
2912 ;; Lane extraction of a value, neither sign nor zero extension
2913 ;; is guaranteed so upper bits should be considered undefined.
2914 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: to a GP register (umov), to a SIMD register
;; (dup), or straight to memory (st1 of one lane).
2915 (define_insn "aarch64_get_lane<mode>"
2916 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2917 (vec_select:<VEL>
2918 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2919 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2920 "TARGET_SIMD"
2921 {
2922 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2923 switch (which_alternative)
2924 {
2925 case 0:
2926 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2927 case 1:
2928 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2929 case 2:
2930 return "st1\\t{%1.<Vetype>}[%2], %0";
2931 default:
2932 gcc_unreachable ();
2933 }
2934 }
2935 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2936 )
2937
2938 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2939 ;; dest vector.
2940
;; Combine a 64-bit value with zero into a 128-bit vector.  On
;; little-endian the zero half is the high half, so a plain 64-bit
;; move/fmov/load (which zeroes the upper 64 bits) suffices.
2941 (define_insn "*aarch64_combinez<mode>"
2942 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2943 (vec_concat:<VDBL>
2944 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2945 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2947 "@
2948 mov\\t%0.8b, %1.8b
2949 fmov\t%d0, %1
2950 ldr\\t%d0, %1"
2951 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2952 (set_attr "simd" "yes,*,yes")
2953 (set_attr "fp" "*,yes,*")]
2954 )
2955
;; Big-endian counterpart: the vec_concat halves are swapped in RTL but
;; the emitted instructions are identical.
2956 (define_insn "*aarch64_combinez_be<mode>"
2957 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2958 (vec_concat:<VDBL>
2959 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2960 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2961 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2962 "@
2963 mov\\t%0.8b, %1.8b
2964 fmov\t%d0, %1
2965 ldr\\t%d0, %1"
2966 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2967 (set_attr "simd" "yes,*,yes")
2968 (set_attr "fp" "*,yes,*")]
2969 )
2970
;; Combine two 64-bit vectors into one 128-bit vector; endianness
;; handling is delegated to aarch64_split_simd_combine.
2971 (define_expand "aarch64_combine<mode>"
2972 [(match_operand:<VDBL> 0 "register_operand")
2973 (match_operand:VDC 1 "register_operand")
2974 (match_operand:VDC 2 "register_operand")]
2975 "TARGET_SIMD"
2976 {
2977 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2978
2979 DONE;
2980 }
2981 )
2982
;; As aarch64_combine, but expanded directly as a pair of lo/hi
;; quad-word moves.
2983 (define_expand "aarch64_simd_combine<mode>"
2984 [(match_operand:<VDBL> 0 "register_operand")
2985 (match_operand:VDC 1 "register_operand")
2986 (match_operand:VDC 2 "register_operand")]
2987 "TARGET_SIMD"
2988 {
2989 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2990 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2991 DONE;
2992 }
2993 [(set_attr "type" "multiple")]
2994 )
2995
2996 ;; <su><addsub>l<q>.
2997
;; Widening add/sub of the high halves of two Q registers
;; ([US]ADDL2 / [US]SUBL2).  Operand 3 is the parallel selecting the
;; high-half lanes.
2998 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2999 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3000 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3001 (match_operand:VQW 1 "register_operand" "w")
3002 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3003 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3004 (match_operand:VQW 2 "register_operand" "w")
3005 (match_dup 3)))))]
3006 "TARGET_SIMD"
3007 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3008 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3009 )
3010
;; Same, but for the low halves ([US]ADDL / [US]SUBL).
3011 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3012 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3013 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3014 (match_operand:VQW 1 "register_operand" "w")
3015 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3016 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3017 (match_operand:VQW 2 "register_operand" "w")
3018 (match_dup 3)))))]
3019 "TARGET_SIMD"
3020 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3021 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3022 )
3023
3024
;; The four *l2 intrinsic expanders below all build a high-half lane
;; parallel and forward to the _hi_internal pattern above.
3025 (define_expand "aarch64_saddl2<mode>"
3026 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3027 (match_operand:VQW 1 "register_operand" "w")
3028 (match_operand:VQW 2 "register_operand" "w")]
3029 "TARGET_SIMD"
3030 {
3031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3032 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3033 operands[2], p));
3034 DONE;
3035 })
3036
3037 (define_expand "aarch64_uaddl2<mode>"
3038 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3039 (match_operand:VQW 1 "register_operand" "w")
3040 (match_operand:VQW 2 "register_operand" "w")]
3041 "TARGET_SIMD"
3042 {
3043 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3044 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3045 operands[2], p));
3046 DONE;
3047 })
3048
3049 (define_expand "aarch64_ssubl2<mode>"
3050 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3051 (match_operand:VQW 1 "register_operand" "w")
3052 (match_operand:VQW 2 "register_operand" "w")]
3053 "TARGET_SIMD"
3054 {
3055 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3056 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3057 operands[2], p));
3058 DONE;
3059 })
3060
3061 (define_expand "aarch64_usubl2<mode>"
3062 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3063 (match_operand:VQW 1 "register_operand" "w")
3064 (match_operand:VQW 2 "register_operand" "w")]
3065 "TARGET_SIMD"
3066 {
3067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3068 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3069 operands[2], p));
3070 DONE;
3071 })
3072
;; Widening add/sub of whole 64-bit vectors ([US]ADDL / [US]SUBL).
3073 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3074 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3075 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3076 (match_operand:VD_BHSI 1 "register_operand" "w"))
3077 (ANY_EXTEND:<VWIDE>
3078 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3079 "TARGET_SIMD"
3080 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3081 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3082 )
3083
3084 ;; <su><addsub>w<q>.
3085
;; widen_ssum for Q registers: add the sign-extended elements of
;; operand 1 to the wide accumulator in operand 2, as a low-half SADDW
;; followed by a high-half SADDW2.
3086 (define_expand "widen_ssum<mode>3"
3087 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3088 (plus:<VDBLW> (sign_extend:<VDBLW>
3089 (match_operand:VQW 1 "register_operand" ""))
3090 (match_operand:<VDBLW> 2 "register_operand" "")))]
3091 "TARGET_SIMD"
3092 {
3093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3094 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3095
3096 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3097 operands[1], p));
3098 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3099 DONE;
3100 }
3101 )
3102
;; widen_ssum for 64-bit vectors: a single SADDW suffices.
3103 (define_expand "widen_ssum<mode>3"
3104 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3105 (plus:<VWIDE> (sign_extend:<VWIDE>
3106 (match_operand:VD_BHSI 1 "register_operand" ""))
3107 (match_operand:<VWIDE> 2 "register_operand" "")))]
3108 "TARGET_SIMD"
3109 {
3110 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3111 DONE;
3112 })
3113
;; Unsigned counterpart of the Q-register widen_ssum above (UADDW +
;; UADDW2).
3114 (define_expand "widen_usum<mode>3"
3115 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3116 (plus:<VDBLW> (zero_extend:<VDBLW>
3117 (match_operand:VQW 1 "register_operand" ""))
3118 (match_operand:<VDBLW> 2 "register_operand" "")))]
3119 "TARGET_SIMD"
3120 {
3121 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3122 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3123
3124 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3125 operands[1], p));
3126 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3127 DONE;
3128 }
3129 )
3130
;; Unsigned widen sum for 64-bit vectors: a single UADDW.
3131 (define_expand "widen_usum<mode>3"
3132 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3133 (plus:<VWIDE> (zero_extend:<VWIDE>
3134 (match_operand:VD_BHSI 1 "register_operand" ""))
3135 (match_operand:<VWIDE> 2 "register_operand" "")))]
3136 "TARGET_SIMD"
3137 {
3138 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3139 DONE;
3140 })
3141
;; Wide add/sub of an extended 64-bit vector to a wide accumulator
;; ([US]ADDW / [US]SUBW).
3142 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3143 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3144 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3145 (ANY_EXTEND:<VWIDE>
3146 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3147 "TARGET_SIMD"
3148 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3149 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3150 )
3151
;; Wide add/sub of the extended low half of a Q register
;; ([US]ADDW / [US]SUBW on the low-half lane selection in operand 3).
3152 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3153 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3155 (ANY_EXTEND:<VWIDE>
3156 (vec_select:<VHALF>
3157 (match_operand:VQW 2 "register_operand" "w")
3158 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3159 "TARGET_SIMD"
3160 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3161 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3162 )
3163
;; Same for the high half ([US]ADDW2 / [US]SUBW2).
3164 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3165 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3166 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3167 (ANY_EXTEND:<VWIDE>
3168 (vec_select:<VHALF>
3169 (match_operand:VQW 2 "register_operand" "w")
3170 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3171 "TARGET_SIMD"
3172 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3173 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3174 )
3175
;; The four *w2 intrinsic expanders below build a high-half lane
;; parallel and forward to the w2_internal pattern above.
3176 (define_expand "aarch64_saddw2<mode>"
3177 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (match_operand:<VWIDE> 1 "register_operand" "w")
3179 (match_operand:VQW 2 "register_operand" "w")]
3180 "TARGET_SIMD"
3181 {
3182 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3184 operands[2], p));
3185 DONE;
3186 })
3187
3188 (define_expand "aarch64_uaddw2<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:<VWIDE> 1 "register_operand" "w")
3191 (match_operand:VQW 2 "register_operand" "w")]
3192 "TARGET_SIMD"
3193 {
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3196 operands[2], p));
3197 DONE;
3198 })
3199
3200
3201 (define_expand "aarch64_ssubw2<mode>"
3202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203 (match_operand:<VWIDE> 1 "register_operand" "w")
3204 (match_operand:VQW 2 "register_operand" "w")]
3205 "TARGET_SIMD"
3206 {
3207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3209 operands[2], p));
3210 DONE;
3211 })
3212
3213 (define_expand "aarch64_usubw2<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215 (match_operand:<VWIDE> 1 "register_operand" "w")
3216 (match_operand:VQW 2 "register_operand" "w")]
3217 "TARGET_SIMD"
3218 {
3219 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3221 operands[2], p));
3222 DONE;
3223 })
3224
3225 ;; <su><r>h<addsub>.
3226
;; Halving add/sub, optionally rounding: [SU][R]HADD / [SU]HSUB.
3227 (define_insn "aarch64_<sur>h<addsub><mode>"
3228 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3229 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3230 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3231 HADDSUB))]
3232 "TARGET_SIMD"
3233 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3234 [(set_attr "type" "neon_<addsub>_halve<q>")]
3235 )
3236
3237 ;; <r><addsub>hn<q>.
3238
;; Narrowing high-half add/sub: [R]ADDHN / [R]SUBHN.
3239 (define_insn "aarch64_<sur><addsub>hn<mode>"
3240 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3241 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3242 (match_operand:VQN 2 "register_operand" "w")]
3243 ADDSUBHN))]
3244 "TARGET_SIMD"
3245 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3246 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3247 )
3248
;; Second-part variant writing the high half of the destination;
;; operand 1 is tied to the output ("0") to preserve the low half.
3249 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3250 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3251 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3252 (match_operand:VQN 2 "register_operand" "w")
3253 (match_operand:VQN 3 "register_operand" "w")]
3254 ADDSUBHN2))]
3255 "TARGET_SIMD"
3256 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3257 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3258 )
3259
3260 ;; pmul.
3261
;; Polynomial multiply on byte vectors (PMUL).
3262 (define_insn "aarch64_pmul<mode>"
3263 [(set (match_operand:VB 0 "register_operand" "=w")
3264 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3265 (match_operand:VB 2 "register_operand" "w")]
3266 UNSPEC_PMUL))]
3267 "TARGET_SIMD"
3268 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3269 [(set_attr "type" "neon_mul_<Vetype><q>")]
3270 )
3271
3272 ;; fmulx.
3273
;; Floating-point multiply extended (FMULX), vector and scalar forms.
3274 (define_insn "aarch64_fmulx<mode>"
3275 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3276 (unspec:VHSDF_HSDF
3277 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3278 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3279 UNSPEC_FMULX))]
3280 "TARGET_SIMD"
3281 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3282 [(set_attr "type" "neon_fp_mul_<stype>")]
3283 )
3284
3285 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3286
;; FMULX by a lane of a vector of the opposite width (<VSWAP_WIDTH>);
;; the lane index is endian-corrected at output time.
3287 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3288 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3289 (unspec:VDQSF
3290 [(match_operand:VDQSF 1 "register_operand" "w")
3291 (vec_duplicate:VDQSF
3292 (vec_select:<VEL>
3293 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3294 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3295 UNSPEC_FMULX))]
3296 "TARGET_SIMD"
3297 {
3298 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3299 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3300 }
3301 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3302 )
3303
3304 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3305
;; FMULX by a lane of a same-width vector.
3306 (define_insn "*aarch64_mulx_elt<mode>"
3307 [(set (match_operand:VDQF 0 "register_operand" "=w")
3308 (unspec:VDQF
3309 [(match_operand:VDQF 1 "register_operand" "w")
3310 (vec_duplicate:VDQF
3311 (vec_select:<VEL>
3312 (match_operand:VDQF 2 "register_operand" "w")
3313 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3314 UNSPEC_FMULX))]
3315 "TARGET_SIMD"
3316 {
3317 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3318 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3319 }
3320 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3321 )
3322
3323 ;; vmulxq_lane
3324
;; FMULX by a scalar broadcast to all lanes; emitted as a lane-0
;; by-element FMULX.  Note: the stray ';' that used to follow the
;; output-template string has been removed -- in md syntax ';' starts a
;; comment, so it was inert, but it was misleading and inconsistent
;; with every sibling pattern.
3325 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3326 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3327 (unspec:VHSDF
3328 [(match_operand:VHSDF 1 "register_operand" "w")
3329 (vec_duplicate:VHSDF
3330 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3331 UNSPEC_FMULX))]
3332 "TARGET_SIMD"
3333 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
3334 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3335 )
3336
3337 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3338 ;; vmulxd_lane_f64 == vmulx_lane_f64
3339 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3340
;; Scalar FMULX of operand 1 by a selected lane of vector operand 2.
3341 (define_insn "*aarch64_vgetfmulx<mode>"
3342 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3343 (unspec:<VEL>
3344 [(match_operand:<VEL> 1 "register_operand" "w")
3345 (vec_select:<VEL>
3346 (match_operand:VDQF 2 "register_operand" "w")
3347 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3348 UNSPEC_FMULX))]
3349 "TARGET_SIMD"
3350 {
3351 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3352 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3353 }
3354 [(set_attr "type" "fmul<Vetype>")]
3355 )
3356 ;; <su>q<addsub>
3357
;; Saturating add/sub: [SU]QADD / [SU]QSUB.
3358 (define_insn "aarch64_<su_optab><optab><mode>"
3359 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3360 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3361 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3362 "TARGET_SIMD"
3363 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3364 [(set_attr "type" "neon_<optab><q>")]
3365 )
3366
3367 ;; suqadd and usqadd
3368
;; Saturating accumulate of the opposite signedness; operand 1 is the
;; accumulator and is tied to the output ("0").
3369 (define_insn "aarch64_<sur>qadd<mode>"
3370 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3371 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3372 (match_operand:VSDQ_I 2 "register_operand" "w")]
3373 USSUQADD))]
3374 "TARGET_SIMD"
3375 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3376 [(set_attr "type" "neon_qadd<q>")]
3377 )
3378
3379 ;; sqmovun
3380
;; Saturating extract-narrow, signed to unsigned (SQXTUN).
3381 (define_insn "aarch64_sqmovun<mode>"
3382 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3383 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3384 UNSPEC_SQXTUN))]
3385 "TARGET_SIMD"
3386 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3387 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3388 )
3389
3390 ;; sqmovn and uqmovn
3391
;; Saturating extract-narrow, signedness-preserving ([SU]QXTN).
3392 (define_insn "aarch64_<sur>qmovn<mode>"
3393 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3394 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3395 SUQMOVN))]
3396 "TARGET_SIMD"
3397 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3398 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3399 )
3400
3401 ;; <su>q<absneg>
3402
;; Saturating absolute value / negate: SQABS / SQNEG.
3403 (define_insn "aarch64_s<optab><mode>"
3404 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3405 (UNQOPS:VSDQ_I
3406 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3407 "TARGET_SIMD"
3408 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3409 [(set_attr "type" "neon_<optab><q>")]
3410 )
3411
3412 ;; sq<r>dmulh.
3413
;; Saturating doubling multiply returning high half, optionally
;; rounding: SQDMULH / SQRDMULH.
3414 (define_insn "aarch64_sq<r>dmulh<mode>"
3415 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3416 (unspec:VSDQ_HSI
3417 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3418 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3419 VQDMULH))]
3420 "TARGET_SIMD"
3421 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3422 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3423 )
3424
3425 ;; sq<r>dmulh_lane
3426
;; By-element variants: the multiplier is one lane of operand 2.
;; _lane selects from a 64-bit vector (<VCOND>), _laneq from a 128-bit
;; vector (<VCONQ>); lane numbers are endian-corrected at output time.
3427 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3428 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3429 (unspec:VDQHS
3430 [(match_operand:VDQHS 1 "register_operand" "w")
3431 (vec_select:<VEL>
3432 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3433 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3434 VQDMULH))]
3435 "TARGET_SIMD"
3436 "*
3437 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3438 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3439 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3440 )
3441
3442 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3443 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3444 (unspec:VDQHS
3445 [(match_operand:VDQHS 1 "register_operand" "w")
3446 (vec_select:<VEL>
3447 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3448 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3449 VQDMULH))]
3450 "TARGET_SIMD"
3451 "*
3452 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3453 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3454 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3455 )
3456
;; Scalar (SD_HSI) by-element variants of the above.
3457 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3458 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3459 (unspec:SD_HSI
3460 [(match_operand:SD_HSI 1 "register_operand" "w")
3461 (vec_select:<VEL>
3462 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3463 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3464 VQDMULH))]
3465 "TARGET_SIMD"
3466 "*
3467 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3468 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3469 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3470 )
3471
3472 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3473 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3474 (unspec:SD_HSI
3475 [(match_operand:SD_HSI 1 "register_operand" "w")
3476 (vec_select:<VEL>
3477 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3478 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3479 VQDMULH))]
3480 "TARGET_SIMD"
3481 "*
3482 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3483 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3484 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3485 )
3486
3487 ;; sqrdml[as]h.
3488
;; Saturating rounding doubling multiply-accumulate/-subtract high
;; half (SQRDMLAH / SQRDMLSH, ARMv8.1 RDMA extension).  Operand 1 is
;; the accumulator, tied to the output ("0").
3489 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3490 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3491 (unspec:VSDQ_HSI
3492 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3493 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3494 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3495 SQRDMLH_AS))]
3496 "TARGET_SIMD_RDMA"
3497 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3498 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3499 )
3500
3501 ;; sqrdml[as]h_lane.
3502
;; By-element forms: the multiplier is a lane of operand 3 (_lane
;; selects from <VCOND>, _laneq from <VCONQ>); the lane number is
;; endian-corrected at output time.
3503 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3504 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3505 (unspec:VDQHS
3506 [(match_operand:VDQHS 1 "register_operand" "0")
3507 (match_operand:VDQHS 2 "register_operand" "w")
3508 (vec_select:<VEL>
3509 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3510 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3511 SQRDMLH_AS))]
3512 "TARGET_SIMD_RDMA"
3513 {
3514 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3515 return
3516 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3517 }
3518 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3519 )
3520
3521 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3522 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3523 (unspec:SD_HSI
3524 [(match_operand:SD_HSI 1 "register_operand" "0")
3525 (match_operand:SD_HSI 2 "register_operand" "w")
3526 (vec_select:<VEL>
3527 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3528 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3529 SQRDMLH_AS))]
3530 "TARGET_SIMD_RDMA"
3531 {
3532 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3533 return
3534 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3535 }
3536 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3537 )
3538
3539 ;; sqrdml[as]h_laneq.
3540
3541 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3542 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3543 (unspec:VDQHS
3544 [(match_operand:VDQHS 1 "register_operand" "0")
3545 (match_operand:VDQHS 2 "register_operand" "w")
3546 (vec_select:<VEL>
3547 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3548 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3549 SQRDMLH_AS))]
3550 "TARGET_SIMD_RDMA"
3551 {
3552 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3553 return
3554 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3555 }
3556 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3557 )
3558
3559 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3560 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3561 (unspec:SD_HSI
3562 [(match_operand:SD_HSI 1 "register_operand" "0")
3563 (match_operand:SD_HSI 2 "register_operand" "w")
3564 (vec_select:<VEL>
3565 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3566 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3567 SQRDMLH_AS))]
3568 "TARGET_SIMD_RDMA"
3569 {
3570 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3571 return
3572 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3573 }
3574 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3575 )
3576
3577 ;; vqdml[sa]l
3578
3579 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3580 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3581 (SBINQOPS:<VWIDE>
3582 (match_operand:<VWIDE> 1 "register_operand" "0")
3583 (ss_ashift:<VWIDE>
3584 (mult:<VWIDE>
3585 (sign_extend:<VWIDE>
3586 (match_operand:VSD_HSI 2 "register_operand" "w"))
3587 (sign_extend:<VWIDE>
3588 (match_operand:VSD_HSI 3 "register_operand" "w")))
3589 (const_int 1))))]
3590 "TARGET_SIMD"
3591 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3592 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3593 )
3594
3595 ;; vqdml[sa]l_lane
3596
3597 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3598 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3599 (SBINQOPS:<VWIDE>
3600 (match_operand:<VWIDE> 1 "register_operand" "0")
3601 (ss_ashift:<VWIDE>
3602 (mult:<VWIDE>
3603 (sign_extend:<VWIDE>
3604 (match_operand:VD_HSI 2 "register_operand" "w"))
3605 (sign_extend:<VWIDE>
3606 (vec_duplicate:VD_HSI
3607 (vec_select:<VEL>
3608 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3609 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3610 ))
3611 (const_int 1))))]
3612 "TARGET_SIMD"
3613 {
3614 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3615 return
3616 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3617 }
3618 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3619 )
3620
3621 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3622 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3623 (SBINQOPS:<VWIDE>
3624 (match_operand:<VWIDE> 1 "register_operand" "0")
3625 (ss_ashift:<VWIDE>
3626 (mult:<VWIDE>
3627 (sign_extend:<VWIDE>
3628 (match_operand:VD_HSI 2 "register_operand" "w"))
3629 (sign_extend:<VWIDE>
3630 (vec_duplicate:VD_HSI
3631 (vec_select:<VEL>
3632 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3633 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3634 ))
3635 (const_int 1))))]
3636 "TARGET_SIMD"
3637 {
3638 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3639 return
3640 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3641 }
3642 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3643 )
3644
3645 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3646 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3647 (SBINQOPS:<VWIDE>
3648 (match_operand:<VWIDE> 1 "register_operand" "0")
3649 (ss_ashift:<VWIDE>
3650 (mult:<VWIDE>
3651 (sign_extend:<VWIDE>
3652 (match_operand:SD_HSI 2 "register_operand" "w"))
3653 (sign_extend:<VWIDE>
3654 (vec_select:<VEL>
3655 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3656 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3657 )
3658 (const_int 1))))]
3659 "TARGET_SIMD"
3660 {
3661 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3662 return
3663 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3664 }
3665 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3666 )
3667
3668 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3669 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3670 (SBINQOPS:<VWIDE>
3671 (match_operand:<VWIDE> 1 "register_operand" "0")
3672 (ss_ashift:<VWIDE>
3673 (mult:<VWIDE>
3674 (sign_extend:<VWIDE>
3675 (match_operand:SD_HSI 2 "register_operand" "w"))
3676 (sign_extend:<VWIDE>
3677 (vec_select:<VEL>
3678 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3679 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3680 )
3681 (const_int 1))))]
3682 "TARGET_SIMD"
3683 {
3684 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3685 return
3686 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3687 }
3688 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3689 )
3690
3691 ;; vqdml[sa]l_n
3692
;; SQDMLAL/SQDMLSL with a scalar multiplier broadcast (vec_duplicate) to
;; every element — the intrinsic's _n form.  The scalar lives in a SIMD
;; register and is printed as lane 0.
3693 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3695 (SBINQOPS:<VWIDE>
3696 (match_operand:<VWIDE> 1 "register_operand" "0")
3697 (ss_ashift:<VWIDE>
3698 (mult:<VWIDE>
3699 (sign_extend:<VWIDE>
3700 (match_operand:VD_HSI 2 "register_operand" "w"))
3701 (sign_extend:<VWIDE>
3702 (vec_duplicate:VD_HSI
3703 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3704 (const_int 1))))]
3705 "TARGET_SIMD"
3706 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3707 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3708 )
3709
3710 ;; sqdml[as]l2
3711
;; High-half form: both multiplicands are the high 64-bit halves of
;; 128-bit inputs, selected via a vect_par_cnst_hi_half PARALLEL
;; (operand 4), which the expanders below construct.
3712 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3713 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3714 (SBINQOPS:<VWIDE>
3715 (match_operand:<VWIDE> 1 "register_operand" "0")
3716 (ss_ashift:<VWIDE>
3717 (mult:<VWIDE>
3718 (sign_extend:<VWIDE>
3719 (vec_select:<VHALF>
3720 (match_operand:VQ_HSI 2 "register_operand" "w")
3721 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3722 (sign_extend:<VWIDE>
3723 (vec_select:<VHALF>
3724 (match_operand:VQ_HSI 3 "register_operand" "w")
3725 (match_dup 4))))
3726 (const_int 1))))]
3727 "TARGET_SIMD"
3728 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3730 )
3731
;; Builder for the accumulate form: synthesise the hi-half selector and
;; emit the _internal insn above.
3732 (define_expand "aarch64_sqdmlal2<mode>"
3733 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3734 (match_operand:<VWIDE> 1 "register_operand" "w")
3735 (match_operand:VQ_HSI 2 "register_operand" "w")
3736 (match_operand:VQ_HSI 3 "register_operand" "w")]
3737 "TARGET_SIMD"
3738 {
3739 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3740 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3741 operands[2], operands[3], p));
3742 DONE;
3743 })
3744
;; Same, for the subtract (sqdmlsl2) variant.
3745 (define_expand "aarch64_sqdmlsl2<mode>"
3746 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3747 (match_operand:<VWIDE> 1 "register_operand" "w")
3748 (match_operand:VQ_HSI 2 "register_operand" "w")
3749 (match_operand:VQ_HSI 3 "register_operand" "w")]
3750 "TARGET_SIMD"
3751 {
3752 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3753 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3754 operands[2], operands[3], p));
3755 DONE;
3756 })
3757
3758 ;; vqdml[sa]l2_lane
3759
;; High-half by-lane form: first multiplicand is the hi half of a 128-bit
;; vector (operand 5 is the hi-half selector), second is one lane of a
;; 64-bit vector (<VCOND>) broadcast across the half.
3760 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3761 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3762 (SBINQOPS:<VWIDE>
3763 (match_operand:<VWIDE> 1 "register_operand" "0")
3764 (ss_ashift:<VWIDE>
3765 (mult:<VWIDE>
3766 (sign_extend:<VWIDE>
3767 (vec_select:<VHALF>
3768 (match_operand:VQ_HSI 2 "register_operand" "w")
3769 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3770 (sign_extend:<VWIDE>
3771 (vec_duplicate:<VHALF>
3772 (vec_select:<VEL>
3773 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3775 ))))
3776 (const_int 1))))]
3777 "TARGET_SIMD"
3778 {
3779 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3780 return
3781 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3782 }
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
;; As above with the lane taken from a 128-bit vector (<VCONQ>).
3786 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (SBINQOPS:<VWIDE>
3789 (match_operand:<VWIDE> 1 "register_operand" "0")
3790 (ss_ashift:<VWIDE>
3791 (mult:<VWIDE>
3792 (sign_extend:<VWIDE>
3793 (vec_select:<VHALF>
3794 (match_operand:VQ_HSI 2 "register_operand" "w")
3795 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3796 (sign_extend:<VWIDE>
3797 (vec_duplicate:<VHALF>
3798 (vec_select:<VEL>
3799 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3800 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3801 ))))
3802 (const_int 1))))]
3803 "TARGET_SIMD"
3804 {
3805 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3806 return
3807 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3808 }
3809 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3810 )
3811
;; The four expanders below just build the hi-half selector and forward
;; to the matching _internal insn.
3812 (define_expand "aarch64_sqdmlal2_lane<mode>"
3813 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3814 (match_operand:<VWIDE> 1 "register_operand" "w")
3815 (match_operand:VQ_HSI 2 "register_operand" "w")
3816 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3817 (match_operand:SI 4 "immediate_operand" "i")]
3818 "TARGET_SIMD"
3819 {
3820 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3821 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3822 operands[2], operands[3],
3823 operands[4], p));
3824 DONE;
3825 })
3826
3827 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3828 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3829 (match_operand:<VWIDE> 1 "register_operand" "w")
3830 (match_operand:VQ_HSI 2 "register_operand" "w")
3831 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3832 (match_operand:SI 4 "immediate_operand" "i")]
3833 "TARGET_SIMD"
3834 {
3835 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3836 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3837 operands[2], operands[3],
3838 operands[4], p));
3839 DONE;
3840 })
3841
3842 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3843 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3844 (match_operand:<VWIDE> 1 "register_operand" "w")
3845 (match_operand:VQ_HSI 2 "register_operand" "w")
3846 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3847 (match_operand:SI 4 "immediate_operand" "i")]
3848 "TARGET_SIMD"
3849 {
3850 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3851 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3852 operands[2], operands[3],
3853 operands[4], p));
3854 DONE;
3855 })
3856
3857 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3858 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3859 (match_operand:<VWIDE> 1 "register_operand" "w")
3860 (match_operand:VQ_HSI 2 "register_operand" "w")
3861 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3862 (match_operand:SI 4 "immediate_operand" "i")]
3863 "TARGET_SIMD"
3864 {
3865 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3866 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3867 operands[2], operands[3],
3868 operands[4], p));
3869 DONE;
3870 })
3871
;; High-half _n form: hi half of a 128-bit vector multiplied by a
;; broadcast scalar (printed as lane 0).
3872 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3873 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3874 (SBINQOPS:<VWIDE>
3875 (match_operand:<VWIDE> 1 "register_operand" "0")
3876 (ss_ashift:<VWIDE>
3877 (mult:<VWIDE>
3878 (sign_extend:<VWIDE>
3879 (vec_select:<VHALF>
3880 (match_operand:VQ_HSI 2 "register_operand" "w")
3881 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3882 (sign_extend:<VWIDE>
3883 (vec_duplicate:<VHALF>
3884 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3885 (const_int 1))))]
3886 "TARGET_SIMD"
3887 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3888 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3889 )
3890
;; Expanders: build the hi-half selector, forward to _internal.
3891 (define_expand "aarch64_sqdmlal2_n<mode>"
3892 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3893 (match_operand:<VWIDE> 1 "register_operand" "w")
3894 (match_operand:VQ_HSI 2 "register_operand" "w")
3895 (match_operand:<VEL> 3 "register_operand" "w")]
3896 "TARGET_SIMD"
3897 {
3898 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3899 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3900 operands[2], operands[3],
3901 p));
3902 DONE;
3903 })
3904
3905 (define_expand "aarch64_sqdmlsl2_n<mode>"
3906 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3907 (match_operand:<VWIDE> 1 "register_operand" "w")
3908 (match_operand:VQ_HSI 2 "register_operand" "w")
3909 (match_operand:<VEL> 3 "register_operand" "w")]
3910 "TARGET_SIMD"
3911 {
3912 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3913 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3914 operands[2], operands[3],
3915 p));
3916 DONE;
3917 })
3918
3919 ;; vqdmull
3920
;; SQDMULL: widening saturating doubling multiply (no accumulate); the
;; ss_ashift by 1 models the doubling with signed saturation.
3921 (define_insn "aarch64_sqdmull<mode>"
3922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923 (ss_ashift:<VWIDE>
3924 (mult:<VWIDE>
3925 (sign_extend:<VWIDE>
3926 (match_operand:VSD_HSI 1 "register_operand" "w"))
3927 (sign_extend:<VWIDE>
3928 (match_operand:VSD_HSI 2 "register_operand" "w")))
3929 (const_int 1)))
3930 "TARGET_SIMD"
3931 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3932 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3933 )
3934
3935 ;; vqdmull_lane
3936
;; Vector SQDMULL by lane: VD_HSI vector times one broadcast lane of a
;; 64-bit vector (<VCOND>).  Lane index is endian-corrected before
;; printing.
3937 (define_insn "aarch64_sqdmull_lane<mode>"
3938 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3939 (ss_ashift:<VWIDE>
3940 (mult:<VWIDE>
3941 (sign_extend:<VWIDE>
3942 (match_operand:VD_HSI 1 "register_operand" "w"))
3943 (sign_extend:<VWIDE>
3944 (vec_duplicate:VD_HSI
3945 (vec_select:<VEL>
3946 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3947 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3948 ))
3949 (const_int 1)))]
3950 "TARGET_SIMD"
3951 {
3952 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3953 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3954 }
3955 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3956 )
3957
;; As above, lane taken from a 128-bit vector (<VCONQ>).
3958 (define_insn "aarch64_sqdmull_laneq<mode>"
3959 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3960 (ss_ashift:<VWIDE>
3961 (mult:<VWIDE>
3962 (sign_extend:<VWIDE>
3963 (match_operand:VD_HSI 1 "register_operand" "w"))
3964 (sign_extend:<VWIDE>
3965 (vec_duplicate:VD_HSI
3966 (vec_select:<VEL>
3967 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3968 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3969 ))
3970 (const_int 1)))]
3971 "TARGET_SIMD"
3972 {
3973 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3974 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3975 }
3976 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3977 )
3978
;; Scalar (SD_HSI) SQDMULL by lane — same mnemonic, no vec_duplicate
;; since the product is a single element.  Name overloads the vector
;; pattern above; the generated insn names differ by mode.
3979 (define_insn "aarch64_sqdmull_lane<mode>"
3980 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3981 (ss_ashift:<VWIDE>
3982 (mult:<VWIDE>
3983 (sign_extend:<VWIDE>
3984 (match_operand:SD_HSI 1 "register_operand" "w"))
3985 (sign_extend:<VWIDE>
3986 (vec_select:<VEL>
3987 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3988 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3989 ))
3990 (const_int 1)))]
3991 "TARGET_SIMD"
3992 {
3993 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3994 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3995 }
3996 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3997 )
3998
;; Scalar _laneq variant (<VCONQ> lane source).
3999 (define_insn "aarch64_sqdmull_laneq<mode>"
4000 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4001 (ss_ashift:<VWIDE>
4002 (mult:<VWIDE>
4003 (sign_extend:<VWIDE>
4004 (match_operand:SD_HSI 1 "register_operand" "w"))
4005 (sign_extend:<VWIDE>
4006 (vec_select:<VEL>
4007 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4008 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4009 ))
4010 (const_int 1)))]
4011 "TARGET_SIMD"
4012 {
4013 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4014 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4015 }
4016 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4017 )
4018
4019 ;; vqdmull_n
4020
;; SQDMULL with a broadcast scalar multiplier (printed as lane 0).
4021 (define_insn "aarch64_sqdmull_n<mode>"
4022 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4023 (ss_ashift:<VWIDE>
4024 (mult:<VWIDE>
4025 (sign_extend:<VWIDE>
4026 (match_operand:VD_HSI 1 "register_operand" "w"))
4027 (sign_extend:<VWIDE>
4028 (vec_duplicate:VD_HSI
4029 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4030 )
4031 (const_int 1)))]
4032 "TARGET_SIMD"
4033 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4034 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4035 )
4036
4037 ;; vqdmull2
4038
4039
4040
;; SQDMULL2: both multiplicands are the hi halves of 128-bit vectors,
;; selected via the shared hi-half PARALLEL in operand 3.
4041 (define_insn "aarch64_sqdmull2<mode>_internal"
4042 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4043 (ss_ashift:<VWIDE>
4044 (mult:<VWIDE>
4045 (sign_extend:<VWIDE>
4046 (vec_select:<VHALF>
4047 (match_operand:VQ_HSI 1 "register_operand" "w")
4048 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4049 (sign_extend:<VWIDE>
4050 (vec_select:<VHALF>
4051 (match_operand:VQ_HSI 2 "register_operand" "w")
4052 (match_dup 3)))
4053 )
4054 (const_int 1)))]
4055 "TARGET_SIMD"
4056 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4057 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4058 )
4059
;; Expander: build the hi-half selector and emit _internal.
4060 (define_expand "aarch64_sqdmull2<mode>"
4061 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (match_operand:VQ_HSI 1 "register_operand" "w")
4063 (match_operand:VQ_HSI 2 "register_operand" "w")]
4064 "TARGET_SIMD"
4065 {
4066 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4067 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4068 operands[2], p));
4069 DONE;
4070 })
4071
4072 ;; vqdmull2_lane
4073
;; SQDMULL2 by lane: hi half of operand 1 times a broadcast lane of a
;; 64-bit vector (<VCOND>).  Lane index endian-corrected before printing.
4074 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4075 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4076 (ss_ashift:<VWIDE>
4077 (mult:<VWIDE>
4078 (sign_extend:<VWIDE>
4079 (vec_select:<VHALF>
4080 (match_operand:VQ_HSI 1 "register_operand" "w")
4081 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4082 (sign_extend:<VWIDE>
4083 (vec_duplicate:<VHALF>
4084 (vec_select:<VEL>
4085 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4086 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4087 ))
4088 (const_int 1)))]
4089 "TARGET_SIMD"
4090 {
4091 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4092 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4093 }
4094 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4095 )
4096
;; As above, lane from a 128-bit vector (<VCONQ>).
4097 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4098 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4099 (ss_ashift:<VWIDE>
4100 (mult:<VWIDE>
4101 (sign_extend:<VWIDE>
4102 (vec_select:<VHALF>
4103 (match_operand:VQ_HSI 1 "register_operand" "w")
4104 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4105 (sign_extend:<VWIDE>
4106 (vec_duplicate:<VHALF>
4107 (vec_select:<VEL>
4108 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4109 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4110 ))
4111 (const_int 1)))]
4112 "TARGET_SIMD"
4113 {
4114 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4115 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4116 }
4117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4118 )
4119
;; Expanders: build the hi-half selector, forward to _internal.
4120 (define_expand "aarch64_sqdmull2_lane<mode>"
4121 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4122 (match_operand:VQ_HSI 1 "register_operand" "w")
4123 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4124 (match_operand:SI 3 "immediate_operand" "i")]
4125 "TARGET_SIMD"
4126 {
4127 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4128 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4129 operands[2], operands[3],
4130 p));
4131 DONE;
4132 })
4133
4134 (define_expand "aarch64_sqdmull2_laneq<mode>"
4135 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4136 (match_operand:VQ_HSI 1 "register_operand" "w")
4137 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4138 (match_operand:SI 3 "immediate_operand" "i")]
4139 "TARGET_SIMD"
4140 {
4141 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4142 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4143 operands[2], operands[3],
4144 p));
4145 DONE;
4146 })
4147
4148 ;; vqdmull2_n
4149
;; SQDMULL2 with a broadcast scalar multiplier against the hi half of a
;; 128-bit vector.
4150 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4151 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4152 (ss_ashift:<VWIDE>
4153 (mult:<VWIDE>
4154 (sign_extend:<VWIDE>
4155 (vec_select:<VHALF>
4156 (match_operand:VQ_HSI 1 "register_operand" "w")
4157 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4158 (sign_extend:<VWIDE>
4159 (vec_duplicate:<VHALF>
4160 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4161 )
4162 (const_int 1)))]
4163 "TARGET_SIMD"
4164 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4165 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4166 )
4167
;; Expander: build the hi-half selector and emit _internal.
4168 (define_expand "aarch64_sqdmull2_n<mode>"
4169 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4170 (match_operand:VQ_HSI 1 "register_operand" "w")
4171 (match_operand:<VEL> 2 "register_operand" "w")]
4172 "TARGET_SIMD"
4173 {
4174 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4175 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4176 operands[2], p));
4177 DONE;
4178 })
4179
4180 ;; vshl
4181
;; SSHL/USHL (and rounding variants via the VSHL unspec iterator):
;; element-wise shift of operand 1 by the per-element signed amounts in
;; operand 2.  Modelled as an unspec because negative amounts shift
;; right, which plain ashift cannot express.
;; Fix: dropped the stray ';' that followed the output template — in md
;; syntax it started a dead end-of-line comment and served no purpose.
4182 (define_insn "aarch64_<sur>shl<mode>"
4183 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4184 (unspec:VSDQ_I_DI
4185 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4186 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4187 VSHL))]
4188 "TARGET_SIMD"
4189 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4190 [(set_attr "type" "neon_shift_reg<q>")]
4191 )
4192
4193
4194 ;; vqshl
4195
;; SQSHL/UQSHL/SQRSHL/UQRSHL: saturating (optionally rounding) register
;; shift, via the VQSHL unspec iterator.
;; Fix: dropped the stray ';' that followed the output template — in md
;; syntax it started a dead end-of-line comment and served no purpose.
4196 (define_insn "aarch64_<sur>q<r>shl<mode>"
4197 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4198 (unspec:VSDQ_I
4199 [(match_operand:VSDQ_I 1 "register_operand" "w")
4200 (match_operand:VSDQ_I 2 "register_operand" "w")]
4201 VQSHL))]
4202 "TARGET_SIMD"
4203 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4204 [(set_attr "type" "neon_sat_shift_reg<q>")]
4205 )
4206
4207 ;; vshll_n
4208
;; SSHLL/USHLL by immediate; when the shift equals the element width the
;; architecture spells it SHLL, so the output switches template.
4209 (define_insn "aarch64_<sur>shll_n<mode>"
4210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4212 (match_operand:SI 2
4213 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4214 VSHLL))]
4215 "TARGET_SIMD"
4216 {
4217 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4218 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4219 else
4220 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4221 }
4222 [(set_attr "type" "neon_shift_imm_long")]
4223 )
4224
4225 ;; vshll_high_n
4226
;; High-half counterpart of vshll_n (SHLL2/SSHLL2/USHLL2).
;; NOTE(review): operand 2 uses plain immediate_operand here, unlike
;; shll_n's aarch64_simd_shift_imm_bitsize_<ve_mode> predicate —
;; presumably range checking happens in the intrinsic expanders; confirm.
4227 (define_insn "aarch64_<sur>shll2_n<mode>"
4228 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4229 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4230 (match_operand:SI 2 "immediate_operand" "i")]
4231 VSHLL))]
4232 "TARGET_SIMD"
4233 {
4234 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4235 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4236 else
4237 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4238 }
4239 [(set_attr "type" "neon_shift_imm_long")]
4240 )
4241
4242 ;; vrshr_n
4243
;; SRSHR/URSHR: rounding shift right by immediate (1..element width).
4244 (define_insn "aarch64_<sur>shr_n<mode>"
4245 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4246 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4247 (match_operand:SI 2
4248 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4249 VRSHR_N))]
4250 "TARGET_SIMD"
4251 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4252 [(set_attr "type" "neon_sat_shift_imm<q>")]
4253 )
4254
4255 ;; v(r)sra_n
4256
;; SSRA/USRA/SRSRA/URSRA: shift right by immediate and accumulate into
;; operand 0 (tied to operand 1 via the "0" constraint).
4257 (define_insn "aarch64_<sur>sra_n<mode>"
4258 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4259 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4260 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4261 (match_operand:SI 3
4262 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4263 VSRA))]
4264 "TARGET_SIMD"
4265 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4266 [(set_attr "type" "neon_shift_acc<q>")]
4267 )
4268
4269 ;; vs<lr>i_n
4270
;; SLI/SRI: shift-and-insert; operand 1 supplies the bits preserved in
;; the destination (tied via "0"), operand 2 is shifted in.
4271 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4272 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4273 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4274 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4275 (match_operand:SI 3
4276 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4277 VSLRI))]
4278 "TARGET_SIMD"
4279 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4280 [(set_attr "type" "neon_shift_imm<q>")]
4281 )
4282
4283 ;; vqshl(u)
4284
;; SQSHL/UQSHL/SQSHLU: saturating shift left by immediate.
4285 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4286 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4287 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4288 (match_operand:SI 2
4289 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4290 VQSHL_N))]
4291 "TARGET_SIMD"
4292 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4293 [(set_attr "type" "neon_sat_shift_imm<q>")]
4294 )
4295
4296
4297 ;; vq(r)shr(u)n_n
4298
;; SQSHRN/UQSHRN/SQRSHRN/UQRSHRN/SQSHRUN/SQRSHRUN: saturating
;; (optionally rounding) shift right by immediate and narrow.
4299 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4300 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4301 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4302 (match_operand:SI 2
4303 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4304 VQSHRN_N))]
4305 "TARGET_SIMD"
4306 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4307 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4308 )
4309
4310
4311 ;; cm(eq|ge|gt|lt|le)
4312 ;; Note, we have constraints for Dz and Z as different expanders
4313 ;; have different ideas of what should be passed to this pattern.
4314
;; Vector signed compares producing an all-ones/all-zeros mask
;; (neg of the comparison result).  Alternative 2 is compare-with-zero.
4315 (define_insn "aarch64_cm<optab><mode>"
4316 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4317 (neg:<V_INT_EQUIV>
4318 (COMPARISONS:<V_INT_EQUIV>
4319 (match_operand:VDQ_I 1 "register_operand" "w,w")
4320 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4321 )))]
4322 "TARGET_SIMD"
4323 "@
4324 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4325 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4326 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4327 )
4328
;; DImode compare: may end up in either register file, so keep a GP
;; alternative and split post-reload to a compare + cstore when the
;; operands landed in general registers.
4329 (define_insn_and_split "aarch64_cm<optab>di"
4330 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4331 (neg:DI
4332 (COMPARISONS:DI
4333 (match_operand:DI 1 "register_operand" "w,w,r")
4334 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4335 )))
4336 (clobber (reg:CC CC_REGNUM))]
4337 "TARGET_SIMD"
4338 "#"
4339 "reload_completed"
4340 [(set (match_operand:DI 0 "register_operand")
4341 (neg:DI
4342 (COMPARISONS:DI
4343 (match_operand:DI 1 "register_operand")
4344 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4345 )))]
4346 {
4347 /* If we are in the general purpose register file,
4348 we split to a sequence of comparison and store. */
4349 if (GP_REGNUM_P (REGNO (operands[0]))
4350 && GP_REGNUM_P (REGNO (operands[1])))
4351 {
4352 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4353 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4354 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4355 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4356 DONE;
4357 }
4358 /* Otherwise, we expand to a similar pattern which does not
4359 clobber CC_REGNUM. */
4360 }
4361 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4362 )
4363
;; Target of the FP-register split above: same compare without the CC
;; clobber, only valid post-reload.
4364 (define_insn "*aarch64_cm<optab>di"
4365 [(set (match_operand:DI 0 "register_operand" "=w,w")
4366 (neg:DI
4367 (COMPARISONS:DI
4368 (match_operand:DI 1 "register_operand" "w,w")
4369 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4370 )))]
4371 "TARGET_SIMD && reload_completed"
4372 "@
4373 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4374 cm<optab>\t%d0, %d1, #0"
4375 [(set_attr "type" "neon_compare, neon_compare_zero")]
4376 )
4377
4378 ;; cm(hs|hi)
4379
;; Vector unsigned compares (CMHS/CMHI); no compare-with-zero
;; alternative since unsigned >= 0 / > 0 fold away earlier.
4380 (define_insn "aarch64_cm<optab><mode>"
4381 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4382 (neg:<V_INT_EQUIV>
4383 (UCOMPARISONS:<V_INT_EQUIV>
4384 (match_operand:VDQ_I 1 "register_operand" "w")
4385 (match_operand:VDQ_I 2 "register_operand" "w")
4386 )))]
4387 "TARGET_SIMD"
4388 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4389 [(set_attr "type" "neon_compare<q>")]
4390 )
4391
;; DImode unsigned compare; splits post-reload exactly like the signed
;; variant above.
4392 (define_insn_and_split "aarch64_cm<optab>di"
4393 [(set (match_operand:DI 0 "register_operand" "=w,r")
4394 (neg:DI
4395 (UCOMPARISONS:DI
4396 (match_operand:DI 1 "register_operand" "w,r")
4397 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4398 )))
4399 (clobber (reg:CC CC_REGNUM))]
4400 "TARGET_SIMD"
4401 "#"
4402 "reload_completed"
4403 [(set (match_operand:DI 0 "register_operand")
4404 (neg:DI
4405 (UCOMPARISONS:DI
4406 (match_operand:DI 1 "register_operand")
4407 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4408 )))]
4409 {
4410 /* If we are in the general purpose register file,
4411 we split to a sequence of comparison and store. */
4412 if (GP_REGNUM_P (REGNO (operands[0]))
4413 && GP_REGNUM_P (REGNO (operands[1])))
4414 {
4415 machine_mode mode = CCmode;
4416 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4417 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4418 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4419 DONE;
4420 }
4421 /* Otherwise, we expand to a similar pattern which does not
4422 clobber CC_REGNUM. */
4423 }
4424 [(set_attr "type" "neon_compare,multiple")]
4425 )
4426
;; Post-reload FP-register form without the CC clobber.
4427 (define_insn "*aarch64_cm<optab>di"
4428 [(set (match_operand:DI 0 "register_operand" "=w")
4429 (neg:DI
4430 (UCOMPARISONS:DI
4431 (match_operand:DI 1 "register_operand" "w")
4432 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4433 )))]
4434 "TARGET_SIMD && reload_completed"
4435 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4436 [(set_attr "type" "neon_compare")]
4437 )
4438
4439 ;; cmtst
4440
4441 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4442 ;; we don't have any insns using ne, and aarch64_vcond outputs
4443 ;; not (neg (eq (and x y) 0))
4444 ;; which is rewritten by simplify_rtx as
4445 ;; plus (eq (and x y) 0) -1.
4446
;; CMTST: mask of elements where (x & y) != 0, matched in the
;; plus/eq/minus-one form explained above.
4447 (define_insn "aarch64_cmtst<mode>"
4448 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4449 (plus:<V_INT_EQUIV>
4450 (eq:<V_INT_EQUIV>
4451 (and:VDQ_I
4452 (match_operand:VDQ_I 1 "register_operand" "w")
4453 (match_operand:VDQ_I 2 "register_operand" "w"))
4454 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4455 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4456 ]
4457 "TARGET_SIMD"
4458 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4459 [(set_attr "type" "neon_tst<q>")]
4460 )
4461
;; DImode cmtst; like the compares above, splits post-reload to a
;; tst + cstore sequence when allocated to general registers.
4462 (define_insn_and_split "aarch64_cmtstdi"
4463 [(set (match_operand:DI 0 "register_operand" "=w,r")
4464 (neg:DI
4465 (ne:DI
4466 (and:DI
4467 (match_operand:DI 1 "register_operand" "w,r")
4468 (match_operand:DI 2 "register_operand" "w,r"))
4469 (const_int 0))))
4470 (clobber (reg:CC CC_REGNUM))]
4471 "TARGET_SIMD"
4472 "#"
4473 "reload_completed"
4474 [(set (match_operand:DI 0 "register_operand")
4475 (neg:DI
4476 (ne:DI
4477 (and:DI
4478 (match_operand:DI 1 "register_operand")
4479 (match_operand:DI 2 "register_operand"))
4480 (const_int 0))))]
4481 {
4482 /* If we are in the general purpose register file,
4483 we split to a sequence of comparison and store. */
4484 if (GP_REGNUM_P (REGNO (operands[0]))
4485 && GP_REGNUM_P (REGNO (operands[1])))
4486 {
4487 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4488 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4489 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4490 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4491 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4492 DONE;
4493 }
4494 /* Otherwise, we expand to a similar pattern which does not
4495 clobber CC_REGNUM. */
4496 }
4497 [(set_attr "type" "neon_tst,multiple")]
4498 )
4499
;; Post-reload FP-register form without the CC clobber.
4500 (define_insn "*aarch64_cmtstdi"
4501 [(set (match_operand:DI 0 "register_operand" "=w")
4502 (neg:DI
4503 (ne:DI
4504 (and:DI
4505 (match_operand:DI 1 "register_operand" "w")
4506 (match_operand:DI 2 "register_operand" "w"))
4507 (const_int 0))))]
4508 "TARGET_SIMD"
4509 "cmtst\t%d0, %d1, %d2"
4510 [(set_attr "type" "neon_tst")]
4511 )
4512
4513 ;; fcm(eq|ge|gt|le|lt)
4514
;; Floating-point compares producing an integer mask; alternative 2 is
;; compare against +0.0 (YDz constraint).
4515 (define_insn "aarch64_cm<optab><mode>"
4516 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4517 (neg:<V_INT_EQUIV>
4518 (COMPARISONS:<V_INT_EQUIV>
4519 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4520 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4521 )))]
4522 "TARGET_SIMD"
4523 "@
4524 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4525 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4526 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4527 )
4528
4529 ;; fac(ge|gt)
4530 ;; Note we can also handle what would be fac(le|lt) by
4531 ;; generating fac(ge|gt).
4532
;; FACGE/FACGT: compare of absolute values; the cmp_1/cmp_2 attributes
;; swap the operands when the le/lt forms are requested.
4533 (define_insn "aarch64_fac<optab><mode>"
4534 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4535 (neg:<V_INT_EQUIV>
4536 (FAC_COMPARISONS:<V_INT_EQUIV>
4537 (abs:VHSDF_HSDF
4538 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4539 (abs:VHSDF_HSDF
4540 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4541 )))]
4542 "TARGET_SIMD"
4543 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4544 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4545 )
4546
4547 ;; addp
4548
;; ADDP: pairwise add of adjacent elements across the two inputs.
4549 (define_insn "aarch64_addp<mode>"
4550 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4551 (unspec:VD_BHSI
4552 [(match_operand:VD_BHSI 1 "register_operand" "w")
4553 (match_operand:VD_BHSI 2 "register_operand" "w")]
4554 UNSPEC_ADDP))]
4555 "TARGET_SIMD"
4556 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4557 [(set_attr "type" "neon_reduc_add<q>")]
4558 )
4559
;; Scalar ADDP: sum of the two doubleword elements of a V2DI.
4560 (define_insn "aarch64_addpdi"
4561 [(set (match_operand:DI 0 "register_operand" "=w")
4562 (unspec:DI
4563 [(match_operand:V2DI 1 "register_operand" "w")]
4564 UNSPEC_ADDP))]
4565 "TARGET_SIMD"
4566 "addp\t%d0, %1.2d"
4567 [(set_attr "type" "neon_reduc_add")]
4568 )
4569
4570 ;; sqrt
4571
;; Standard-named expander: tries the reciprocal-estimate approximation
;; (under -mlow-precision-sqrt etc.) and otherwise falls through to the
;; FSQRT insn below.
4572 (define_expand "sqrt<mode>2"
4573 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4574 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4575 "TARGET_SIMD"
4576 {
4577 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4578 DONE;
4579 })
4580
;; Plain vector FSQRT.
4581 (define_insn "*sqrt<mode>2"
4582 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4583 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4584 "TARGET_SIMD"
4585 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4586 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4587 )
4588
4589 ;; Patterns for vector struct loads and stores.
4590
;; LD2 of a register pair (OImode holds the two vectors); the inner
;; unspec only carries the element mode for iterator purposes.
4591 (define_insn "aarch64_simd_ld2<mode>"
4592 [(set (match_operand:OI 0 "register_operand" "=w")
4593 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4594 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4595 UNSPEC_LD2))]
4596 "TARGET_SIMD"
4597 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4598 [(set_attr "type" "neon_load2_2reg<q>")]
4599 )
4600
;; LD2R: load one structure and replicate to all lanes of both registers.
;; The memory operand is BLK since the access size differs from OImode.
4601 (define_insn "aarch64_simd_ld2r<mode>"
4602 [(set (match_operand:OI 0 "register_operand" "=w")
4603 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4604 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4605 UNSPEC_LD2_DUP))]
4606 "TARGET_SIMD"
4607 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4608 [(set_attr "type" "neon_load2_all_lanes<q>")]
4609 )
4610
;; LD2 to a single lane (operand 3) of the register pair, merging with
;; the prior pair contents in operand 2; lane index endian-corrected.
4611 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4612 [(set (match_operand:OI 0 "register_operand" "=w")
4613 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4614 (match_operand:OI 2 "register_operand" "0")
4615 (match_operand:SI 3 "immediate_operand" "i")
4616 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4617 UNSPEC_LD2_LANE))]
4618 "TARGET_SIMD"
4619 {
4620 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4621 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4622 }
4623 [(set_attr "type" "neon_load2_one_lane")]
4624 )
4625
;; Standard-named expander; on big-endian, loads into a temporary and
;; permutes the register list so lane numbering matches GCC's model.
4626 (define_expand "vec_load_lanesoi<mode>"
4627 [(set (match_operand:OI 0 "register_operand" "=w")
4628 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4629 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4630 UNSPEC_LD2))]
4631 "TARGET_SIMD"
4632 {
4633 if (BYTES_BIG_ENDIAN)
4634 {
4635 rtx tmp = gen_reg_rtx (OImode);
4636 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4637 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4638 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4639 }
4640 else
4641 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4642 DONE;
4643 })
4644
;; ST2: store the interleaved register pair.
4645 (define_insn "aarch64_simd_st2<mode>"
4646 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4647 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4648 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4649 UNSPEC_ST2))]
4650 "TARGET_SIMD"
4651 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4652 [(set_attr "type" "neon_store2_2reg<q>")]
4653 )
4654
4655 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4656 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4657 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4658 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4659 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4660 (match_operand:SI 2 "immediate_operand" "i")]
4661 UNSPEC_ST2_LANE))]
4662 "TARGET_SIMD"
4663 {
4664 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4665 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4666 }
4667 [(set_attr "type" "neon_store2_one_lane<q>")]
4668 )
4669
4670 (define_expand "vec_store_lanesoi<mode>"
4671 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4672 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4673 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4674 UNSPEC_ST2))]
4675 "TARGET_SIMD"
4676 {
4677 if (BYTES_BIG_ENDIAN)
4678 {
4679 rtx tmp = gen_reg_rtx (OImode);
4680 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4681 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4682 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4683 }
4684 else
4685 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4686 DONE;
4687 })
4688
;; LD3/ST3: three-register structure loads/stores, using CImode
;; (3x128-bit) register lists.  These mirror the two-register LD2/ST2
;; patterns above; see the comments there for the shared structure.
4689 (define_insn "aarch64_simd_ld3<mode>"
4690 [(set (match_operand:CI 0 "register_operand" "=w")
4691 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4692 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4693 UNSPEC_LD3))]
4694 "TARGET_SIMD"
4695 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4696 [(set_attr "type" "neon_load3_3reg<q>")]
4697 )
4698
;; LD3R: load one structure and replicate across all lanes of the three
;; destination registers.
4699 (define_insn "aarch64_simd_ld3r<mode>"
4700 [(set (match_operand:CI 0 "register_operand" "=w")
4701 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4702 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4703 UNSPEC_LD3_DUP))]
4704 "TARGET_SIMD"
4705 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4706 [(set_attr "type" "neon_load3_all_lanes<q>")]
4707 )
4708
;; LD3 (single structure): load lane %3 of all three registers,
;; preserving the other lanes (operand 2 tied to the output).
4709 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4710 [(set (match_operand:CI 0 "register_operand" "=w")
4711 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4712 (match_operand:CI 2 "register_operand" "0")
4713 (match_operand:SI 3 "immediate_operand" "i")
4714 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4715 UNSPEC_LD3_LANE))]
4716 "TARGET_SIMD"
4717 {
4718 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4719 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4720 }
4721 [(set_attr "type" "neon_load3_one_lane")]
4722 )
4723
;; Standard-name expander; big-endian reverses the register list after
;; the raw ld3, as in vec_load_lanesoi.
4724 (define_expand "vec_load_lanesci<mode>"
4725 [(set (match_operand:CI 0 "register_operand" "=w")
4726 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4727 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4728 UNSPEC_LD3))]
4729 "TARGET_SIMD"
4730 {
4731 if (BYTES_BIG_ENDIAN)
4732 {
4733 rtx tmp = gen_reg_rtx (CImode);
4734 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4735 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4736 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4737 }
4738 else
4739 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4740 DONE;
4741 })
4742
;; ST3: interleaving store of three Q registers.
4743 (define_insn "aarch64_simd_st3<mode>"
4744 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4745 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4746 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4747 UNSPEC_ST3))]
4748 "TARGET_SIMD"
4749 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4750 [(set_attr "type" "neon_store3_3reg<q>")]
4751 )
4752
4753 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4754 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4755 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4756 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4757 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4758 (match_operand:SI 2 "immediate_operand" "i")]
4759 UNSPEC_ST3_LANE))]
4760 "TARGET_SIMD"
4761 {
4762 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4763 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4764 }
4765 [(set_attr "type" "neon_store3_one_lane<q>")]
4766 )
4767
;; Standard-name expander; big-endian permutes the register list before
;; the raw st3.
4768 (define_expand "vec_store_lanesci<mode>"
4769 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4770 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4772 UNSPEC_ST3))]
4773 "TARGET_SIMD"
4774 {
4775 if (BYTES_BIG_ENDIAN)
4776 {
4777 rtx tmp = gen_reg_rtx (CImode);
4778 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4779 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4780 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4781 }
4782 else
4783 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4784 DONE;
4785 })
4786
;; LD4/ST4: four-register structure loads/stores, using XImode
;; (4x128-bit) register lists.  Same scheme as the LD2/ST2 patterns.
4787 (define_insn "aarch64_simd_ld4<mode>"
4788 [(set (match_operand:XI 0 "register_operand" "=w")
4789 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4790 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4791 UNSPEC_LD4))]
4792 "TARGET_SIMD"
4793 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4794 [(set_attr "type" "neon_load4_4reg<q>")]
4795 )
4796
;; LD4R: load one structure and replicate across all lanes of the four
;; destination registers.
4797 (define_insn "aarch64_simd_ld4r<mode>"
4798 [(set (match_operand:XI 0 "register_operand" "=w")
4799 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4800 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4801 UNSPEC_LD4_DUP))]
4802 "TARGET_SIMD"
4803 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4804 [(set_attr "type" "neon_load4_all_lanes<q>")]
4805 )
4806
;; LD4 (single structure): load lane %3 of all four registers, keeping
;; the other lanes (operand 2 tied to the output).
4807 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4808 [(set (match_operand:XI 0 "register_operand" "=w")
4809 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4810 (match_operand:XI 2 "register_operand" "0")
4811 (match_operand:SI 3 "immediate_operand" "i")
4812 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4813 UNSPEC_LD4_LANE))]
4814 "TARGET_SIMD"
4815 {
4816 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4817 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4818 }
4819 [(set_attr "type" "neon_load4_one_lane")]
4820 )
4821
;; Standard-name expander; big-endian reverses the register list after
;; the raw ld4.
4822 (define_expand "vec_load_lanesxi<mode>"
4823 [(set (match_operand:XI 0 "register_operand" "=w")
4824 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4825 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4826 UNSPEC_LD4))]
4827 "TARGET_SIMD"
4828 {
4829 if (BYTES_BIG_ENDIAN)
4830 {
4831 rtx tmp = gen_reg_rtx (XImode);
4832 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4833 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4834 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4835 }
4836 else
4837 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4838 DONE;
4839 })
4840
;; ST4: interleaving store of four Q registers.
4841 (define_insn "aarch64_simd_st4<mode>"
4842 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4843 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4844 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845 UNSPEC_ST4))]
4846 "TARGET_SIMD"
4847 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4848 [(set_attr "type" "neon_store4_4reg<q>")]
4849 )
4850
4851 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4852 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4853 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4854 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4855 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4856 (match_operand:SI 2 "immediate_operand" "i")]
4857 UNSPEC_ST4_LANE))]
4858 "TARGET_SIMD"
4859 {
4860 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4861 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4862 }
4863 [(set_attr "type" "neon_store4_one_lane<q>")]
4864 )
4865
;; Standard-name expander; big-endian permutes the register list before
;; the raw st4.
4866 (define_expand "vec_store_lanesxi<mode>"
4867 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4868 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4869 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4870 UNSPEC_ST4))]
4871 "TARGET_SIMD"
4872 {
4873 if (BYTES_BIG_ENDIAN)
4874 {
4875 rtx tmp = gen_reg_rtx (XImode);
4876 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4877 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4878 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4879 }
4880 else
4881 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4882 DONE;
4883 })
4884
;; Permute a whole structure register list with the byte mask in
;; operand 2 (built by aarch64_reverse_mask in the expanders above).
;; After reload this splits into one TBL per 128-bit register in the
;; list; the earlyclobber ("=&w") keeps the destination list disjoint
;; from the source so the per-register TBLs do not overwrite inputs.
4885 (define_insn_and_split "aarch64_rev_reglist<mode>"
4886 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4887 (unspec:VSTRUCT
4888 [(match_operand:VSTRUCT 1 "register_operand" "w")
4889 (match_operand:V16QI 2 "register_operand" "w")]
4890 UNSPEC_REV_REGLIST))]
4891 "TARGET_SIMD"
4892 "#"
4893 "&& reload_completed"
4894 [(const_int 0)]
4895 {
4896 int i;
4897 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4898 for (i = 0; i < nregs; i++)
4899 {
4900 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4901 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4902 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4903 }
4904 DONE;
4905 }
4906 [(set_attr "type" "neon_tbl1_q")
4907 (set_attr "length" "<insn_count>")]
4908 )
4909
4910 ;; Reload patterns for AdvSIMD register list operands.
4911
;; Move expander for the opaque structure modes (OI/CI/XI).  Before
;; reload, force memory-to-memory moves through a register; after
;; reload the insn pattern below (or the big-endian ones further down)
;; handles the move directly.
4912 (define_expand "mov<mode>"
4913 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4914 (match_operand:VSTRUCT 1 "general_operand" ""))]
4915 "TARGET_SIMD"
4916 {
4917 if (can_create_pseudo_p ())
4918 {
4919 if (GET_CODE (operands[0]) != REG)
4920 operands[1] = force_reg (<MODE>mode, operands[1]);
4921 }
4922 })
4923
;; Little-endian structure move: reg-reg is split later ("#"); stores
;; and loads use multi-register st1/ld1 on .16b lists.
4924 (define_insn "*aarch64_mov<mode>"
4925 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4926 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4927 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4928 && (register_operand (operands[0], <MODE>mode)
4929 || register_operand (operands[1], <MODE>mode))"
4930 "@
4931 #
4932 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4933 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4934 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4935 neon_load<nregs>_<nregs>reg_q")
4936 (set_attr "length" "<insn_count>,4,4")]
4937 )
4938
;; Endianness-invariant single-register load: ld1 loads elements in
;; array order regardless of BYTES_BIG_ENDIAN, which is why this is an
;; unspec rather than a plain move.
4939 (define_insn "aarch64_be_ld1<mode>"
4940 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4941 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4942 "aarch64_simd_struct_operand" "Utv")]
4943 UNSPEC_LD1))]
4944 "TARGET_SIMD"
4945 "ld1\\t{%0<Vmtype>}, %1"
4946 [(set_attr "type" "neon_load1_1reg<q>")]
4947 )
4948
;; Endianness-invariant single-register store; counterpart of the ld1
;; pattern above.
4949 (define_insn "aarch64_be_st1<mode>"
4950 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4951 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4952 UNSPEC_ST1))]
4953 "TARGET_SIMD"
4954 "st1\\t{%1<Vmtype>}, %0"
4955 [(set_attr "type" "neon_store1_1reg<q>")]
4956 )
4957
;; Big-endian OImode move: reg-reg is split ("#"); memory transfers use
;; Q-register ldp/stp, which are endianness-safe for whole registers.
4958 (define_insn "*aarch64_be_movoi"
4959 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4960 (match_operand:OI 1 "general_operand" " w,w,m"))]
4961 "TARGET_SIMD && BYTES_BIG_ENDIAN
4962 && (register_operand (operands[0], OImode)
4963 || register_operand (operands[1], OImode))"
4964 "@
4965 #
4966 stp\\t%q1, %R1, %0
4967 ldp\\t%q0, %R0, %1"
4968 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4969 (set_attr "length" "8,4,4")]
4970 )
4971
;; Big-endian CImode move: always split after reload (see the CI
;; define_split below); "o" (offsettable) memory is required so the
;; split can address the pieces.
4972 (define_insn "*aarch64_be_movci"
4973 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4974 (match_operand:CI 1 "general_operand" " w,w,o"))]
4975 "TARGET_SIMD && BYTES_BIG_ENDIAN
4976 && (register_operand (operands[0], CImode)
4977 || register_operand (operands[1], CImode))"
4978 "#"
4979 [(set_attr "type" "multiple")
4980 (set_attr "length" "12,4,4")]
4981 )
4982
;; Big-endian XImode move: likewise always split after reload.
4983 (define_insn "*aarch64_be_movxi"
4984 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4985 (match_operand:XI 1 "general_operand" " w,w,o"))]
4986 "TARGET_SIMD && BYTES_BIG_ENDIAN
4987 && (register_operand (operands[0], XImode)
4988 || register_operand (operands[1], XImode))"
4989 "#"
4990 [(set_attr "type" "multiple")
4991 (set_attr "length" "16,4,4")]
4992 )
4993
;; Split an OImode register-to-register move into two TImode moves,
;; ordered by aarch64_simd_emit_reg_reg_move to cope with overlap.
4994 (define_split
4995 [(set (match_operand:OI 0 "register_operand")
4996 (match_operand:OI 1 "register_operand"))]
4997 "TARGET_SIMD && reload_completed"
4998 [(const_int 0)]
4999 {
5000 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5001 DONE;
5002 })
5003
;; Split CImode moves after reload: reg-reg becomes three TImode moves;
;; on big-endian a memory move becomes an OImode move for the first 32
;; bytes plus a V16QI move for the trailing 16 (the V16QI lowpart keeps
;; the lane layout consistent with the be_ld1/be_st1 scheme).
;; Little-endian memory moves FAIL and are handled elsewhere.
5004 (define_split
5005 [(set (match_operand:CI 0 "nonimmediate_operand")
5006 (match_operand:CI 1 "general_operand"))]
5007 "TARGET_SIMD && reload_completed"
5008 [(const_int 0)]
5009 {
5010 if (register_operand (operands[0], CImode)
5011 && register_operand (operands[1], CImode))
5012 {
5013 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5014 DONE;
5015 }
5016 else if (BYTES_BIG_ENDIAN)
5017 {
5018 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5019 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5020 emit_move_insn (gen_lowpart (V16QImode,
5021 simplify_gen_subreg (TImode, operands[0],
5022 CImode, 32)),
5023 gen_lowpart (V16QImode,
5024 simplify_gen_subreg (TImode, operands[1],
5025 CImode, 32)));
5026 DONE;
5027 }
5028 else
5029 FAIL;
5030 })
5031
;; Split XImode moves after reload: reg-reg becomes four TImode moves;
;; big-endian memory moves become two OImode halves at offsets 0 and 32.
5032 (define_split
5033 [(set (match_operand:XI 0 "nonimmediate_operand")
5034 (match_operand:XI 1 "general_operand"))]
5035 "TARGET_SIMD && reload_completed"
5036 [(const_int 0)]
5037 {
5038 if (register_operand (operands[0], XImode)
5039 && register_operand (operands[1], XImode))
5040 {
5041 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5042 DONE;
5043 }
5044 else if (BYTES_BIG_ENDIAN)
5045 {
5046 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5047 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5048 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5049 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5050 DONE;
5051 }
5052 else
5053 FAIL;
5054 })
5055
;; Builtin expander for vldN_dup: wrap the address register in a
;; BLKmode MEM sized to <nregs> elements and emit the matching
;; aarch64_simd_ldNr pattern.
5056 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5057 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5058 (match_operand:DI 1 "register_operand" "w")
5059 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5060 "TARGET_SIMD"
5061 {
5062 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5063 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5064 * <VSTRUCT:nregs>);
5065
5066 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5067 mem));
5068 DONE;
5069 })
5070
;; D-register variants of the structure loads.  For true vector D modes
;; (VD) a de-interleaving ldN is used; for the 64-bit scalar modes (DX)
;; there is no interleaving to do, so a plain multi-register ld1 on
;; .1d suffices.
5071 (define_insn "aarch64_ld2<mode>_dreg"
5072 [(set (match_operand:OI 0 "register_operand" "=w")
5073 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5074 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075 UNSPEC_LD2_DREG))]
5076 "TARGET_SIMD"
5077 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5078 [(set_attr "type" "neon_load2_2reg<q>")]
5079 )
5080
;; DX (64-bit scalar) variant: ld1 two D registers.
5081 (define_insn "aarch64_ld2<mode>_dreg"
5082 [(set (match_operand:OI 0 "register_operand" "=w")
5083 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5084 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085 UNSPEC_LD2_DREG))]
5086 "TARGET_SIMD"
5087 "ld1\\t{%S0.1d - %T0.1d}, %1"
5088 [(set_attr "type" "neon_load1_2reg<q>")]
5089 )
5090
;; Three-register D-reg structure load, vector element modes.
5091 (define_insn "aarch64_ld3<mode>_dreg"
5092 [(set (match_operand:CI 0 "register_operand" "=w")
5093 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5094 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5095 UNSPEC_LD3_DREG))]
5096 "TARGET_SIMD"
5097 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5098 [(set_attr "type" "neon_load3_3reg<q>")]
5099 )
5100
;; DX variant: ld1 three D registers.
5101 (define_insn "aarch64_ld3<mode>_dreg"
5102 [(set (match_operand:CI 0 "register_operand" "=w")
5103 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5104 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5105 UNSPEC_LD3_DREG))]
5106 "TARGET_SIMD"
5107 "ld1\\t{%S0.1d - %U0.1d}, %1"
5108 [(set_attr "type" "neon_load1_3reg<q>")]
5109 )
5110
;; Four-register D-reg structure load, vector element modes.
5111 (define_insn "aarch64_ld4<mode>_dreg"
5112 [(set (match_operand:XI 0 "register_operand" "=w")
5113 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5114 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5115 UNSPEC_LD4_DREG))]
5116 "TARGET_SIMD"
5117 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5118 [(set_attr "type" "neon_load4_4reg<q>")]
5119 )
5120
;; DX variant: ld1 four D registers.
5121 (define_insn "aarch64_ld4<mode>_dreg"
5122 [(set (match_operand:XI 0 "register_operand" "=w")
5123 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5124 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125 UNSPEC_LD4_DREG))]
5126 "TARGET_SIMD"
5127 "ld1\\t{%S0.1d - %V0.1d}, %1"
5128 [(set_attr "type" "neon_load1_4reg<q>")]
5129 )
5130
;; Builtin expander for vldN on D-register modes: build a BLKmode MEM
;; of <nregs> 8-byte registers and dispatch to the _dreg insns above.
5131 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5132 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5133 (match_operand:DI 1 "register_operand" "r")
5134 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5135 "TARGET_SIMD"
5136 {
5137 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5138 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5139
5140 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5141 DONE;
5142 })
5143
;; Builtin expander for vld1: big-endian goes through the
;; lane-order-preserving be_ld1 pattern, little-endian is a plain move.
5144 (define_expand "aarch64_ld1<VALL_F16:mode>"
5145 [(match_operand:VALL_F16 0 "register_operand")
5146 (match_operand:DI 1 "register_operand")]
5147 "TARGET_SIMD"
5148 {
5149 machine_mode mode = <VALL_F16:MODE>mode;
5150 rtx mem = gen_rtx_MEM (mode, operands[1]);
5151
5152 if (BYTES_BIG_ENDIAN)
5153 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5154 else
5155 emit_move_insn (operands[0], mem);
5156 DONE;
5157 })
5158
;; Builtin expander for vldN on Q-register modes: dispatch to the
;; aarch64_simd_ldN insns.
5159 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5160 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5161 (match_operand:DI 1 "register_operand" "r")
5162 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5163 "TARGET_SIMD"
5164 {
5165 machine_mode mode = <VSTRUCT:MODE>mode;
5166 rtx mem = gen_rtx_MEM (mode, operands[1]);
5167
5168 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5169 DONE;
5170 })
5171
;; Builtin expander for vldN_lane: size the BLKmode MEM to one structure,
;; range-check the lane index, and emit the single-lane load pattern.
5172 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5173 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5174 (match_operand:DI 1 "register_operand" "w")
5175 (match_operand:VSTRUCT 2 "register_operand" "0")
5176 (match_operand:SI 3 "immediate_operand" "i")
5177 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5178 "TARGET_SIMD"
5179 {
5180 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5181 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5182 * <VSTRUCT:nregs>);
5183
5184 aarch64_simd_lane_bounds (operands[3], 0,
5185 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5186 NULL);
5187 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5188 operands[0], mem, operands[2], operands[3]));
5189 DONE;
5190 })
5191
5192 ;; Expanders for builtins to extract vector registers from large
5193 ;; opaque integer modes.
5194
5195 ;; D-register list.
5196
;; Extract D register number `part' from a structure mode.  The subreg
;; offset is part * 16 because each register in the list occupies a
;; 128-bit slot; the value goes via a double-width (VDBL) temporary and
;; the D-sized lowpart is taken from that.
5197 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5198 [(match_operand:VDC 0 "register_operand" "=w")
5199 (match_operand:VSTRUCT 1 "register_operand" "w")
5200 (match_operand:SI 2 "immediate_operand" "i")]
5201 "TARGET_SIMD"
5202 {
5203 int part = INTVAL (operands[2]);
5204 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5205 int offset = part * 16;
5206
5207 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5208 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5209 DONE;
5210 })
5211
5212 ;; Q-register list.
5213
;; Extract Q register number `part' directly as a 16-byte-offset subreg.
5214 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5215 [(match_operand:VQ 0 "register_operand" "=w")
5216 (match_operand:VSTRUCT 1 "register_operand" "w")
5217 (match_operand:SI 2 "immediate_operand" "i")]
5218 "TARGET_SIMD"
5219 {
5220 int part = INTVAL (operands[2]);
5221 int offset = part * 16;
5222
5223 emit_move_insn (operands[0],
5224 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5225 DONE;
5226 })
5227
5228 ;; Permuted-store expanders for neon intrinsics.
5229
5230 ;; Permute instructions
5231
5232 ;; vec_perm support
5233
;; Constant permutation: succeed only if the back end can synthesise the
;; selector in operand 3 (TRN/UZP/ZIP/REV/EXT/DUP/TBL); otherwise FAIL
;; and let the middle end fall back.
5234 (define_expand "vec_perm_const<mode>"
5235 [(match_operand:VALL_F16 0 "register_operand")
5236 (match_operand:VALL_F16 1 "register_operand")
5237 (match_operand:VALL_F16 2 "register_operand")
5238 (match_operand:<V_INT_EQUIV> 3)]
5239 "TARGET_SIMD"
5240 {
5241 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5242 operands[2], operands[3]))
5243 DONE;
5244 else
5245 FAIL;
5246 })
5247
;; Variable permutation with a register selector; byte-element vectors
;; only (VB), lowered by aarch64_expand_vec_perm.
5248 (define_expand "vec_perm<mode>"
5249 [(match_operand:VB 0 "register_operand")
5250 (match_operand:VB 1 "register_operand")
5251 (match_operand:VB 2 "register_operand")
5252 (match_operand:VB 3 "register_operand")]
5253 "TARGET_SIMD"
5254 {
5255 aarch64_expand_vec_perm (operands[0], operands[1],
5256 operands[2], operands[3]);
5257 DONE;
5258 })
5259
;; TBL with a single-register table: out-of-range selector bytes yield
;; zero (TBL semantics).
5260 (define_insn "aarch64_tbl1<mode>"
5261 [(set (match_operand:VB 0 "register_operand" "=w")
5262 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5263 (match_operand:VB 2 "register_operand" "w")]
5264 UNSPEC_TBL))]
5265 "TARGET_SIMD"
5266 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5267 [(set_attr "type" "neon_tbl1<q>")]
5268 )
5269
5270 ;; Two source registers.
5271
;; TBL with a two-register table packed in OImode.
5272 (define_insn "aarch64_tbl2v16qi"
5273 [(set (match_operand:V16QI 0 "register_operand" "=w")
5274 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5275 (match_operand:V16QI 2 "register_operand" "w")]
5276 UNSPEC_TBL))]
5277 "TARGET_SIMD"
5278 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5279 [(set_attr "type" "neon_tbl2_q")]
5280 )
5281
;; Two-register-table TBL for any byte-vector destination width.
5282 (define_insn "aarch64_tbl3<mode>"
5283 [(set (match_operand:VB 0 "register_operand" "=w")
5284 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5285 (match_operand:VB 2 "register_operand" "w")]
5286 UNSPEC_TBL))]
5287 "TARGET_SIMD"
5288 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5289 [(set_attr "type" "neon_tbl3")]
5290 )
5291
;; Two-register-table TBX: like TBL, but out-of-range selector bytes
;; leave the corresponding destination byte (operand 1, tied to the
;; output) unchanged.
5292 (define_insn "aarch64_tbx4<mode>"
5293 [(set (match_operand:VB 0 "register_operand" "=w")
5294 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5295 (match_operand:OI 2 "register_operand" "w")
5296 (match_operand:VB 3 "register_operand" "w")]
5297 UNSPEC_TBX))]
5298 "TARGET_SIMD"
5299 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5300 [(set_attr "type" "neon_tbl4")]
5301 )
5302
5303 ;; Three source registers.
5304
;; Three-register-table TBL (table packed in CImode).
5305 (define_insn "aarch64_qtbl3<mode>"
5306 [(set (match_operand:VB 0 "register_operand" "=w")
5307 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5308 (match_operand:VB 2 "register_operand" "w")]
5309 UNSPEC_TBL))]
5310 "TARGET_SIMD"
5311 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5312 [(set_attr "type" "neon_tbl3")]
5313 )
5314
;; Three-register-table TBX (destination tied via operand 1).
5315 (define_insn "aarch64_qtbx3<mode>"
5316 [(set (match_operand:VB 0 "register_operand" "=w")
5317 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5318 (match_operand:CI 2 "register_operand" "w")
5319 (match_operand:VB 3 "register_operand" "w")]
5320 UNSPEC_TBX))]
5321 "TARGET_SIMD"
5322 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5323 [(set_attr "type" "neon_tbl3")]
5324 )
5325
5326 ;; Four source registers.
5327
;; Four-register-table TBL (table packed in XImode).
5328 (define_insn "aarch64_qtbl4<mode>"
5329 [(set (match_operand:VB 0 "register_operand" "=w")
5330 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5331 (match_operand:VB 2 "register_operand" "w")]
5332 UNSPEC_TBL))]
5333 "TARGET_SIMD"
5334 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5335 [(set_attr "type" "neon_tbl4")]
5336 )
5337
;; Four-register-table TBX (destination tied via operand 1).
5338 (define_insn "aarch64_qtbx4<mode>"
5339 [(set (match_operand:VB 0 "register_operand" "=w")
5340 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5341 (match_operand:XI 2 "register_operand" "w")
5342 (match_operand:VB 3 "register_operand" "w")]
5343 UNSPEC_TBX))]
5344 "TARGET_SIMD"
5345 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5346 [(set_attr "type" "neon_tbl4")]
5347 )
5348
;; Concatenate two V16QI registers into an OImode register pair for use
;; as a two-register TBL table; split into plain register moves after
;; reload by aarch64_split_combinev16qi.
5349 (define_insn_and_split "aarch64_combinev16qi"
5350 [(set (match_operand:OI 0 "register_operand" "=w")
5351 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5352 (match_operand:V16QI 2 "register_operand" "w")]
5353 UNSPEC_CONCAT))]
5354 "TARGET_SIMD"
5355 "#"
5356 "&& reload_completed"
5357 [(const_int 0)]
5358 {
5359 aarch64_split_combinev16qi (operands);
5360 DONE;
5361 }
5362 [(set_attr "type" "multiple")]
5363 )
5364
5365 ;; This instruction's pattern is generated directly by
5366 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5367 ;; need corresponding changes there.
;; TRN1/TRN2, UZP1/UZP2, ZIP1/ZIP2 — which one is selected by the
;; PERMUTE iterator's perm_insn/perm_hilo attributes.
5368 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5369 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5370 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5371 (match_operand:VALL_F16 2 "register_operand" "w")]
5372 PERMUTE))]
5373 "TARGET_SIMD"
5374 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5375 [(set_attr "type" "neon_permute<q>")]
5376 )
5377
5378 ;; This instruction's pattern is generated directly by
5379 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5380 ;; need corresponding changes there. Note that the immediate (third)
5381 ;; operand is a lane index not a byte index.
;; EXT: the lane index is scaled to a byte offset at output time, since
;; the hardware immediate counts bytes.
5382 (define_insn "aarch64_ext<mode>"
5383 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5384 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5385 (match_operand:VALL_F16 2 "register_operand" "w")
5386 (match_operand:SI 3 "immediate_operand" "i")]
5387 UNSPEC_EXT))]
5388 "TARGET_SIMD"
5389 {
5390 operands[3] = GEN_INT (INTVAL (operands[3])
5391 * GET_MODE_UNIT_SIZE (<MODE>mode));
5392 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5393 }
5394 [(set_attr "type" "neon_ext<q>")]
5395 )
5396
5397 ;; This instruction's pattern is generated directly by
5398 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5399 ;; need corresponding changes there.
;; REV16/REV32/REV64, selected by the REVERSE iterator's rev_op
;; attribute.
5400 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5401 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5402 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5403 REVERSE))]
5404 "TARGET_SIMD"
5405 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5406 [(set_attr "type" "neon_rev<q>")]
5407 )
5408
;; D-register variants of the structure stores, mirroring the ldN_dreg
;; patterns: interleaving stN for vector D modes (VD), plain
;; multi-register st1 on .1d for the 64-bit scalar modes (DX).
5409 (define_insn "aarch64_st2<mode>_dreg"
5410 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5411 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5412 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5413 UNSPEC_ST2))]
5414 "TARGET_SIMD"
5415 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5416 [(set_attr "type" "neon_store2_2reg")]
5417 )
5418
;; DX variant: st1 two D registers.
5419 (define_insn "aarch64_st2<mode>_dreg"
5420 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5421 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5422 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5423 UNSPEC_ST2))]
5424 "TARGET_SIMD"
5425 "st1\\t{%S1.1d - %T1.1d}, %0"
5426 [(set_attr "type" "neon_store1_2reg")]
5427 )
5428
;; Three-register D-reg structure store, vector element modes.
5429 (define_insn "aarch64_st3<mode>_dreg"
5430 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5431 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5432 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5433 UNSPEC_ST3))]
5434 "TARGET_SIMD"
5435 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5436 [(set_attr "type" "neon_store3_3reg")]
5437 )
5438
;; DX variant: st1 three D registers.
5439 (define_insn "aarch64_st3<mode>_dreg"
5440 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5441 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5442 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5443 UNSPEC_ST3))]
5444 "TARGET_SIMD"
5445 "st1\\t{%S1.1d - %U1.1d}, %0"
5446 [(set_attr "type" "neon_store1_3reg")]
5447 )
5448
;; Four-register D-reg structure store, vector element modes.
5449 (define_insn "aarch64_st4<mode>_dreg"
5450 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5451 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5452 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5453 UNSPEC_ST4))]
5454 "TARGET_SIMD"
5455 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5456 [(set_attr "type" "neon_store4_4reg")]
5457 )
5458
;; DX variant: st1 four D registers.
5459 (define_insn "aarch64_st4<mode>_dreg"
5460 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5461 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5462 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5463 UNSPEC_ST4))]
5464 "TARGET_SIMD"
5465 "st1\\t{%S1.1d - %V1.1d}, %0"
5466 [(set_attr "type" "neon_store1_4reg")]
5467 )
5468
;; Builtin expander for vstN on D-register modes: build a BLKmode MEM
;; of <nregs> 8-byte registers and dispatch to the _dreg store insns.
5469 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5470 [(match_operand:DI 0 "register_operand" "r")
5471 (match_operand:VSTRUCT 1 "register_operand" "w")
5472 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5473 "TARGET_SIMD"
5474 {
5475 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5476 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5477
5478 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5479 DONE;
5480 })
5481
;; Builtin expander for vstN on Q-register modes.
5482 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5483 [(match_operand:DI 0 "register_operand" "r")
5484 (match_operand:VSTRUCT 1 "register_operand" "w")
5485 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5486 "TARGET_SIMD"
5487 {
5488 machine_mode mode = <VSTRUCT:MODE>mode;
5489 rtx mem = gen_rtx_MEM (mode, operands[0]);
5490
5491 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5492 DONE;
5493 })
5494
;; Builtin expander for vstN_lane: size the BLKmode MEM to one
;; structure and emit the single-lane store pattern.  Note: unlike the
;; aarch64_ld*_lane expander, the lane index is not range-checked here
;; with aarch64_simd_lane_bounds — presumably validated earlier; TODO
;; confirm.
5495 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5496 [(match_operand:DI 0 "register_operand" "r")
5497 (match_operand:VSTRUCT 1 "register_operand" "w")
5498 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5499 (match_operand:SI 2 "immediate_operand")]
5500 "TARGET_SIMD"
5501 {
5502 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5503 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5504 * <VSTRUCT:nregs>);
5505
5506 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5507 mem, operands[1], operands[2]));
5508 DONE;
5509 })
5510
;; Builtin expander for vst1: big-endian goes through the
;; lane-order-preserving be_st1 pattern, little-endian is a plain move.
5511 (define_expand "aarch64_st1<VALL_F16:mode>"
5512 [(match_operand:DI 0 "register_operand")
5513 (match_operand:VALL_F16 1 "register_operand")]
5514 "TARGET_SIMD"
5515 {
5516 machine_mode mode = <VALL_F16:MODE>mode;
5517 rtx mem = gen_rtx_MEM (mode, operands[0]);
5518
5519 if (BYTES_BIG_ENDIAN)
5520 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5521 else
5522 emit_move_insn (mem, operands[1]);
5523 DONE;
5524 })
5525
5526 ;; Expander for builtins to insert vector registers into large
5527 ;; opaque integer modes.
5528
5529 ;; Q-register list. We don't need a D-reg inserter as we zero
5530 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5531
;; Replace Q-register number <part> (operand 3, an immediate) of the
;; structure value in operand 1 with operand 2, leaving the result in
;; operand 0.  The insertion is done by copying the whole structure and
;; then storing through a SUBREG at byte offset part * 16 (one Q-reg is
;; 16 bytes).
5532 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5533 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5534 (match_operand:VSTRUCT 1 "register_operand" "0")
5535 (match_operand:VQ 2 "register_operand" "w")
5536 (match_operand:SI 3 "immediate_operand" "i")]
5537 "TARGET_SIMD"
5538 {
5539 int part = INTVAL (operands[3]);
5540 int offset = part * 16;
5541
5542 emit_move_insn (operands[0], operands[1]);
5543 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5544 operands[2]);
5545 DONE;
5546 })
5547
5548 ;; Standard pattern name vec_init<mode><Vel>.
5549
;; Build a vector in operand 0 from the element list in operand 1.
;; All of the work (duplications, lane inserts, constant pools) is done
;; by aarch64_expand_vector_init in aarch64.c.
5550 (define_expand "vec_init<mode><Vel>"
5551 [(match_operand:VALL_F16 0 "register_operand" "")
5552 (match_operand 1 "" "")]
5553 "TARGET_SIMD"
5554 {
5555 aarch64_expand_vector_init (operands[0], operands[1]);
5556 DONE;
5557 })
5558
;; Load a scalar from memory and replicate it to every lane of the
;; destination vector (a vec_duplicate of a memory operand), matched to
;; the LD1R instruction.
5559 (define_insn "*aarch64_simd_ld1r<mode>"
5560 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5561 (vec_duplicate:VALL_F16
5562 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5563 "TARGET_SIMD"
5564 "ld1r\\t{%0.<Vtype>}, %1"
5565 [(set_attr "type" "neon_load1_all_lanes")]
5566 )
5567
;; Vector floating-point reciprocal estimate (FRECPE).
5568 (define_insn "aarch64_frecpe<mode>"
5569 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5570 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5571 UNSPEC_FRECPE))]
5572 "TARGET_SIMD"
5573 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5574 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5575 )
5576
;; Scalar FRECP<suffix> on a general-purpose float mode; the concrete
;; operation and suffix (e.g. estimate vs. exponent) come from the FRECP
;; iterator, which is defined elsewhere.
5577 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5578 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5579 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5580 FRECP))]
5581 "TARGET_SIMD"
5582 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5583 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5584 )
5585
;; Floating-point reciprocal step (FRECPS), vector and scalar forms via
;; the VHSDF_HSDF iterator.
5586 (define_insn "aarch64_frecps<mode>"
5587 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5588 (unspec:VHSDF_HSDF
5589 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5590 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5591 UNSPEC_FRECPS))]
5592 "TARGET_SIMD"
5593 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5594 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5595 )
5596
;; Unsigned integer reciprocal estimate (URECPE) on vectors of SI.
5597 (define_insn "aarch64_urecpe<mode>"
5598 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5599 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5600 UNSPEC_URECPE))]
5601 "TARGET_SIMD"
5602 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5603 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5604
5605 ;; Standard pattern name vec_extract<mode><Vel>.
5606
;; Extract the element selected by immediate operand 2 from vector
;; operand 1 into scalar operand 0, by delegating to aarch64_get_lane.
5607 (define_expand "vec_extract<mode><Vel>"
5608 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5609 (match_operand:VALL_F16 1 "register_operand" "")
5610 (match_operand:SI 2 "immediate_operand" "")]
5611 "TARGET_SIMD"
5612 {
5613 emit_insn
5614 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5615 DONE;
5616 })
5617
5618 ;; aes
5619
;; AES single-round encrypt/decrypt (AESE/AESD, chosen by the CRYPTO_AES
;; iterator).  The state operand 1 is tied to the destination ("0"); the
;; round key is operand 2.
5620 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5621 [(set (match_operand:V16QI 0 "register_operand" "=w")
5622 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5623 (match_operand:V16QI 2 "register_operand" "w")]
5624 CRYPTO_AES))]
5625 "TARGET_SIMD && TARGET_CRYPTO"
5626 "aes<aes_op>\\t%0.16b, %2.16b"
5627 [(set_attr "type" "crypto_aese")]
5628 )
5629
5630 ;; When AES/AESMC fusion is enabled we want the register allocation to
5631 ;; look like:
5632 ;; AESE Vn, _
5633 ;; AESMC Vn, Vn
5634 ;; So prefer to tie operand 1 to operand 0 when fusing.
5635
;; AES MixColumns / InverseMixColumns (AESMC/AESIMC via CRYPTO_AESMC).
;; Two alternatives: the tied "0" alternative is only enabled when
;; AESE/AESMC fusion is active (see the enabled attribute below), which
;; steers the allocator toward the fusible register pattern; the untied
;; "w" alternative is always available.
5636 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5637 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5638 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5639 CRYPTO_AESMC))]
5640 "TARGET_SIMD && TARGET_CRYPTO"
5641 "aes<aesmc_op>\\t%0.16b, %1.16b"
5642 [(set_attr "type" "crypto_aesmc")
5643 (set_attr_alternative "enabled"
5644 [(if_then_else (match_test
5645 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5646 (const_string "yes" )
5647 (const_string "no"))
5648 (const_string "yes")])]
5649 )
5650
5651 ;; sha1
5652
;; SHA1H fixed-rotate on a scalar SI value held in a FP/SIMD register.
5653 (define_insn "aarch64_crypto_sha1hsi"
5654 [(set (match_operand:SI 0 "register_operand" "=w")
5655 (unspec:SI [(match_operand:SI 1
5656 "register_operand" "w")]
5657 UNSPEC_SHA1H))]
5658 "TARGET_SIMD && TARGET_CRYPTO"
5659 "sha1h\\t%s0, %s1"
5660 [(set_attr "type" "crypto_sha1_fast")]
5661 )
5662
;; SHA1H applied to lane 0 of a V4SI operand.  Little-endian only: on
;; big-endian the register lane holding the low-addressed element is
;; lane 3, handled by aarch64_be_crypto_sha1hv4si below.
5663 (define_insn "aarch64_crypto_sha1hv4si"
5664 [(set (match_operand:SI 0 "register_operand" "=w")
5665 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5666 (parallel [(const_int 0)]))]
5667 UNSPEC_SHA1H))]
5668 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5669 "sha1h\\t%s0, %s1"
5670 [(set_attr "type" "crypto_sha1_fast")]
5671 )
5672
;; Big-endian counterpart of aarch64_crypto_sha1hv4si: the same SHA1H
;; operation, but selecting lane 3 of the V4SI operand.
5673 (define_insn "aarch64_be_crypto_sha1hv4si"
5674 [(set (match_operand:SI 0 "register_operand" "=w")
5675 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5676 (parallel [(const_int 3)]))]
5677 UNSPEC_SHA1H))]
5678 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5679 "sha1h\\t%s0, %s1"
5680 [(set_attr "type" "crypto_sha1_fast")]
5681 )
5682
;; SHA1SU1: second half of the SHA-1 schedule update.  Operand 1 is tied
;; to the destination ("0") as the instruction updates it in place.
5683 (define_insn "aarch64_crypto_sha1su1v4si"
5684 [(set (match_operand:V4SI 0 "register_operand" "=w")
5685 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5686 (match_operand:V4SI 2 "register_operand" "w")]
5687 UNSPEC_SHA1SU1))]
5688 "TARGET_SIMD && TARGET_CRYPTO"
5689 "sha1su1\\t%0.4s, %2.4s"
5690 [(set_attr "type" "crypto_sha1_fast")]
5691 )
5692
;; SHA1 hash-update instructions (SHA1C/SHA1M/SHA1P via the CRYPTO_SHA1
;; iterator).  Hash state (operand 1) is tied to the destination; operand
;; 2 is the scalar e value and operand 3 the message schedule.
5693 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5694 [(set (match_operand:V4SI 0 "register_operand" "=w")
5695 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5696 (match_operand:SI 2 "register_operand" "w")
5697 (match_operand:V4SI 3 "register_operand" "w")]
5698 CRYPTO_SHA1))]
5699 "TARGET_SIMD && TARGET_CRYPTO"
5700 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5701 [(set_attr "type" "crypto_sha1_slow")]
5702 )
5703
;; SHA1SU0: first half of the SHA-1 schedule update, combining three
;; schedule vectors; operand 1 is tied to the destination.
5704 (define_insn "aarch64_crypto_sha1su0v4si"
5705 [(set (match_operand:V4SI 0 "register_operand" "=w")
5706 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5707 (match_operand:V4SI 2 "register_operand" "w")
5708 (match_operand:V4SI 3 "register_operand" "w")]
5709 UNSPEC_SHA1SU0))]
5710 "TARGET_SIMD && TARGET_CRYPTO"
5711 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5712 [(set_attr "type" "crypto_sha1_xor")]
5713 )
5714
5715 ;; sha256
5716
;; SHA-256 hash update (SHA256H/SHA256H2 via the CRYPTO_SHA256 iterator).
;; Hash state (operand 1) is tied to the destination.
5717 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5718 [(set (match_operand:V4SI 0 "register_operand" "=w")
5719 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5720 (match_operand:V4SI 2 "register_operand" "w")
5721 (match_operand:V4SI 3 "register_operand" "w")]
5722 CRYPTO_SHA256))]
5723 "TARGET_SIMD && TARGET_CRYPTO"
5724 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5725 [(set_attr "type" "crypto_sha256_slow")]
5726 )
5727
;; SHA256SU0: first half of the SHA-256 message schedule update.
;; Operand 1 is tied to the destination ("0") as the instruction updates
;; it in place.
5728 (define_insn "aarch64_crypto_sha256su0v4si"
5729 [(set (match_operand:V4SI 0 "register_operand" "=w")
5730 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5731 (match_operand:V4SI 2 "register_operand" "w")]
5732 UNSPEC_SHA256SU0))]
5733 "TARGET_SIMD && TARGET_CRYPTO"
5734 "sha256su0\\t%0.4s, %2.4s"
5735 [(set_attr "type" "crypto_sha256_fast")]
5736 )
5737
;; SHA256SU1: second half of the SHA-256 message schedule update,
;; combining two schedule vectors; operand 1 is tied to the destination.
5738 (define_insn "aarch64_crypto_sha256su1v4si"
5739 [(set (match_operand:V4SI 0 "register_operand" "=w")
5740 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5741 (match_operand:V4SI 2 "register_operand" "w")
5742 (match_operand:V4SI 3 "register_operand" "w")]
5743 UNSPEC_SHA256SU1))]
5744 "TARGET_SIMD && TARGET_CRYPTO"
5745 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5746 [(set_attr "type" "crypto_sha256_slow")]
5747 )
5748
5749 ;; pmull
5750
;; Polynomial multiply of the two 64-bit operands, producing a 128-bit
;; (TImode) carry-less product via PMULL.
5751 (define_insn "aarch64_crypto_pmulldi"
5752 [(set (match_operand:TI 0 "register_operand" "=w")
5753 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5754 (match_operand:DI 2 "register_operand" "w")]
5755 UNSPEC_PMULL))]
5756 "TARGET_SIMD && TARGET_CRYPTO"
5757 "pmull\\t%0.1q, %1.1d, %2.1d"
5758 [(set_attr "type" "crypto_pmull")]
5759 )
5760
;; Polynomial multiply of the high 64-bit halves of two V2DI operands
;; (PMULL2), producing a 128-bit (TImode) carry-less product.
5761 (define_insn "aarch64_crypto_pmullv2di"
5762 [(set (match_operand:TI 0 "register_operand" "=w")
5763 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5764 (match_operand:V2DI 2 "register_operand" "w")]
5765 UNSPEC_PMULL2))]
5766 "TARGET_SIMD && TARGET_CRYPTO"
5767 "pmull2\\t%0.1q, %1.2d, %2.2d"
5768 [(set_attr "type" "crypto_pmull")]
5769 )