;; Retrieved from git.ipfire.org (thirdparty/gcc.git): gcc/config/aarch64/aarch64-simd.md
;; Commit: "[AArch64][1/10] ARMv8.2-A FP16 data processing intrinsics"
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Standard vector move expander (all 64/128-bit modes incl. FP16).
;; A store to memory needs its source in a register, since a move insn
;; can have at most one memory operand.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
28 "
29 )
30
;; Misaligned vector move expander; must not FAIL (see inline comment).
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
34 "TARGET_SIMD"
35 {
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
42 })
43
;; Broadcast a scalar into every lane of an integer vector.
;; Alternative 0: scalar in a GP register (dup Vd.T, Rn);
;; alternative 1: scalar in lane 0 of a SIMD register.
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
46 (vec_duplicate:VDQ_I
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
48 "TARGET_SIMD"
49 "@
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
53 )
54
;; Broadcast a scalar into every lane of a float (incl. FP16) vector;
;; the scalar source is always in a SIMD register.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
59 "TARGET_SIMD"
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]
62 )
63
;; Broadcast lane %2 of operand 1 across the result.  The lane index
;; is remapped with ENDIAN_LANE_N so that GCC's lane numbering matches
;; the architectural numbering on big-endian targets.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
67 (vec_select:<VEL>
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
70 )))]
71 "TARGET_SIMD"
72 {
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
75 }
76 [(set_attr "type" "neon_dup<q>")]
77 )
78
;; As above, but the source vector has the swapped width (64<->128 bit);
;; lane remapping therefore uses the source's mode, not the result's.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16
82 (vec_select:<VEL>
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
85 )))]
86 "TARGET_SIMD"
87 {
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
91 }
92 [(set_attr "type" "neon_dup<q>")]
93 )
94
;; Move for 64-bit vector modes.  The insn condition requires at least
;; one register operand (the mov expander guarantees this).
;; Alternatives: load, store, SIMD->SIMD copy (orr), SIMD->GP (umov),
;; GP->SIMD (ins), GP->GP (mov), immediate materialisation.
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
100 "TARGET_SIMD
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
103 {
104 switch (which_alternative)
105 {
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "ins\t%0.d[0], %1";
111 case 5: return "mov\t%0, %1";
112 case 6:
113 return aarch64_output_simd_mov_immediate (operands[1],
114 <MODE>mode, 64);
115 default: gcc_unreachable ();
116 }
117 }
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 mov_reg, neon_move<q>")]
121 )
122
;; Move for 128-bit vector modes.  Alternatives 3-5 cross between the
;; GP and SIMD register files; they emit "#" and are split after reload
;; into two 64-bit moves (hence length 8 and type "multiple").
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
128 "TARGET_SIMD
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
131 {
132 switch (which_alternative)
133 {
134 case 0:
135 return "ldr\\t%q0, %1";
136 case 1:
137 return "str\\t%q1, %0";
138 case 2:
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
140 case 3:
141 case 4:
142 case 5:
143 return "#";
144 case 6:
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
146 default:
147 gcc_unreachable ();
148 }
149 }
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
152 neon_move<q>")
153 (set_attr "length" "4,4,4,8,8,8,4")]
154 )
155
;; Load two adjacent 64-bit vectors with one LDP.  The insn condition
;; checks that the second address is exactly the first plus the vector
;; size, as LDP requires.
156 (define_insn "load_pair<mode>"
157 [(set (match_operand:VD 0 "register_operand" "=w")
158 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159 (set (match_operand:VD 2 "register_operand" "=w")
160 (match_operand:VD 3 "memory_operand" "m"))]
161 "TARGET_SIMD
162 && rtx_equal_p (XEXP (operands[3], 0),
163 plus_constant (Pmode,
164 XEXP (operands[1], 0),
165 GET_MODE_SIZE (<MODE>mode)))"
166 "ldp\\t%d0, %d2, %1"
167 [(set_attr "type" "neon_ldp")]
168 )
169
;; Store two adjacent 64-bit vectors with one STP (same adjacency
;; requirement as load_pair).
170 (define_insn "store_pair<mode>"
171 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 (match_operand:VD 1 "register_operand" "w"))
173 (set (match_operand:VD 2 "memory_operand" "=m")
174 (match_operand:VD 3 "register_operand" "w"))]
175 "TARGET_SIMD
176 && rtx_equal_p (XEXP (operands[2], 0),
177 plus_constant (Pmode,
178 XEXP (operands[0], 0),
179 GET_MODE_SIZE (<MODE>mode)))"
180 "stp\\t%d1, %d3, %0"
181 [(set_attr "type" "neon_stp")]
182 )
183
;; After reload: split a 128-bit vector copy held entirely in GP
;; registers into two DImode register moves.
184 (define_split
185 [(set (match_operand:VQ 0 "register_operand" "")
186 (match_operand:VQ 1 "register_operand" ""))]
187 "TARGET_SIMD && reload_completed
188 && GP_REGNUM_P (REGNO (operands[0]))
189 && GP_REGNUM_P (REGNO (operands[1]))"
190 [(const_int 0)]
191 {
192 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
193 DONE;
194 })
195
;; After reload: split a 128-bit copy that crosses between the FP and
;; GP register files (either direction).
196 (define_split
197 [(set (match_operand:VQ 0 "register_operand" "")
198 (match_operand:VQ 1 "register_operand" ""))]
199 "TARGET_SIMD && reload_completed
200 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
202 [(const_int 0)]
203 {
204 aarch64_split_simd_move (operands[0], operands[1]);
205 DONE;
206 })
207
;; Helper expander for the split above: move a 128-bit vector to or
;; from a GP register pair one 64-bit half at a time.
208 (define_expand "aarch64_split_simd_mov<mode>"
209 [(set (match_operand:VQ 0)
210 (match_operand:VQ 1))]
211 "TARGET_SIMD"
212 {
213 rtx dst = operands[0];
214 rtx src = operands[1];
215
216 if (GP_REGNUM_P (REGNO (src)))
217 {
218 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 rtx src_high_part = gen_highpart (<VHALF>mode, src);
220
221 emit_insn
222 (gen_move_lo_quad_<mode> (dst, src_low_part));
223 emit_insn
224 (gen_move_hi_quad_<mode> (dst, src_high_part));
225 }
226
227 else
228 {
229 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
233
234 emit_insn
235 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
236 emit_insn
237 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
238 }
239 DONE;
240 }
241 )
242
;; Extract the low 64 bits of a 128-bit vector into a GP register.
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
245 (vec_select:<VHALF>
246 (match_operand:VQ 1 "register_operand" "w")
247 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248 "TARGET_SIMD && reload_completed"
249 "umov\t%0, %1.d[0]"
250 [(set_attr "type" "neon_to_gp<q>")
251 (set_attr "length" "4")
252 ])
253
;; Extract the high 64 bits of a 128-bit vector into a GP register.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
256 (vec_select:<VHALF>
257 (match_operand:VQ 1 "register_operand" "w")
258 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259 "TARGET_SIMD && reload_completed"
260 "umov\t%0, %1.d[1]"
261 [(set_attr "type" "neon_to_gp<q>")
262 (set_attr "length" "4")
263 ])
264
;; OR-NOT.  The RTL complements operand 1, so the template swaps the
;; operands: ORN computes %2 | ~%1.
265 (define_insn "orn<mode>3"
266 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 (match_operand:VDQ_I 2 "register_operand" "w")))]
269 "TARGET_SIMD"
270 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271 [(set_attr "type" "neon_logic<q>")]
272 )
273
;; Bit-clear: %2 & ~%1 (operands swapped in the template as for ORN).
274 (define_insn "bic<mode>3"
275 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 (match_operand:VDQ_I 2 "register_operand" "w")))]
278 "TARGET_SIMD"
279 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280 [(set_attr "type" "neon_logic<q>")]
281 )
282
;; Element-wise integer addition.
283 (define_insn "add<mode>3"
284 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 (match_operand:VDQ_I 2 "register_operand" "w")))]
287 "TARGET_SIMD"
288 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289 [(set_attr "type" "neon_add<q>")]
290 )
291
;; Element-wise integer subtraction.
292 (define_insn "sub<mode>3"
293 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 (match_operand:VDQ_I 2 "register_operand" "w")))]
296 "TARGET_SIMD"
297 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298 [(set_attr "type" "neon_sub<q>")]
299 )
300
;; Element-wise integer multiply (byte/half/single lanes only; the
;; ISA has no vector 64-bit MUL, hence VDQ_BHSI).
301 (define_insn "mul<mode>3"
302 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
305 "TARGET_SIMD"
306 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307 [(set_attr "type" "neon_mul_<Vetype><q>")]
308 )
309
;; Byte-reverse within each element (REV16/REV32/REV64 chosen by
;; <Vrevsuff> according to the element size).
310 (define_insn "bswap<mode>2"
311 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
313 "TARGET_SIMD"
314 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315 [(set_attr "type" "neon_rev<q>")]
316 )
317
;; Reverse the bits within each byte.
318 (define_insn "aarch64_rbit<mode>"
319 [(set (match_operand:VB 0 "register_operand" "=w")
320 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
321 UNSPEC_RBIT))]
322 "TARGET_SIMD"
323 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324 [(set_attr "type" "neon_rbit")]
325 )
326
;; ctz(x) == clz(bit-reverse(x)).  Byte-swapping each element and then
;; bit-reversing every byte reverses the whole element, so the sequence
;; is bswap + rbit (viewed as bytes) + clz.
327 (define_expand "ctz<mode>2"
328 [(set (match_operand:VS 0 "register_operand")
329 (ctz:VS (match_operand:VS 1 "register_operand")))]
330 "TARGET_SIMD"
331 {
332 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
334 <MODE>mode, 0);
335 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
337 DONE;
338 }
339 )
340
;; Multiply a vector by one broadcast lane of another vector
;; ((F)MUL by element).  Lane index remapped for big-endian.
341 (define_insn "*aarch64_mul3_elt<mode>"
342 [(set (match_operand:VMUL 0 "register_operand" "=w")
343 (mult:VMUL
344 (vec_duplicate:VMUL
345 (vec_select:<VEL>
346 (match_operand:VMUL 1 "register_operand" "<h_con>")
347 (parallel [(match_operand:SI 2 "immediate_operand")])))
348 (match_operand:VMUL 3 "register_operand" "w")))]
349 "TARGET_SIMD"
350 {
351 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
353 }
354 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
355 )
356
;; As above, but the lane comes from a vector of the swapped width;
;; lane remapping uses the source's mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359 (mult:VMUL_CHANGE_NLANES
360 (vec_duplicate:VMUL_CHANGE_NLANES
361 (vec_select:<VEL>
362 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 (parallel [(match_operand:SI 2 "immediate_operand")])))
364 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
365 "TARGET_SIMD"
366 {
367 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 INTVAL (operands[2])));
369 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
370 }
371 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
372 )
373
;; Multiply a vector by a duplicated scalar, using the by-element form
;; with lane 0.  (NOTE(review): the trailing ";" after the template
;; appears to just start an md-file comment — confirm it is harmless.)
374 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
375 [(set (match_operand:VMUL 0 "register_operand" "=w")
376 (mult:VMUL
377 (vec_duplicate:VMUL
378 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
379 (match_operand:VMUL 2 "register_operand" "w")))]
380 "TARGET_SIMD"
381 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
382 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
383 )
384
;; Reciprocal square-root estimate (FRSQRTE).
385 (define_insn "aarch64_rsqrte<mode>"
386 [(set (match_operand:VALLF 0 "register_operand" "=w")
387 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
388 UNSPEC_RSQRTE))]
389 "TARGET_SIMD"
390 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391 [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
392
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
393 (define_insn "aarch64_rsqrts<mode>"
394 [(set (match_operand:VALLF 0 "register_operand" "=w")
395 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
396 (match_operand:VALLF 2 "register_operand" "w")]
397 UNSPEC_RSQRTS))]
398 "TARGET_SIMD"
399 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400 [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
401
;; Expand 1/sqrt(x) via the estimate+step approximation sequence
;; (the "true" argument requests the reciprocal form).
402 (define_expand "rsqrt<mode>2"
403 [(set (match_operand:VALLF 0 "register_operand" "=w")
404 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
405 UNSPEC_RSQRT))]
406 "TARGET_SIMD"
407 {
408 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
409 DONE;
410 })
411
;; Scalar DF result = one lane of a V2DF times a DF scalar, using the
;; by-element FMUL form.
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413 [(set (match_operand:DF 0 "register_operand" "=w")
414 (mult:DF
415 (vec_select:DF
416 (match_operand:V2DF 1 "register_operand" "w")
417 (parallel [(match_operand:SI 2 "immediate_operand")]))
418 (match_operand:DF 3 "register_operand" "w")))]
419 "TARGET_SIMD"
420 {
421 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
423 }
424 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
425 )
426
;; Element-wise integer negate.
427 (define_insn "neg<mode>2"
428 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
430 "TARGET_SIMD"
431 "neg\t%0.<Vtype>, %1.<Vtype>"
432 [(set_attr "type" "neon_neg<q>")]
433 )
434
;; Element-wise integer absolute value.
435 (define_insn "abs<mode>2"
436 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
438 "TARGET_SIMD"
439 "abs\t%0.<Vtype>, %1.<Vtype>"
440 [(set_attr "type" "neon_abs<q>")]
441 )
442
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
445 ;; as SABD.
;; Hence ABS is modelled as an unspec rather than (abs ...) here.
446 (define_insn "aarch64_abs<mode>"
447 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
448 (unspec:VSDQ_I_DI
449 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
450 UNSPEC_ABS))]
451 "TARGET_SIMD"
452 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453 [(set_attr "type" "neon_abs<q>")]
454 )
455
;; Signed absolute difference: |op1 - op2| (SABD).
456 (define_insn "abd<mode>_3"
457 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 (abs:VDQ_BHSI (minus:VDQ_BHSI
459 (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
461 "TARGET_SIMD"
462 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463 [(set_attr "type" "neon_abd<q>")]
464 )
465
;; Signed absolute difference and accumulate: op3 += |op1 - op2|
;; (SABA; the accumulator is tied to the output, constraint "0").
466 (define_insn "aba<mode>_3"
467 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
472 "TARGET_SIMD"
473 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474 [(set_attr "type" "neon_arith_acc<q>")]
475 )
476
;; Floating-point absolute difference: |op1 - op2| (FABD).
477 (define_insn "fabd<mode>3"
478 [(set (match_operand:VALLF 0 "register_operand" "=w")
479 (abs:VALLF
480 (minus:VALLF
481 (match_operand:VALLF 1 "register_operand" "w")
482 (match_operand:VALLF 2 "register_operand" "w"))))]
483 "TARGET_SIMD"
484 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
485 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
486 )
487
;; Bitwise AND.
488 (define_insn "and<mode>3"
489 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
490 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
491 (match_operand:VDQ_I 2 "register_operand" "w")))]
492 "TARGET_SIMD"
493 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
494 [(set_attr "type" "neon_logic<q>")]
495 )
496
;; Bitwise inclusive OR.
497 (define_insn "ior<mode>3"
498 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 (match_operand:VDQ_I 2 "register_operand" "w")))]
501 "TARGET_SIMD"
502 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503 [(set_attr "type" "neon_logic<q>")]
504 )
505
;; Bitwise exclusive OR.
506 (define_insn "xor<mode>3"
507 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 (match_operand:VDQ_I 2 "register_operand" "w")))]
510 "TARGET_SIMD"
511 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512 [(set_attr "type" "neon_logic<q>")]
513 )
514
;; Bitwise NOT.
515 (define_insn "one_cmpl<mode>2"
516 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
518 "TARGET_SIMD"
519 "not\t%0.<Vbtype>, %1.<Vbtype>"
520 [(set_attr "type" "neon_logic<q>")]
521 )
522
;; Insert a scalar into one lane of a vector.  Operand 2 is a one-hot
;; vec_merge mask: exact_log2 recovers the lane, ENDIAN_LANE_N remaps
;; it for big-endian, and the re-encoded mask is printed with %p2
;; (which outputs its log2).  Alternatives: from a GP register (INS),
;; from lane 0 of a SIMD register (INS), or from memory (LD1 to lane).
523 (define_insn "aarch64_simd_vec_set<mode>"
524 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
525 (vec_merge:VDQ_BHSI
526 (vec_duplicate:VDQ_BHSI
527 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
528 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
529 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
530 "TARGET_SIMD"
531 {
532 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
533 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
534 switch (which_alternative)
535 {
536 case 0:
537 return "ins\\t%0.<Vetype>[%p2], %w1";
538 case 1:
539 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
540 case 2:
541 return "ld1\\t{%0.<Vetype>}[%p2], %1";
542 default:
543 gcc_unreachable ();
544 }
545 }
546 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
547 )
548
;; Copy lane %4 of operand 3 into one lane of operand 0 (INS
;; element-to-element).  Both lane numbers are endian-remapped.
549 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
550 [(set (match_operand:VALL 0 "register_operand" "=w")
551 (vec_merge:VALL
552 (vec_duplicate:VALL
553 (vec_select:<VEL>
554 (match_operand:VALL 3 "register_operand" "w")
555 (parallel
556 [(match_operand:SI 4 "immediate_operand" "i")])))
557 (match_operand:VALL 1 "register_operand" "0")
558 (match_operand:SI 2 "immediate_operand" "i")))]
559 "TARGET_SIMD"
560 {
561 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
562 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
563 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
564
565 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
566 }
567 [(set_attr "type" "neon_ins<q>")]
568 )
569
;; As above, but the source lane comes from a vector of the swapped
;; width; its lane number is remapped in the source's mode.
570 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
571 [(set (match_operand:VALL 0 "register_operand" "=w")
572 (vec_merge:VALL
573 (vec_duplicate:VALL
574 (vec_select:<VEL>
575 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
576 (parallel
577 [(match_operand:SI 4 "immediate_operand" "i")])))
578 (match_operand:VALL 1 "register_operand" "0")
579 (match_operand:SI 2 "immediate_operand" "i")))]
580 "TARGET_SIMD"
581 {
582 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
583 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
584 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
585 INTVAL (operands[4])));
586
587 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
588 }
589 [(set_attr "type" "neon_ins<q>")]
590 )
591
;; Logical shift right by an immediate vector of equal amounts (USHR).
592 (define_insn "aarch64_simd_lshr<mode>"
593 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
594 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
595 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
596 "TARGET_SIMD"
597 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
598 [(set_attr "type" "neon_shift_imm<q>")]
599 )
600
;; Arithmetic shift right by an immediate (SSHR).
601 (define_insn "aarch64_simd_ashr<mode>"
602 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
603 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
604 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
605 "TARGET_SIMD"
606 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
607 [(set_attr "type" "neon_shift_imm<q>")]
608 )
609
;; Shift left by an immediate (SHL).
610 (define_insn "aarch64_simd_imm_shl<mode>"
611 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
614 "TARGET_SIMD"
615 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
616 [(set_attr "type" "neon_shift_imm<q>")]
617 )
618
;; Shift left by per-lane register amounts (SSHL); for left shifts the
;; signed and unsigned forms behave identically, so plain ashift RTL
;; maps to SSHL.
619 (define_insn "aarch64_simd_reg_sshl<mode>"
620 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 (match_operand:VDQ_I 2 "register_operand" "w")))]
623 "TARGET_SIMD"
624 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
625 [(set_attr "type" "neon_shift_reg<q>")]
626 )
627
;; USHL with the shift amount sign determining direction; kept as an
;; unspec because negative amounts shift right.  Used by the lshr/vlshr
;; expanders with negated amounts.
628 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
629 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
631 (match_operand:VDQ_I 2 "register_operand" "w")]
632 UNSPEC_ASHIFT_UNSIGNED))]
633 "TARGET_SIMD"
634 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
635 [(set_attr "type" "neon_shift_reg<q>")]
636 )
637
;; SSHL counterpart of the above; used by the ashr/vashr expanders.
638 (define_insn "aarch64_simd_reg_shl<mode>_signed"
639 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
640 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
641 (match_operand:VDQ_I 2 "register_operand" "w")]
642 UNSPEC_ASHIFT_SIGNED))]
643 "TARGET_SIMD"
644 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
645 [(set_attr "type" "neon_shift_reg<q>")]
646 )
647
;; Vector shift-left expander.  In-range immediates ([0, width-1]) use
;; SHL; anything else is broadcast into a vector register and shifted
;; with SSHL.
648 (define_expand "ashl<mode>3"
649 [(match_operand:VDQ_I 0 "register_operand" "")
650 (match_operand:VDQ_I 1 "register_operand" "")
651 (match_operand:SI 2 "general_operand" "")]
652 "TARGET_SIMD"
653 {
654 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
655 int shift_amount;
656
657 if (CONST_INT_P (operands[2]))
658 {
659 shift_amount = INTVAL (operands[2]);
660 if (shift_amount >= 0 && shift_amount < bit_width)
661 {
662 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
663 shift_amount);
664 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
665 operands[1],
666 tmp));
667 DONE;
668 }
669 else
670 {
671 operands[2] = force_reg (SImode, operands[2]);
672 }
673 }
674 else if (MEM_P (operands[2]))
675 {
676 operands[2] = force_reg (SImode, operands[2]);
677 }
678
679 if (REG_P (operands[2]))
680 {
681 rtx tmp = gen_reg_rtx (<MODE>mode);
682 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
683 convert_to_mode (<VEL>mode,
684 operands[2],
685 0)));
686 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
687 tmp));
688 DONE;
689 }
690 else
691 FAIL;
692 }
693 )
694
;; Logical shift-right expander.  Immediates in [1, width] map to
;; USHR (the ISA allows a right shift by the full element width);
;; variable amounts are negated, broadcast, and fed to the USHL unspec
;; (a negative USHL amount shifts right).
695 (define_expand "lshr<mode>3"
696 [(match_operand:VDQ_I 0 "register_operand" "")
697 (match_operand:VDQ_I 1 "register_operand" "")
698 (match_operand:SI 2 "general_operand" "")]
699 "TARGET_SIMD"
700 {
701 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
702 int shift_amount;
703
704 if (CONST_INT_P (operands[2]))
705 {
706 shift_amount = INTVAL (operands[2]);
707 if (shift_amount > 0 && shift_amount <= bit_width)
708 {
709 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
710 shift_amount);
711 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
712 operands[1],
713 tmp));
714 DONE;
715 }
716 else
717 operands[2] = force_reg (SImode, operands[2]);
718 }
719 else if (MEM_P (operands[2]))
720 {
721 operands[2] = force_reg (SImode, operands[2]);
722 }
723
724 if (REG_P (operands[2]))
725 {
726 rtx tmp = gen_reg_rtx (SImode);
727 rtx tmp1 = gen_reg_rtx (<MODE>mode);
728 emit_insn (gen_negsi2 (tmp, operands[2]));
729 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
730 convert_to_mode (<VEL>mode,
731 tmp, 0)));
732 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
733 operands[1],
734 tmp1));
735 DONE;
736 }
737 else
738 FAIL;
739 }
740 )
741
;; Arithmetic shift-right expander; same structure as lshr<mode>3 but
;; using SSHR / the signed SSHL unspec.
742 (define_expand "ashr<mode>3"
743 [(match_operand:VDQ_I 0 "register_operand" "")
744 (match_operand:VDQ_I 1 "register_operand" "")
745 (match_operand:SI 2 "general_operand" "")]
746 "TARGET_SIMD"
747 {
748 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
749 int shift_amount;
750
751 if (CONST_INT_P (operands[2]))
752 {
753 shift_amount = INTVAL (operands[2]);
754 if (shift_amount > 0 && shift_amount <= bit_width)
755 {
756 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
757 shift_amount);
758 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
759 operands[1],
760 tmp));
761 DONE;
762 }
763 else
764 operands[2] = force_reg (SImode, operands[2]);
765 }
766 else if (MEM_P (operands[2]))
767 {
768 operands[2] = force_reg (SImode, operands[2]);
769 }
770
771 if (REG_P (operands[2]))
772 {
773 rtx tmp = gen_reg_rtx (SImode);
774 rtx tmp1 = gen_reg_rtx (<MODE>mode);
775 emit_insn (gen_negsi2 (tmp, operands[2]));
776 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
777 convert_to_mode (<VEL>mode,
778 tmp, 0)));
779 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
780 operands[1],
781 tmp1));
782 DONE;
783 }
784 else
785 FAIL;
786 }
787 )
788
;; Vector-by-vector shift left: maps directly onto SSHL.
789 (define_expand "vashl<mode>3"
790 [(match_operand:VDQ_I 0 "register_operand" "")
791 (match_operand:VDQ_I 1 "register_operand" "")
792 (match_operand:VDQ_I 2 "register_operand" "")]
793 "TARGET_SIMD"
794 {
795 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
796 operands[2]));
797 DONE;
798 })
799
800 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
801 ;; Negating individual lanes most certainly offsets the
802 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the amounts and use
;; the signed SSHL unspec (negative amount shifts right).
803 (define_expand "vashr<mode>3"
804 [(match_operand:VDQ_BHSI 0 "register_operand" "")
805 (match_operand:VDQ_BHSI 1 "register_operand" "")
806 (match_operand:VDQ_BHSI 2 "register_operand" "")]
807 "TARGET_SIMD"
808 {
809 rtx neg = gen_reg_rtx (<MODE>mode);
810 emit (gen_neg<mode>2 (neg, operands[2]));
811 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
812 neg));
813 DONE;
814 })
815
816 ;; DI vector shift
817 (define_expand "aarch64_ashr_simddi"
818 [(match_operand:DI 0 "register_operand" "=w")
819 (match_operand:DI 1 "register_operand" "w")
820 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
821 "TARGET_SIMD"
822 {
823 /* An arithmetic shift right by 64 fills the result with copies of the sign
824 bit, just like asr by 63 - however the standard pattern does not handle
825 a shift by 64. */
826 if (INTVAL (operands[2]) == 64)
827 operands[2] = GEN_INT (63);
828 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
829 DONE;
830 }
831 )
832
;; Vector-by-vector logical shift right, via negated amounts and the
;; unsigned USHL unspec.
833 (define_expand "vlshr<mode>3"
834 [(match_operand:VDQ_BHSI 0 "register_operand" "")
835 (match_operand:VDQ_BHSI 1 "register_operand" "")
836 (match_operand:VDQ_BHSI 2 "register_operand" "")]
837 "TARGET_SIMD"
838 {
839 rtx neg = gen_reg_rtx (<MODE>mode);
840 emit (gen_neg<mode>2 (neg, operands[2]));
841 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
842 neg));
843 DONE;
844 })
845
;; DI logical shift right; a shift by 64 yields zero, which the
;; standard lshrdi3 pattern cannot express.
846 (define_expand "aarch64_lshr_simddi"
847 [(match_operand:DI 0 "register_operand" "=w")
848 (match_operand:DI 1 "register_operand" "w")
849 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
850 "TARGET_SIMD"
851 {
852 if (INTVAL (operands[2]) == 64)
853 emit_move_insn (operands[0], const0_rtx);
854 else
855 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
856 DONE;
857 }
858 )
859
;; vec_set expander for integer vectors: encode the lane as the
;; one-hot vec_merge mask the insn pattern expects.
860 (define_expand "vec_set<mode>"
861 [(match_operand:VDQ_BHSI 0 "register_operand")
862 (match_operand:<VEL> 1 "register_operand")
863 (match_operand:SI 2 "immediate_operand")]
864 "TARGET_SIMD"
865 {
866 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
867 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
868 GEN_INT (elem), operands[0]));
869 DONE;
870 }
871 )
872
873 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register shift right by a bit count; on big-endian the lane
;; order is reversed, so SHL gives the required direction instead.
874 (define_insn "vec_shr_<mode>"
875 [(set (match_operand:VD 0 "register_operand" "=w")
876 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
877 (match_operand:SI 2 "immediate_operand" "i")]
878 UNSPEC_VEC_SHR))]
879 "TARGET_SIMD"
880 {
881 if (BYTES_BIG_ENDIAN)
882 return "shl %d0, %d1, %2";
883 else
884 return "ushr %d0, %d1, %2";
885 }
886 [(set_attr "type" "neon_shift_imm")]
887 )
888
;; V2DI lane insert: from a GP register or from lane 0 of a SIMD
;; register (same one-hot-mask protocol as aarch64_simd_vec_set).
889 (define_insn "aarch64_simd_vec_setv2di"
890 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
891 (vec_merge:V2DI
892 (vec_duplicate:V2DI
893 (match_operand:DI 1 "register_operand" "r,w"))
894 (match_operand:V2DI 3 "register_operand" "0,0")
895 (match_operand:SI 2 "immediate_operand" "i,i")))]
896 "TARGET_SIMD"
897 {
898 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
899 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
900 switch (which_alternative)
901 {
902 case 0:
903 return "ins\\t%0.d[%p2], %1";
904 case 1:
905 return "ins\\t%0.d[%p2], %1.d[0]";
906 default:
907 gcc_unreachable ();
908 }
909 }
910 [(set_attr "type" "neon_from_gp, neon_ins_q")]
911 )
912
;; vec_set expander for V2DI.
913 (define_expand "vec_setv2di"
914 [(match_operand:V2DI 0 "register_operand")
915 (match_operand:DI 1 "register_operand")
916 (match_operand:SI 2 "immediate_operand")]
917 "TARGET_SIMD"
918 {
919 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
920 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
921 GEN_INT (elem), operands[0]));
922 DONE;
923 }
924 )
925
;; Lane insert for float (incl. FP16) vectors; source is always lane 0
;; of a SIMD register.
926 (define_insn "aarch64_simd_vec_set<mode>"
927 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
928 (vec_merge:VDQF_F16
929 (vec_duplicate:VDQF_F16
930 (match_operand:<VEL> 1 "register_operand" "w"))
931 (match_operand:VDQF_F16 3 "register_operand" "0")
932 (match_operand:SI 2 "immediate_operand" "i")))]
933 "TARGET_SIMD"
934 {
935 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
936
937 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
938 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
939 }
940 [(set_attr "type" "neon_ins<q>")]
941 )
942
;; vec_set expander for float vectors.
943 (define_expand "vec_set<mode>"
944 [(match_operand:VDQF_F16 0 "register_operand" "+w")
945 (match_operand:<VEL> 1 "register_operand" "w")
946 (match_operand:SI 2 "immediate_operand" "")]
947 "TARGET_SIMD"
948 {
949 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
950 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
951 GEN_INT (elem), operands[0]));
952 DONE;
953 }
954 )
955
956
;; Multiply-accumulate: op0 = op1 + op2 * op3 (MLA; the accumulator is
;; tied to the output via constraint "0").
957 (define_insn "aarch64_mla<mode>"
958 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
959 (plus:VDQ_BHSI (mult:VDQ_BHSI
960 (match_operand:VDQ_BHSI 2 "register_operand" "w")
961 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
962 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
963 "TARGET_SIMD"
964 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965 [(set_attr "type" "neon_mla_<Vetype><q>")]
966 )
967
;; MLA with one multiplicand broadcast from a lane (by-element form);
;; lane index endian-remapped.
968 (define_insn "*aarch64_mla_elt<mode>"
969 [(set (match_operand:VDQHS 0 "register_operand" "=w")
970 (plus:VDQHS
971 (mult:VDQHS
972 (vec_duplicate:VDQHS
973 (vec_select:<VEL>
974 (match_operand:VDQHS 1 "register_operand" "<h_con>")
975 (parallel [(match_operand:SI 2 "immediate_operand")])))
976 (match_operand:VDQHS 3 "register_operand" "w"))
977 (match_operand:VDQHS 4 "register_operand" "0")))]
978 "TARGET_SIMD"
979 {
980 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
981 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
982 }
983 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
984 )
985
;; As above with the lane taken from a vector of the swapped width.
986 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
987 [(set (match_operand:VDQHS 0 "register_operand" "=w")
988 (plus:VDQHS
989 (mult:VDQHS
990 (vec_duplicate:VDQHS
991 (vec_select:<VEL>
992 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
993 (parallel [(match_operand:SI 2 "immediate_operand")])))
994 (match_operand:VDQHS 3 "register_operand" "w"))
995 (match_operand:VDQHS 4 "register_operand" "0")))]
996 "TARGET_SIMD"
997 {
998 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
999 INTVAL (operands[2])));
1000 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1001 }
1002 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1003 )
1004
;; Multiply-subtract: op0 = op1 - op2 * op3 (MLS).
1005 (define_insn "aarch64_mls<mode>"
1006 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1007 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1008 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1009 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1010 "TARGET_SIMD"
1011 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1012 [(set_attr "type" "neon_mla_<Vetype><q>")]
1013 )
1014
;; MLS by-element form; lane index endian-remapped.
1015 (define_insn "*aarch64_mls_elt<mode>"
1016 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1017 (minus:VDQHS
1018 (match_operand:VDQHS 4 "register_operand" "0")
1019 (mult:VDQHS
1020 (vec_duplicate:VDQHS
1021 (vec_select:<VEL>
1022 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1023 (parallel [(match_operand:SI 2 "immediate_operand")])))
1024 (match_operand:VDQHS 3 "register_operand" "w"))))]
1025 "TARGET_SIMD"
1026 {
1027 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1028 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1029 }
1030 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1031 )
1032
1033 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1034 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1035 (minus:VDQHS
1036 (match_operand:VDQHS 4 "register_operand" "0")
1037 (mult:VDQHS
1038 (vec_duplicate:VDQHS
1039 (vec_select:<VEL>
1040 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))))]
1043 "TARGET_SIMD"
1044 {
1045 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[2])));
1047 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048 }
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1050 )
1051
1052 ;; Max/Min operations.
1053 (define_insn "<su><maxmin><mode>3"
1054 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1055 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1056 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1057 "TARGET_SIMD"
1058 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1059 [(set_attr "type" "neon_minmax<q>")]
1060 )
1061
1062 (define_expand "<su><maxmin>v2di3"
1063 [(set (match_operand:V2DI 0 "register_operand" "")
1064 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1065 (match_operand:V2DI 2 "register_operand" "")))]
1066 "TARGET_SIMD"
1067 {
1068 enum rtx_code cmp_operator;
1069 rtx cmp_fmt;
1070
1071 switch (<CODE>)
1072 {
1073 case UMIN:
1074 cmp_operator = LTU;
1075 break;
1076 case SMIN:
1077 cmp_operator = LT;
1078 break;
1079 case UMAX:
1080 cmp_operator = GTU;
1081 break;
1082 case SMAX:
1083 cmp_operator = GT;
1084 break;
1085 default:
1086 gcc_unreachable ();
1087 }
1088
1089 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1090 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1091 operands[2], cmp_fmt, operands[1], operands[2]));
1092 DONE;
1093 })
1094
1095 ;; Pairwise Integer Max/Min operations.
1096 (define_insn "aarch64_<maxmin_uns>p<mode>"
1097 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1098 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1099 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1100 MAXMINV))]
1101 "TARGET_SIMD"
1102 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1103 [(set_attr "type" "neon_minmax<q>")]
1104 )
1105
1106 ;; Pairwise FP Max/Min operations.
1107 (define_insn "aarch64_<maxmin_uns>p<mode>"
1108 [(set (match_operand:VDQF 0 "register_operand" "=w")
1109 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1110 (match_operand:VDQF 2 "register_operand" "w")]
1111 FMAXMINV))]
1112 "TARGET_SIMD"
1113 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1114 [(set_attr "type" "neon_minmax<q>")]
1115 )
1116
1117 ;; vec_concat gives a new vector with the low elements from operand 1, and
1118 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1119 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1120 ;; What that means, is that the RTL descriptions of the below patterns
1121 ;; need to change depending on endianness.
1122
1123 ;; Move to the low architectural bits of the register.
1124 ;; On little-endian this is { operand, zeroes }
1125 ;; On big-endian this is { zeroes, operand }
1126
1127 (define_insn "move_lo_quad_internal_<mode>"
1128 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1129 (vec_concat:VQ_NO2E
1130 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1131 (vec_duplicate:<VHALF> (const_int 0))))]
1132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1133 "@
1134 dup\\t%d0, %1.d[0]
1135 fmov\\t%d0, %1
1136 dup\\t%d0, %1"
1137 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1138 (set_attr "simd" "yes,*,yes")
1139 (set_attr "fp" "*,yes,*")
1140 (set_attr "length" "4")]
1141 )
1142
1143 (define_insn "move_lo_quad_internal_<mode>"
1144 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1145 (vec_concat:VQ_2E
1146 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1147 (const_int 0)))]
1148 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1149 "@
1150 dup\\t%d0, %1.d[0]
1151 fmov\\t%d0, %1
1152 dup\\t%d0, %1"
1153 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1154 (set_attr "simd" "yes,*,yes")
1155 (set_attr "fp" "*,yes,*")
1156 (set_attr "length" "4")]
1157 )
1158
1159 (define_insn "move_lo_quad_internal_be_<mode>"
1160 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1161 (vec_concat:VQ_NO2E
1162 (vec_duplicate:<VHALF> (const_int 0))
1163 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1164 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1165 "@
1166 dup\\t%d0, %1.d[0]
1167 fmov\\t%d0, %1
1168 dup\\t%d0, %1"
1169 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1170 (set_attr "simd" "yes,*,yes")
1171 (set_attr "fp" "*,yes,*")
1172 (set_attr "length" "4")]
1173 )
1174
1175 (define_insn "move_lo_quad_internal_be_<mode>"
1176 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1177 (vec_concat:VQ_2E
1178 (const_int 0)
1179 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1180 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1181 "@
1182 dup\\t%d0, %1.d[0]
1183 fmov\\t%d0, %1
1184 dup\\t%d0, %1"
1185 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1186 (set_attr "simd" "yes,*,yes")
1187 (set_attr "fp" "*,yes,*")
1188 (set_attr "length" "4")]
1189 )
1190
1191 (define_expand "move_lo_quad_<mode>"
1192 [(match_operand:VQ 0 "register_operand")
1193 (match_operand:VQ 1 "register_operand")]
1194 "TARGET_SIMD"
1195 {
1196 if (BYTES_BIG_ENDIAN)
1197 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1198 else
1199 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1200 DONE;
1201 }
1202 )
1203
1204 ;; Move operand1 to the high architectural bits of the register, keeping
1205 ;; the low architectural bits of operand2.
1206 ;; For little-endian this is { operand2, operand1 }
1207 ;; For big-endian this is { operand1, operand2 }
1208
1209 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1210 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1211 (vec_concat:VQ
1212 (vec_select:<VHALF>
1213 (match_dup 0)
1214 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1215 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1216 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1217 "@
1218 ins\\t%0.d[1], %1.d[0]
1219 ins\\t%0.d[1], %1"
1220 [(set_attr "type" "neon_ins")]
1221 )
1222
1223 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1224 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1225 (vec_concat:VQ
1226 (match_operand:<VHALF> 1 "register_operand" "w,r")
1227 (vec_select:<VHALF>
1228 (match_dup 0)
1229 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1231 "@
1232 ins\\t%0.d[1], %1.d[0]
1233 ins\\t%0.d[1], %1"
1234 [(set_attr "type" "neon_ins")]
1235 )
1236
1237 (define_expand "move_hi_quad_<mode>"
1238 [(match_operand:VQ 0 "register_operand" "")
1239 (match_operand:<VHALF> 1 "register_operand" "")]
1240 "TARGET_SIMD"
1241 {
1242 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1243 if (BYTES_BIG_ENDIAN)
1244 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1245 operands[1], p));
1246 else
1247 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1248 operands[1], p));
1249 DONE;
1250 })
1251
1252 ;; Narrowing operations.
1253
1254 ;; For doubles.
1255 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1256 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1257 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1258 "TARGET_SIMD"
1259 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1260 [(set_attr "type" "neon_shift_imm_narrow_q")]
1261 )
1262
1263 (define_expand "vec_pack_trunc_<mode>"
1264 [(match_operand:<VNARROWD> 0 "register_operand" "")
1265 (match_operand:VDN 1 "register_operand" "")
1266 (match_operand:VDN 2 "register_operand" "")]
1267 "TARGET_SIMD"
1268 {
1269 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1270 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1271 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1272
1273 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1274 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1275 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1276 DONE;
1277 })
1278
1279 ;; For quads.
1280
1281 (define_insn "vec_pack_trunc_<mode>"
1282 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1283 (vec_concat:<VNARROWQ2>
1284 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1285 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1286 "TARGET_SIMD"
1287 {
1288 if (BYTES_BIG_ENDIAN)
1289 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1290 else
1291 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1292 }
1293 [(set_attr "type" "multiple")
1294 (set_attr "length" "8")]
1295 )
1296
1297 ;; Widening operations.
1298
1299 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1302 (match_operand:VQW 1 "register_operand" "w")
1303 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1304 )))]
1305 "TARGET_SIMD"
1306 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1307 [(set_attr "type" "neon_shift_imm_long")]
1308 )
1309
1310 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1312 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1313 (match_operand:VQW 1 "register_operand" "w")
1314 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1315 )))]
1316 "TARGET_SIMD"
1317 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1318 [(set_attr "type" "neon_shift_imm_long")]
1319 )
1320
1321 (define_expand "vec_unpack<su>_hi_<mode>"
1322 [(match_operand:<VWIDE> 0 "register_operand" "")
1323 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1324 "TARGET_SIMD"
1325 {
1326 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1327 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1328 operands[1], p));
1329 DONE;
1330 }
1331 )
1332
1333 (define_expand "vec_unpack<su>_lo_<mode>"
1334 [(match_operand:<VWIDE> 0 "register_operand" "")
1335 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1336 "TARGET_SIMD"
1337 {
1338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1339 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1340 operands[1], p));
1341 DONE;
1342 }
1343 )
1344
1345 ;; Widening arithmetic.
1346
1347 (define_insn "*aarch64_<su>mlal_lo<mode>"
1348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1349 (plus:<VWIDE>
1350 (mult:<VWIDE>
1351 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 2 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 4 "register_operand" "w")
1356 (match_dup 3))))
1357 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1358 "TARGET_SIMD"
1359 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1360 [(set_attr "type" "neon_mla_<Vetype>_long")]
1361 )
1362
1363 (define_insn "*aarch64_<su>mlal_hi<mode>"
1364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1365 (plus:<VWIDE>
1366 (mult:<VWIDE>
1367 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1368 (match_operand:VQW 2 "register_operand" "w")
1369 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1370 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1371 (match_operand:VQW 4 "register_operand" "w")
1372 (match_dup 3))))
1373 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1374 "TARGET_SIMD"
1375 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1376 [(set_attr "type" "neon_mla_<Vetype>_long")]
1377 )
1378
1379 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1381 (minus:<VWIDE>
1382 (match_operand:<VWIDE> 1 "register_operand" "0")
1383 (mult:<VWIDE>
1384 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1385 (match_operand:VQW 2 "register_operand" "w")
1386 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1387 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1388 (match_operand:VQW 4 "register_operand" "w")
1389 (match_dup 3))))))]
1390 "TARGET_SIMD"
1391 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1392 [(set_attr "type" "neon_mla_<Vetype>_long")]
1393 )
1394
1395 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1397 (minus:<VWIDE>
1398 (match_operand:<VWIDE> 1 "register_operand" "0")
1399 (mult:<VWIDE>
1400 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1401 (match_operand:VQW 2 "register_operand" "w")
1402 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1403 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1404 (match_operand:VQW 4 "register_operand" "w")
1405 (match_dup 3))))))]
1406 "TARGET_SIMD"
1407 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1408 [(set_attr "type" "neon_mla_<Vetype>_long")]
1409 )
1410
1411 (define_insn "*aarch64_<su>mlal<mode>"
1412 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1413 (plus:<VWIDE>
1414 (mult:<VWIDE>
1415 (ANY_EXTEND:<VWIDE>
1416 (match_operand:VD_BHSI 1 "register_operand" "w"))
1417 (ANY_EXTEND:<VWIDE>
1418 (match_operand:VD_BHSI 2 "register_operand" "w")))
1419 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1420 "TARGET_SIMD"
1421 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1422 [(set_attr "type" "neon_mla_<Vetype>_long")]
1423 )
1424
1425 (define_insn "*aarch64_<su>mlsl<mode>"
1426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1427 (minus:<VWIDE>
1428 (match_operand:<VWIDE> 1 "register_operand" "0")
1429 (mult:<VWIDE>
1430 (ANY_EXTEND:<VWIDE>
1431 (match_operand:VD_BHSI 2 "register_operand" "w"))
1432 (ANY_EXTEND:<VWIDE>
1433 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1434 "TARGET_SIMD"
1435 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1436 [(set_attr "type" "neon_mla_<Vetype>_long")]
1437 )
1438
1439 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 1 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 2 "register_operand" "w")
1446 (match_dup 3)))))]
1447 "TARGET_SIMD"
1448 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1449 [(set_attr "type" "neon_mul_<Vetype>_long")]
1450 )
1451
1452 (define_expand "vec_widen_<su>mult_lo_<mode>"
1453 [(match_operand:<VWIDE> 0 "register_operand" "")
1454 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1455 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1456 "TARGET_SIMD"
1457 {
1458 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1459 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1460 operands[1],
1461 operands[2], p));
1462 DONE;
1463 }
1464 )
1465
1466 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1468 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1469 (match_operand:VQW 1 "register_operand" "w")
1470 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1471 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1472 (match_operand:VQW 2 "register_operand" "w")
1473 (match_dup 3)))))]
1474 "TARGET_SIMD"
1475 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1476 [(set_attr "type" "neon_mul_<Vetype>_long")]
1477 )
1478
1479 (define_expand "vec_widen_<su>mult_hi_<mode>"
1480 [(match_operand:<VWIDE> 0 "register_operand" "")
1481 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1482 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1483 "TARGET_SIMD"
1484 {
1485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1486 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1487 operands[1],
1488 operands[2], p));
1489 DONE;
1490
1491 }
1492 )
1493
1494 ;; FP vector operations.
1495 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1496 ;; double-precision (64-bit) floating-point data types and arithmetic as
1497 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1498 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1499 ;;
1500 ;; Floating-point operations can raise an exception. Vectorizing such
1501 ;; operations are safe because of reasons explained below.
1502 ;;
1503 ;; ARMv8 permits an extension to enable trapped floating-point
1504 ;; exception handling, however this is an optional feature. In the
1505 ;; event of a floating-point exception being raised by vectorised
1506 ;; code then:
1507 ;; 1. If trapped floating-point exceptions are available, then a trap
1508 ;; will be taken when any lane raises an enabled exception. A trap
1509 ;; handler may determine which lane raised the exception.
1510 ;; 2. Alternatively a sticky exception flag is set in the
1511 ;; floating-point status register (FPSR). Software may explicitly
1512 ;; test the exception flags, in which case the tests will either
1513 ;; prevent vectorisation, allowing precise identification of the
1514 ;; failing operation, or if tested outside of vectorisable regions
1515 ;; then the specific operation and lane are not of interest.
1516
1517 ;; FP arithmetic operations.
1518
1519 (define_insn "add<mode>3"
1520 [(set (match_operand:VDQF 0 "register_operand" "=w")
1521 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1522 (match_operand:VDQF 2 "register_operand" "w")))]
1523 "TARGET_SIMD"
1524 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1525 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1526 )
1527
1528 (define_insn "sub<mode>3"
1529 [(set (match_operand:VDQF 0 "register_operand" "=w")
1530 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1531 (match_operand:VDQF 2 "register_operand" "w")))]
1532 "TARGET_SIMD"
1533 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1534 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1535 )
1536
1537 (define_insn "mul<mode>3"
1538 [(set (match_operand:VDQF 0 "register_operand" "=w")
1539 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1540 (match_operand:VDQF 2 "register_operand" "w")))]
1541 "TARGET_SIMD"
1542 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1544 )
1545
1546 (define_expand "div<mode>3"
1547 [(set (match_operand:VDQF 0 "register_operand")
1548 (div:VDQF (match_operand:VDQF 1 "general_operand")
1549 (match_operand:VDQF 2 "register_operand")))]
1550 "TARGET_SIMD"
1551 {
1552 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1553 DONE;
1554
1555 operands[1] = force_reg (<MODE>mode, operands[1]);
1556 })
1557
1558 (define_insn "*div<mode>3"
1559 [(set (match_operand:VDQF 0 "register_operand" "=w")
1560 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1561 (match_operand:VDQF 2 "register_operand" "w")))]
1562 "TARGET_SIMD"
1563 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1564 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1565 )
1566
1567 (define_insn "neg<mode>2"
1568 [(set (match_operand:VDQF 0 "register_operand" "=w")
1569 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1570 "TARGET_SIMD"
1571 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1572 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1573 )
1574
1575 (define_insn "abs<mode>2"
1576 [(set (match_operand:VDQF 0 "register_operand" "=w")
1577 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1578 "TARGET_SIMD"
1579 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1580 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
1581 )
1582
1583 (define_insn "fma<mode>4"
1584 [(set (match_operand:VDQF 0 "register_operand" "=w")
1585 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1586 (match_operand:VDQF 2 "register_operand" "w")
1587 (match_operand:VDQF 3 "register_operand" "0")))]
1588 "TARGET_SIMD"
1589 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1590 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1591 )
1592
1593 (define_insn "*aarch64_fma4_elt<mode>"
1594 [(set (match_operand:VDQF 0 "register_operand" "=w")
1595 (fma:VDQF
1596 (vec_duplicate:VDQF
1597 (vec_select:<VEL>
1598 (match_operand:VDQF 1 "register_operand" "<h_con>")
1599 (parallel [(match_operand:SI 2 "immediate_operand")])))
1600 (match_operand:VDQF 3 "register_operand" "w")
1601 (match_operand:VDQF 4 "register_operand" "0")))]
1602 "TARGET_SIMD"
1603 {
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1605 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1606 }
1607 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1608 )
1609
1610 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1611 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1612 (fma:VDQSF
1613 (vec_duplicate:VDQSF
1614 (vec_select:<VEL>
1615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1616 (parallel [(match_operand:SI 2 "immediate_operand")])))
1617 (match_operand:VDQSF 3 "register_operand" "w")
1618 (match_operand:VDQSF 4 "register_operand" "0")))]
1619 "TARGET_SIMD"
1620 {
1621 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1622 INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1624 }
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1626 )
1627
1628 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1629 [(set (match_operand:VMUL 0 "register_operand" "=w")
1630 (fma:VMUL
1631 (vec_duplicate:VMUL
1632 (match_operand:<VEL> 1 "register_operand" "w"))
1633 (match_operand:VMUL 2 "register_operand" "w")
1634 (match_operand:VMUL 3 "register_operand" "0")))]
1635 "TARGET_SIMD"
1636 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1637 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
1638 )
1639
1640 (define_insn "*aarch64_fma4_elt_to_64v2df"
1641 [(set (match_operand:DF 0 "register_operand" "=w")
1642 (fma:DF
1643 (vec_select:DF
1644 (match_operand:V2DF 1 "register_operand" "w")
1645 (parallel [(match_operand:SI 2 "immediate_operand")]))
1646 (match_operand:DF 3 "register_operand" "w")
1647 (match_operand:DF 4 "register_operand" "0")))]
1648 "TARGET_SIMD"
1649 {
1650 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1651 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1652 }
1653 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1654 )
1655
1656 (define_insn "fnma<mode>4"
1657 [(set (match_operand:VDQF 0 "register_operand" "=w")
1658 (fma:VDQF
1659 (match_operand:VDQF 1 "register_operand" "w")
1660 (neg:VDQF
1661 (match_operand:VDQF 2 "register_operand" "w"))
1662 (match_operand:VDQF 3 "register_operand" "0")))]
1663 "TARGET_SIMD"
1664 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1666 )
1667
1668 (define_insn "*aarch64_fnma4_elt<mode>"
1669 [(set (match_operand:VDQF 0 "register_operand" "=w")
1670 (fma:VDQF
1671 (neg:VDQF
1672 (match_operand:VDQF 3 "register_operand" "w"))
1673 (vec_duplicate:VDQF
1674 (vec_select:<VEL>
1675 (match_operand:VDQF 1 "register_operand" "<h_con>")
1676 (parallel [(match_operand:SI 2 "immediate_operand")])))
1677 (match_operand:VDQF 4 "register_operand" "0")))]
1678 "TARGET_SIMD"
1679 {
1680 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1681 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1682 }
1683 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1684 )
1685
1686 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1687 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1688 (fma:VDQSF
1689 (neg:VDQSF
1690 (match_operand:VDQSF 3 "register_operand" "w"))
1691 (vec_duplicate:VDQSF
1692 (vec_select:<VEL>
1693 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQSF 4 "register_operand" "0")))]
1696 "TARGET_SIMD"
1697 {
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1699 INTVAL (operands[2])));
1700 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1701 }
1702 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1703 )
1704
1705 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1706 [(set (match_operand:VMUL 0 "register_operand" "=w")
1707 (fma:VMUL
1708 (neg:VMUL
1709 (match_operand:VMUL 2 "register_operand" "w"))
1710 (vec_duplicate:VMUL
1711 (match_operand:<VEL> 1 "register_operand" "w"))
1712 (match_operand:VMUL 3 "register_operand" "0")))]
1713 "TARGET_SIMD"
1714 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1715 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
1716 )
1717
1718 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1719 [(set (match_operand:DF 0 "register_operand" "=w")
1720 (fma:DF
1721 (vec_select:DF
1722 (match_operand:V2DF 1 "register_operand" "w")
1723 (parallel [(match_operand:SI 2 "immediate_operand")]))
1724 (neg:DF
1725 (match_operand:DF 3 "register_operand" "w"))
1726 (match_operand:DF 4 "register_operand" "0")))]
1727 "TARGET_SIMD"
1728 {
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1730 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1731 }
1732 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1733 )
1734
1735 ;; Vector versions of the floating-point frint patterns.
1736 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1737 (define_insn "<frint_pattern><mode>2"
1738 [(set (match_operand:VDQF 0 "register_operand" "=w")
1739 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1740 FRINT))]
1741 "TARGET_SIMD"
1742 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1743 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1744 )
1745
1746 ;; Vector versions of the fcvt standard patterns.
1747 ;; Expands to lbtrunc, lround, lceil, lfloor
1748 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1749 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1750 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1751 [(match_operand:VDQF 1 "register_operand" "w")]
1752 FCVT)))]
1753 "TARGET_SIMD"
1754 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1755 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1756 )
1757
1758 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1759 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1760 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1761 [(mult:VDQF
1762 (match_operand:VDQF 1 "register_operand" "w")
1763 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1764 UNSPEC_FRINTZ)))]
1765 "TARGET_SIMD
1766 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1767 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1768 {
1769 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1770 char buf[64];
1771 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1772 output_asm_insn (buf, operands);
1773 return "";
1774 }
1775 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1776 )
1777
1778 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1779 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1780 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1781 [(match_operand:VDQF 1 "register_operand")]
1782 UNSPEC_FRINTZ)))]
1783 "TARGET_SIMD"
1784 {})
1785
1786 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1787 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1788 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1789 [(match_operand:VDQF 1 "register_operand")]
1790 UNSPEC_FRINTZ)))]
1791 "TARGET_SIMD"
1792 {})
1793
1794 (define_expand "ftrunc<VDQF:mode>2"
1795 [(set (match_operand:VDQF 0 "register_operand")
1796 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1797 UNSPEC_FRINTZ))]
1798 "TARGET_SIMD"
1799 {})
1800
1801 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1802 [(set (match_operand:VDQF 0 "register_operand" "=w")
1803 (FLOATUORS:VDQF
1804 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1805 "TARGET_SIMD"
1806 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1807 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1808 )
1809
1810 ;; Conversions between vectors of floats and doubles.
1811 ;; Contains a mix of patterns to match standard pattern names
1812 ;; and those for intrinsics.
1813
1814 ;; Float widening operations.
1815
1816 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1818 (float_extend:<VWIDE> (vec_select:<VHALF>
1819 (match_operand:VQ_HSF 1 "register_operand" "w")
1820 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1821 )))]
1822 "TARGET_SIMD"
1823 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1824 [(set_attr "type" "neon_fp_cvt_widen_s")]
1825 )
1826
1827 ;; Convert between fixed-point and floating-point (vector modes)
1828
1829 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VDQF:mode>3"
1830 [(set (match_operand:<VDQF:FCVT_TARGET> 0 "register_operand" "=w")
1831 (unspec:<VDQF:FCVT_TARGET> [(match_operand:VDQF 1 "register_operand" "w")
1832 (match_operand:SI 2 "immediate_operand" "i")]
1833 FCVT_F2FIXED))]
1834 "TARGET_SIMD"
1835 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1836 [(set_attr "type" "neon_fp_to_int_<VDQF:Vetype><q>")]
1837 )
1838
1839 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_SDI:mode>3"
1840 [(set (match_operand:<VDQ_SDI:FCVT_TARGET> 0 "register_operand" "=w")
1841 (unspec:<VDQ_SDI:FCVT_TARGET> [(match_operand:VDQ_SDI 1 "register_operand" "w")
1842 (match_operand:SI 2 "immediate_operand" "i")]
1843 FCVT_FIXED2F))]
1844 "TARGET_SIMD"
1845 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1846 [(set_attr "type" "neon_int_to_fp_<VDQ_SDI:Vetype><q>")]
1847 )
1848
1849 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1850 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1851 ;; the meaning of HI and LO changes depending on the target endianness.
1852 ;; While elsewhere we map the higher numbered elements of a vector to
1853 ;; the lower architectural lanes of the vector, for these patterns we want
1854 ;; to always treat "hi" as referring to the higher architectural lanes.
1855 ;; Consequently, while the patterns below look inconsistent with our
1856 ;; other big-endian patterns their behavior is as required.
1857
;; Float-extend the low architectural half of a vector (FCVTL).  The
;; half-selection parallel is endian-dependent, so it is built at
;; expand time and passed to the insn pattern.
1858 (define_expand "vec_unpacks_lo_<mode>"
1859 [(match_operand:<VWIDE> 0 "register_operand" "")
1860 (match_operand:VQ_HSF 1 "register_operand" "")]
1861 "TARGET_SIMD"
1862 {
1863 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1864 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1865 operands[1], p));
1866 DONE;
1867 }
1868 )
1869
;; Float-extend the high architectural half of a vector (FCVTL2).
1870 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1871 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1872 (float_extend:<VWIDE> (vec_select:<VHALF>
1873 (match_operand:VQ_HSF 1 "register_operand" "w")
1874 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1875 )))]
1876 "TARGET_SIMD"
1877 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1878 [(set_attr "type" "neon_fp_cvt_widen_s")]
1879 )
1880
;; Float-extend the high architectural half of a vector (FCVTL2),
;; mirroring vec_unpacks_lo above.
1881 (define_expand "vec_unpacks_hi_<mode>"
1882 [(match_operand:<VWIDE> 0 "register_operand" "")
1883 (match_operand:VQ_HSF 1 "register_operand" "")]
1884 "TARGET_SIMD"
1885 {
/* Select the high half (endian-aware parallel).  */
1886 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
/* Bug fix: this previously called the *_lo_ generator.  The lo insn's
   vect_par_cnst_lo_half predicate rejects a high-half parallel, so the
   emitted insn could never be recognized.  Use the *_hi_ pattern.  */
1887 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
1888 operands[1], p));
1889 DONE;
1890 }
1891 )
;; Float-extend a whole 64-bit vector to the 128-bit wide mode (FCVTL).
1892 (define_insn "aarch64_float_extend_lo_<Vwide>"
1893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1894 (float_extend:<VWIDE>
1895 (match_operand:VDF 1 "register_operand" "w")))]
1896 "TARGET_SIMD"
1897 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1898 [(set_attr "type" "neon_fp_cvt_widen_s")]
1899 )
1900
1901 ;; Float narrowing operations.
1902
;; Narrow a 128-bit float vector into a 64-bit one (FCVTN).
1903 (define_insn "aarch64_float_truncate_lo_<mode>"
1904 [(set (match_operand:VDF 0 "register_operand" "=w")
1905 (float_truncate:VDF
1906 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1907 "TARGET_SIMD"
1908 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1909 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1910 )
1911
;; FCVTN2, little-endian form: operand 1 (tied to the destination)
;; already holds the existing half; the narrowed value fills the other.
1912 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1913 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1914 (vec_concat:<VDBL>
1915 (match_operand:VDF 1 "register_operand" "0")
1916 (float_truncate:VDF
1917 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1918 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1919 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1920 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1921 )
1922
;; FCVTN2, big-endian form: same as the _le pattern but with the
;; vec_concat operand order reversed to match big-endian lane numbering.
1923 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1924 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1925 (vec_concat:<VDBL>
1926 (float_truncate:VDF
1927 (match_operand:<VWIDE> 2 "register_operand" "w"))
1928 (match_operand:VDF 1 "register_operand" "0")))]
1929 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1930 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1931 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1932 )
1933
;; Dispatch to the _le/_be fcvtn2 variant for the target endianness.
1934 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1935 [(match_operand:<VDBL> 0 "register_operand" "=w")
1936 (match_operand:VDF 1 "register_operand" "0")
1937 (match_operand:<VWIDE> 2 "register_operand" "w")]
1938 "TARGET_SIMD"
1939 {
1940 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1941 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1942 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1943 emit_insn (gen (operands[0], operands[1], operands[2]));
1944 DONE;
1945 }
1946 )
1947
;; Narrow two V2DF vectors into one V4SF: FCVTN into one half, FCVTN2
;; into the other.  Which source feeds which half is endian-dependent.
1948 (define_expand "vec_pack_trunc_v2df"
1949 [(set (match_operand:V4SF 0 "register_operand")
1950 (vec_concat:V4SF
1951 (float_truncate:V2SF
1952 (match_operand:V2DF 1 "register_operand"))
1953 (float_truncate:V2SF
1954 (match_operand:V2DF 2 "register_operand"))
1955 ))]
1956 "TARGET_SIMD"
1957 {
1958 rtx tmp = gen_reg_rtx (V2SFmode);
1959 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1960 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1961
1962 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1963 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1964 tmp, operands[hi]));
1965 DONE;
1966 }
1967 )
1968
;; Narrow two scalar DFs into a V2SF: assemble them into a V2DF
;; intermediate (endian-dependent half ordering), then FCVTN the pair.
1969 (define_expand "vec_pack_trunc_df"
1970 [(set (match_operand:V2SF 0 "register_operand")
1971 (vec_concat:V2SF
1972 (float_truncate:SF
1973 (match_operand:DF 1 "register_operand"))
1974 (float_truncate:SF
1975 (match_operand:DF 2 "register_operand"))
1976 ))]
1977 "TARGET_SIMD"
1978 {
/* Bug fix: TMP is the destination of move_lo/hi_quad_v2df and the
   source of float_truncate_lo_v2sf, all of which require a V2DF; it
   was wrongly allocated in V2SFmode, giving the 128-bit intermediate
   a 64-bit pseudo and producing malformed, unrecognizable insns.  */
1979 rtx tmp = gen_reg_rtx (V2DFmode);
1980 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1981 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1982
1983 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1984 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1985 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1986 DONE;
1987 }
1988 )
1989
1990 ;; FP Max/Min
1991 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1992 ;; expression like:
1993 ;; a = (b < c) ? b : c;
1994 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1995 ;; either explicitly or indirectly via -ffast-math.
1996 ;;
1997 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1998 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1999 ;; operand will be returned when both operands are zero (i.e. they may not
2000 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2001 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2002 ;; NaNs.
2003
;; smax/smin standard names, mapped to FMAXNM/FMINNM (see the comment
;; above on when the mid-end generates these).
2004 (define_insn "<su><maxmin><mode>3"
2005 [(set (match_operand:VDQF 0 "register_operand" "=w")
2006 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
2007 (match_operand:VDQF 2 "register_operand" "w")))]
2008 "TARGET_SIMD"
2009 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2010 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2011 )
2012
;; Vector FP max/min intrinsics, expressed as an unspec; the mnemonic
;; comes from the maxmin_uns_op attribute of the iterator value.
2013 (define_insn "<maxmin_uns><mode>3"
2014 [(set (match_operand:VDQF 0 "register_operand" "=w")
2015 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2016 (match_operand:VDQF 2 "register_operand" "w")]
2017 FMAXMIN_UNS))]
2018 "TARGET_SIMD"
2019 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2020 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2021 )
2022
2023 ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
;; IEEE-754 fmax()/fmin() semantics as an unspec (see comment above).
2024 (define_insn "<fmaxmin><mode>3"
2025 [(set (match_operand:VDQF 0 "register_operand" "=w")
2026 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2027 (match_operand:VDQF 2 "register_operand" "w")]
2028 FMAXMIN))]
2029 "TARGET_SIMD"
2030 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2031 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2032 )
2033
2034 ;; 'across lanes' add.
2035
;; Integer sum across lanes with a scalar result: reduce into a vector
;; scratch, then extract architectural lane 0.
2036 (define_expand "reduc_plus_scal_<mode>"
2037 [(match_operand:<VEL> 0 "register_operand" "=w")
2038 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2039 UNSPEC_ADDV)]
2040 "TARGET_SIMD"
2041 {
2042 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2043 rtx scratch = gen_reg_rtx (<MODE>mode);
2044 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2045 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2046 DONE;
2047 }
2048 )
2049
;; Pairwise floating-point add (FADDP); used below for add reductions.
2050 (define_insn "aarch64_faddp<mode>"
2051 [(set (match_operand:VDQF 0 "register_operand" "=w")
2052 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2053 (match_operand:VDQF 2 "register_operand" "w")]
2054 UNSPEC_FADDV))]
2055 "TARGET_SIMD"
2056 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2057 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2058 )
2059
;; Across-lanes integer sum, writing a scalar register.
2060 (define_insn "aarch64_reduc_plus_internal<mode>"
2061 [(set (match_operand:VDQV 0 "register_operand" "=w")
2062 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2063 UNSPEC_ADDV))]
2064 "TARGET_SIMD"
2065 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2066 [(set_attr "type" "neon_reduc_add<q>")]
2067 )
2068
;; V2SI sum reduction: a pairwise add of the vector with itself.
2069 (define_insn "aarch64_reduc_plus_internalv2si"
2070 [(set (match_operand:V2SI 0 "register_operand" "=w")
2071 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2072 UNSPEC_ADDV))]
2073 "TARGET_SIMD"
2074 "addp\\t%0.2s, %1.2s, %1.2s"
2075 [(set_attr "type" "neon_reduc_add")]
2076 )
2077
;; Sum of a two-element float vector: scalar-result pairwise FADDP.
2078 (define_insn "reduc_plus_scal_<mode>"
2079 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2080 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2081 UNSPEC_FADDV))]
2082 "TARGET_SIMD"
2083 "faddp\\t%<Vetype>0, %1.<Vtype>"
2084 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2085 )
2086
;; V4SF sum reduction: two rounds of pairwise FADDP, then extract
;; architectural lane 0 of the result.
2087 (define_expand "reduc_plus_scal_v4sf"
2088 [(set (match_operand:SF 0 "register_operand")
2089 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2090 UNSPEC_FADDV))]
2091 "TARGET_SIMD"
2092 {
2093 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2094 rtx scratch = gen_reg_rtx (V4SFmode);
2095 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2096 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2097 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2098 DONE;
2099 })
2100
;; Count leading redundant sign bits per element (CLS).
2101 (define_insn "clrsb<mode>2"
2102 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2103 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2104 "TARGET_SIMD"
2105 "cls\\t%0.<Vtype>, %1.<Vtype>"
2106 [(set_attr "type" "neon_cls<q>")]
2107 )
2108
;; Count leading zeros per element (CLZ).
2109 (define_insn "clz<mode>2"
2110 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2111 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2112 "TARGET_SIMD"
2113 "clz\\t%0.<Vtype>, %1.<Vtype>"
2114 [(set_attr "type" "neon_cls<q>")]
2115 )
2116
;; Population count per byte element (CNT); VB restricts this to the
;; byte-vector modes.
2117 (define_insn "popcount<mode>2"
2118 [(set (match_operand:VB 0 "register_operand" "=w")
2119 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2120 "TARGET_SIMD"
2121 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2122 [(set_attr "type" "neon_cnt<q>")]
2123 )
2124
2125 ;; 'across lanes' max and min ops.
2126
2127 ;; Template for outputting a scalar, so we can create __builtins which can be
2128 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min across lanes with a scalar result: reduce into a vector
;; scratch, then extract architectural lane 0.
2129 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2130 [(match_operand:<VEL> 0 "register_operand")
2131 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
2132 FMAXMINV)]
2133 "TARGET_SIMD"
2134 {
2135 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2136 rtx scratch = gen_reg_rtx (<MODE>mode);
2137 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2138 operands[1]));
2139 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2140 DONE;
2141 }
2142 )
2143
2144 ;; Likewise for integer cases, signed and unsigned.
;; Integer max/min across lanes (signed and unsigned), same reduce-
;; then-extract scheme as the FP version above.
2145 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2146 [(match_operand:<VEL> 0 "register_operand")
2147 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2148 MAXMINV)]
2149 "TARGET_SIMD"
2150 {
2151 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2152 rtx scratch = gen_reg_rtx (<MODE>mode);
2153 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2154 operands[1]));
2155 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2156 DONE;
2157 }
2158 )
2159
;; Across-lanes integer max/min (e.g. SMAXV), writing a scalar register.
2160 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2161 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2162 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2163 MAXMINV))]
2164 "TARGET_SIMD"
2165 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2166 [(set_attr "type" "neon_reduc_minmax<q>")]
2167 )
2168
;; V2SI max/min reduction: a pairwise op of the vector with itself.
2169 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2170 [(set (match_operand:V2SI 0 "register_operand" "=w")
2171 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2172 MAXMINV))]
2173 "TARGET_SIMD"
2174 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2175 [(set_attr "type" "neon_reduc_minmax")]
2176 )
2177
;; Across-lanes FP max/min, writing a scalar register; the <vp> suffix
;; selects the pairwise vs. whole-vector mnemonic per mode.
2178 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2179 [(set (match_operand:VDQF 0 "register_operand" "=w")
2180 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
2181 FMAXMINV))]
2182 "TARGET_SIMD"
2183 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2184 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
2185 )
2186
2187 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2188 ;; allocation.
2189 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2190 ;; to select.
2191 ;;
2192 ;; Thus our BSL is of the form:
2193 ;; op0 = bsl (mask, op2, op3)
2194 ;; We can use any of:
2195 ;;
2196 ;; if (op0 = mask)
2197 ;; bsl mask, op2, op3
2198 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else keep op3)
2199 ;; bit op0, op2, mask
2200 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else keep op2)
2201 ;; bif op0, op3, mask
2202 ;;
2203 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2204 ;; Some forms of straight-line code may generate the equivalent form
2205 ;; in *aarch64_simd_bsl<mode>_alt.
2206
;; The XOR/AND/XOR form below computes ((op3 ^ op2) & op1) ^ op3, i.e.
;; a bitwise op1 ? op2 : op3.  The three alternatives tie op1, op3 or
;; op2 to the destination, selecting BSL, BIT or BIF respectively.
2207 (define_insn "aarch64_simd_bsl<mode>_internal"
2208 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2209 (xor:VSDQ_I_DI
2210 (and:VSDQ_I_DI
2211 (xor:VSDQ_I_DI
2212 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2213 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2214 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2215 (match_dup:<V_cmp_result> 3)
2216 ))]
2217 "TARGET_SIMD"
2218 "@
2219 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2220 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2221 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2222 [(set_attr "type" "neon_bsl<q>")]
2223 )
2224
2225 ;; We need this form in addition to the above pattern to match the case
2226 ;; when combine tries merging three insns such that the second operand of
2227 ;; the outer XOR matches the second operand of the inner XOR rather than
2228 ;; the first. The two are equivalent but since recog doesn't try all
2229 ;; permutations of commutative operations, we have to have a separate pattern.
2230
;; Commuted twin of the pattern above (inner XOR operands swapped, and
;; the trailing XOR against op2 instead of op3); see the comment above.
2231 (define_insn "*aarch64_simd_bsl<mode>_alt"
2232 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2233 (xor:VSDQ_I_DI
2234 (and:VSDQ_I_DI
2235 (xor:VSDQ_I_DI
2236 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2237 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2238 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2239 (match_dup:VSDQ_I_DI 2)))]
2240 "TARGET_SIMD"
2241 "@
2242 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2243 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2244 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2245 [(set_attr "type" "neon_bsl<q>")]
2246 )
2247
;; Bit-select for any vector mode: float vectors are punned to the
;; equivalent integer mode, the integer BSL is emitted, and the result
;; is punned back.
2248 (define_expand "aarch64_simd_bsl<mode>"
2249 [(match_operand:VALLDIF 0 "register_operand")
2250 (match_operand:<V_cmp_result> 1 "register_operand")
2251 (match_operand:VALLDIF 2 "register_operand")
2252 (match_operand:VALLDIF 3 "register_operand")]
2253 "TARGET_SIMD"
2254 {
2255 /* We can't alias operands together if they have different modes. */
2256 rtx tmp = operands[0];
2257 if (FLOAT_MODE_P (<MODE>mode))
2258 {
2259 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2260 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2261 tmp = gen_reg_rtx (<V_cmp_result>mode);
2262 }
2263 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2264 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2265 operands[1],
2266 operands[2],
2267 operands[3]));
2268 if (tmp != operands[0])
2269 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2270
2271 DONE;
2272 })
2273
;; Integer vector conditional select.  Normalize the comparison (NE is
;; rewritten as EQ with the select arms swapped; a 0/-1 arm pair is
;; also swapped so the comparison mask can be used directly), emit the
;; matching CM* instruction to build an all-ones/all-zeros mask, then
;; either move the mask straight to the destination or feed it to BSL.
2274 (define_expand "aarch64_vcond_internal<mode><mode>"
2275 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2276 (if_then_else:VSDQ_I_DI
2277 (match_operator 3 "comparison_operator"
2278 [(match_operand:VSDQ_I_DI 4 "register_operand")
2279 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2280 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2281 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2282 "TARGET_SIMD"
2283 {
2284 rtx op1 = operands[1];
2285 rtx op2 = operands[2];
2286 rtx mask = gen_reg_rtx (<MODE>mode);
2287 enum rtx_code code = GET_CODE (operands[3]);
2288
2289 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2290 and desirable for other comparisons if it results in FOO ? -1 : 0
2291 (this allows direct use of the comparison result without a bsl). */
2292 if (code == NE
2293 || (code != EQ
2294 && op1 == CONST0_RTX (<V_cmp_result>mode)
2295 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2296 {
2297 op1 = operands[2];
2298 op2 = operands[1];
/* Invert (not swap) the comparison: the arms were exchanged.  */
2299 switch (code)
2300 {
2301 case LE: code = GT; break;
2302 case LT: code = GE; break;
2303 case GE: code = LT; break;
2304 case GT: code = LE; break;
2305 /* No case EQ. */
2306 case NE: code = EQ; break;
2307 case LTU: code = GEU; break;
2308 case LEU: code = GTU; break;
2309 case GTU: code = LEU; break;
2310 case GEU: code = LTU; break;
2311 default: gcc_unreachable ();
2312 }
2313 }
2314
2315 /* Make sure we can handle the last operand. */
2316 switch (code)
2317 {
2318 case NE:
2319 /* Normalized to EQ above. */
2320 gcc_unreachable ();
2321
2322 case LE:
2323 case LT:
2324 case GE:
2325 case GT:
2326 case EQ:
2327 /* These instructions have a form taking an immediate zero. */
2328 if (operands[5] == CONST0_RTX (<MODE>mode))
2329 break;
2330 /* Fall through, as may need to load into register. */
2331 default:
2332 if (!REG_P (operands[5]))
2333 operands[5] = force_reg (<MODE>mode, operands[5]);
2334 break;
2335 }
2336
/* Emit the comparison; unsigned LTU/LEU are done by swapping the
   comparison operands of CMGTU/CMGEU.  */
2337 switch (code)
2338 {
2339 case LT:
2340 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2341 break;
2342
2343 case GE:
2344 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2345 break;
2346
2347 case LE:
2348 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2349 break;
2350
2351 case GT:
2352 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2353 break;
2354
2355 case LTU:
2356 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2357 break;
2358
2359 case GEU:
2360 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2361 break;
2362
2363 case LEU:
2364 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2365 break;
2366
2367 case GTU:
2368 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2369 break;
2370
2371 /* NE has been normalized to EQ above. */
2372 case EQ:
2373 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2374 break;
2375
2376 default:
2377 gcc_unreachable ();
2378 }
2379
2380 /* If we have (a = (b CMP c) ? -1 : 0);
2381 Then we can simply move the generated mask. */
2382
2383 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2384 && op2 == CONST0_RTX (<V_cmp_result>mode))
2385 emit_move_insn (operands[0], mask);
2386 else
2387 {
2388 if (!REG_P (op1))
2389 op1 = force_reg (<MODE>mode, op1);
2390 if (!REG_P (op2))
2391 op2 = force_reg (<MODE>mode, op2);
2392 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
2393 op1, op2));
2394 }
2395
2396 DONE;
2397 })
2398
;; Floating-point vector conditional select: an FP comparison (mode
;; VDQF) selecting between data of mode VDQF_COND.  Unordered
;; comparisons are handled by emitting the inverse FCM* and swapping
;; the BSL arms, since FCM returns false for unordered lanes.
2399 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2400 [(set (match_operand:VDQF_COND 0 "register_operand")
2401 (if_then_else:VDQF
2402 (match_operator 3 "comparison_operator"
2403 [(match_operand:VDQF 4 "register_operand")
2404 (match_operand:VDQF 5 "nonmemory_operand")])
2405 (match_operand:VDQF_COND 1 "nonmemory_operand")
2406 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2407 "TARGET_SIMD"
2408 {
2409 int inverse = 0;
2410 int use_zero_form = 0;
2411 int swap_bsl_operands = 0;
2412 rtx op1 = operands[1];
2413 rtx op2 = operands[2];
2414 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2415 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2416
2417 rtx (*base_comparison) (rtx, rtx, rtx);
2418 rtx (*complementary_comparison) (rtx, rtx, rtx);
2419
2420 switch (GET_CODE (operands[3]))
2421 {
2422 case GE:
2423 case GT:
2424 case LE:
2425 case LT:
2426 case EQ:
/* Fix: operands[5] has the VDQF (comparison) mode; with both the
   VDQF_COND and VDQF iterators in scope a bare <MODE> is ambiguous,
   and must be qualified as <VDQF:MODE> like the force_reg below.  */
2427 if (operands[5] == CONST0_RTX (<VDQF:MODE>mode))
2428 {
2429 use_zero_form = 1;
2430 break;
2431 }
2432 /* Fall through. */
2433 default:
2434 if (!REG_P (operands[5]))
2435 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2436 }
2437
2438 switch (GET_CODE (operands[3]))
2439 {
2440 case LT:
2441 case UNLT:
2442 inverse = 1;
2443 /* Fall through. */
2444 case GE:
2445 case UNGE:
2446 case ORDERED:
2447 case UNORDERED:
2448 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2449 complementary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2450 break;
2451 case LE:
2452 case UNLE:
2453 inverse = 1;
2454 /* Fall through. */
2455 case GT:
2456 case UNGT:
2457 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2458 complementary_comparison = gen_aarch64_cmge<VDQF:mode>;
2459 break;
2460 case EQ:
2461 case NE:
2462 case UNEQ:
2463 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2464 complementary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2465 break;
2466 default:
2467 gcc_unreachable ();
2468 }
2469
2470 switch (GET_CODE (operands[3]))
2471 {
2472 case LT:
2473 case LE:
2474 case GT:
2475 case GE:
2476 case EQ:
2477 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2478 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2479 a GE b -> a GE b
2480 a GT b -> a GT b
2481 a LE b -> b GE a
2482 a LT b -> b GT a
2483 a EQ b -> a EQ b
2484 Note that there also exist direct comparison against 0 forms,
2485 so catch those as a special case. */
2486 if (use_zero_form)
2487 {
2488 inverse = 0;
2489 switch (GET_CODE (operands[3]))
2490 {
2491 case LT:
2492 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2493 break;
2494 case LE:
2495 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2496 break;
2497 default:
2498 /* Do nothing, other zero form cases already have the correct
2499 base_comparison. */
2500 break;
2501 }
2502 }
2503
2504 if (!inverse)
2505 emit_insn (base_comparison (mask, operands[4], operands[5]));
2506 else
2507 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2508 break;
2509 case UNLT:
2510 case UNLE:
2511 case UNGT:
2512 case UNGE:
2513 case NE:
2514 /* FCM returns false for lanes which are unordered, so if we use
2515 the inverse of the comparison we actually want to emit, then
2516 swap the operands to BSL, we will end up with the correct result.
2517 Note that a NE NaN and NaN NE b are true for all a, b.
2518
2519 Our transformations are:
2520 a GE b -> !(b GT a)
2521 a GT b -> !(b GE a)
2522 a LE b -> !(a GT b)
2523 a LT b -> !(a GE b)
2524 a NE b -> !(a EQ b) */
2525
2526 if (inverse)
2527 emit_insn (base_comparison (mask, operands[4], operands[5]));
2528 else
2529 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2530
2531 swap_bsl_operands = 1;
2532 break;
2533 case UNEQ:
2534 /* We check (a > b || b > a).  Combining these comparisons gives us
2535 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2536 will then give us (a == b || a UNORDERED b) as intended. */
2537
2538 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2539 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2540 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2541 swap_bsl_operands = 1;
2542 break;
2543 case UNORDERED:
2544 /* Operands are ORDERED iff (a > b || b >= a).
2545 Swapping the operands to BSL will give the UNORDERED case. */
2546 swap_bsl_operands = 1;
2547 /* Fall through. */
2548 case ORDERED:
2549 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2550 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2551 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2552 break;
2553 default:
2554 gcc_unreachable ();
2555 }
2556
2557 if (swap_bsl_operands)
2558 {
2559 op1 = operands[2];
2560 op2 = operands[1];
2561 }
2562
2563 /* If we have (a = (b CMP c) ? -1 : 0);
2564 Then we can simply move the generated mask. */
2565
2566 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2567 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2568 emit_move_insn (operands[0], mask);
2569 else
2570 {
2571 if (!REG_P (op1))
2572 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2573 if (!REG_P (op2))
2574 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2575 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
2576 op1, op2));
2577 }
2578
2579 DONE;
2580 })
2581
;; Standard vcond pattern: delegate to the same-mode internal expander.
2582 (define_expand "vcond<mode><mode>"
2583 [(set (match_operand:VALLDI 0 "register_operand")
2584 (if_then_else:VALLDI
2585 (match_operator 3 "comparison_operator"
2586 [(match_operand:VALLDI 4 "register_operand")
2587 (match_operand:VALLDI 5 "nonmemory_operand")])
2588 (match_operand:VALLDI 1 "nonmemory_operand")
2589 (match_operand:VALLDI 2 "nonmemory_operand")))]
2590 "TARGET_SIMD"
2591 {
2592 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2593 operands[2], operands[3],
2594 operands[4], operands[5]));
2595 DONE;
2596 })
2597
;; Mixed-mode vcond: a float comparison selecting integer data of the
;; corresponding <V_cmp_result> mode.
2598 (define_expand "vcond<v_cmp_result><mode>"
2599 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2600 (if_then_else:<V_cmp_result>
2601 (match_operator 3 "comparison_operator"
2602 [(match_operand:VDQF 4 "register_operand")
2603 (match_operand:VDQF 5 "nonmemory_operand")])
2604 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2605 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2606 "TARGET_SIMD"
2607 {
2608 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2609 operands[0], operands[1],
2610 operands[2], operands[3],
2611 operands[4], operands[5]));
2612 DONE;
2613 })
2614
;; Unsigned vcond: same internal expander; the unsigned rtx codes in
;; operand 3 select the unsigned CM* handling there.
2615 (define_expand "vcondu<mode><mode>"
2616 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2617 (if_then_else:VSDQ_I_DI
2618 (match_operator 3 "comparison_operator"
2619 [(match_operand:VSDQ_I_DI 4 "register_operand")
2620 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2621 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2622 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2623 "TARGET_SIMD"
2624 {
2625 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2626 operands[2], operands[3],
2627 operands[4], operands[5]));
2628 DONE;
2629 })
2630
2631 ;; Patterns for AArch64 SIMD Intrinsics.
2632
2633 ;; Lane extraction with sign extension to general purpose register.
2634 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2635 [(set (match_operand:GPI 0 "register_operand" "=r")
2636 (sign_extend:GPI
2637 (vec_select:<VEL>
2638 (match_operand:VDQQH 1 "register_operand" "w")
2639 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2640 "TARGET_SIMD"
2641 {
;; NOTE(review): with both GPI and VDQQH iterators active a bare <MODE>
;; looks ambiguous; the lane flip must use the vector (VDQQH) mode, as
;; the GPI:/VDQQH:-qualified attributes on the next line suggest —
;; confirm whether this should read <VDQQH:MODE>mode.
2642 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2643 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2644 }
2645 [(set_attr "type" "neon_to_gp<q>")]
2646 )
2647
;; Lane extraction with zero extension to a 32-bit GP register (UMOV).
2648 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2649 [(set (match_operand:SI 0 "register_operand" "=r")
2650 (zero_extend:SI
2651 (vec_select:<VEL>
2652 (match_operand:VDQQH 1 "register_operand" "w")
2653 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2654 "TARGET_SIMD"
2655 {
2656 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2657 return "umov\\t%w0, %1.<Vetype>[%2]";
2658 }
2659 [(set_attr "type" "neon_to_gp<q>")]
2660 )
2661
2662 ;; Lane extraction of a value, neither sign nor zero extension
2663 ;; is guaranteed so upper bits should be considered undefined.
2664 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Alternatives: GP register (umov), SIMD register (dup), memory (st1).
2665 (define_insn "aarch64_get_lane<mode>"
2666 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2667 (vec_select:<VEL>
2668 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2669 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2670 "TARGET_SIMD"
2671 {
2672 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2673 switch (which_alternative)
2674 {
2675 case 0:
2676 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2677 case 1:
2678 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2679 case 2:
2680 return "st1\\t{%1.<Vetype>}[%2], %0";
2681 default:
2682 gcc_unreachable ();
2683 }
2684 }
2685 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2686 )
2687
2688 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2689 ;; dest vector.
2690
;; Little-endian: concatenate a 64-bit vector with zero.  All three
;; alternatives write only the low 64 bits of the destination, which
;; implicitly zeroes the upper half.
2691 (define_insn "*aarch64_combinez<mode>"
2692 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2693 (vec_concat:<VDBL>
2694 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2695 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2696 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2697 "@
2698 mov\\t%0.8b, %1.8b
2699 fmov\t%d0, %1
2700 ldr\\t%d0, %1"
2701 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2702 (set_attr "simd" "yes,*,yes")
2703 (set_attr "fp" "*,yes,*")]
2704 )
2705
;; Big-endian twin of the pattern above: same instructions, with the
;; vec_concat operand order reversed to match big-endian lane order.
2706 (define_insn "*aarch64_combinez_be<mode>"
2707 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2708 (vec_concat:<VDBL>
2709 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2710 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2711 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2712 "@
2713 mov\\t%0.8b, %1.8b
2714 fmov\t%d0, %1
2715 ldr\\t%d0, %1"
2716 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2717 (set_attr "simd" "yes,*,yes")
2718 (set_attr "fp" "*,yes,*")]
2719 )
2720
;; Concatenate two 64-bit vectors into a 128-bit one.  The operand
;; order inside the vec_concat is endian-dependent, so swap before
;; calling the internal pattern.
2721 (define_expand "aarch64_combine<mode>"
2722 [(match_operand:<VDBL> 0 "register_operand")
2723 (match_operand:VDC 1 "register_operand")
2724 (match_operand:VDC 2 "register_operand")]
2725 "TARGET_SIMD"
2726 {
2727 rtx op1, op2;
2728 if (BYTES_BIG_ENDIAN)
2729 {
2730 op1 = operands[2];
2731 op2 = operands[1];
2732 }
2733 else
2734 {
2735 op1 = operands[1];
2736 op2 = operands[2];
2737 }
2738 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
2739 DONE;
2740 }
2741 )
2742
;; Kept as one insn until after reload, then split into the two half
;; moves via aarch64_split_simd_combine (argument order flipped for
;; big-endian).  The earlyclobber "=&w" stops the destination from
;; being allocated on top of either source.
2743 (define_insn_and_split "aarch64_combine_internal<mode>"
2744 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2745 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2746 (match_operand:VDC 2 "register_operand" "w")))]
2747 "TARGET_SIMD"
2748 "#"
2749 "&& reload_completed"
2750 [(const_int 0)]
2751 {
2752 if (BYTES_BIG_ENDIAN)
2753 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2754 else
2755 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2756 DONE;
2757 }
2758 [(set_attr "type" "multiple")]
2759 )
2760
;; Build the 128-bit result by moving operand 1 into the low quad and
;; operand 2 into the high quad of the destination.
2761 (define_expand "aarch64_simd_combine<mode>"
2762 [(match_operand:<VDBL> 0 "register_operand")
2763 (match_operand:VDC 1 "register_operand")
2764 (match_operand:VDC 2 "register_operand")]
2765 "TARGET_SIMD"
2766 {
2767 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2768 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2769 DONE;
2770 }
2771 [(set_attr "type" "multiple")]
2772 )
2773
2774 ;; <su><addsub>l<q>.
2775
;; Widening add/sub of the high halves of both sources
;; (S/UADDL2, S/USUBL2); operand 3 is the high-half lane parallel.
2776 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2777 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2778 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2779 (match_operand:VQW 1 "register_operand" "w")
2780 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2781 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2782 (match_operand:VQW 2 "register_operand" "w")
2783 (match_dup 3)))))]
2784 "TARGET_SIMD"
2785 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2786 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2787 )
2788
;; Widening add/sub of the low halves of both sources
;; (S/UADDL, S/USUBL); operand 3 is the low-half lane parallel.
2789 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2790 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2791 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2792 (match_operand:VQW 1 "register_operand" "w")
2793 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2794 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2795 (match_operand:VQW 2 "register_operand" "w")
2796 (match_dup 3)))))]
2797 "TARGET_SIMD"
2798 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2799 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2800 )
2801
2802
;; saddl2 intrinsic: signed widening add of the high halves, via the
;; _hi_internal pattern with an endian-aware high-half parallel.
2803 (define_expand "aarch64_saddl2<mode>"
2804 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2805 (match_operand:VQW 1 "register_operand" "w")
2806 (match_operand:VQW 2 "register_operand" "w")]
2807 "TARGET_SIMD"
2808 {
2809 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2810 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2811 operands[2], p));
2812 DONE;
2813 })
2814
;; uaddl2 intrinsic: unsigned widening add of the high halves.
2815 (define_expand "aarch64_uaddl2<mode>"
2816 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2817 (match_operand:VQW 1 "register_operand" "w")
2818 (match_operand:VQW 2 "register_operand" "w")]
2819 "TARGET_SIMD"
2820 {
2821 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2822 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2823 operands[2], p));
2824 DONE;
2825 })
2826
2827 (define_expand "aarch64_ssubl2<mode>"
2828 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2829 (match_operand:VQW 1 "register_operand" "w")
2830 (match_operand:VQW 2 "register_operand" "w")]
2831 "TARGET_SIMD"
2832 {
2833 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2834 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2835 operands[2], p));
2836 DONE;
2837 })
2838
2839 (define_expand "aarch64_usubl2<mode>"
2840 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2841 (match_operand:VQW 1 "register_operand" "w")
2842 (match_operand:VQW 2 "register_operand" "w")]
2843 "TARGET_SIMD"
2844 {
2845 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2846 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2847 operands[2], p));
2848 DONE;
2849 })
2850
;; Whole-register widening add/sub of two 64-bit vectors:
;; <su>addl / <su>subl with no lane selection needed.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
2861
;; <su><addsub>w<q>.

;; Widening sum of a 128-bit vector into a double-width accumulator:
;; low half via <su>addw, high half via <su>addw2.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
					       operands[1], p));
  emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
  DONE;
}
)

;; 64-bit-vector form: a single saddw suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned analogue of the 128-bit widen_ssum expander above.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
					       operands[1], p));
  emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
  DONE;
}
)

;; Unsigned 64-bit-vector form: a single uaddw.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
2919
;; Widening add/sub of a 64-bit vector into a wide accumulator:
;; <su>addw / <su>subw.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; Low-half form used by the widen_<su>sum expanders: extends only the
;; low half of the 128-bit operand 2.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; High-half form: <su>addw2 / <su>subw2.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)
2953
;; Expanders for the "w2" (high-half) widening-accumulate intrinsics:
;; build the hi-half selector and defer to the *w2_internal insns.

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3002
;; <su><r>h<addsub>.

;; Halving add/sub (optionally rounding), kept as an unspec:
;; shadd/uhadd/srhadd/urhadd/shsub/uhsub.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
	 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.

;; Narrowing high-half add/sub (optionally rounding):
;; addhn/subhn/raddhn/rsubhn into a 64-bit result.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
	 ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; "hn2" variant: narrows into the high half of operand 0 while keeping
;; the low half (tied input operand 1, constraint "0").
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
	 ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3037
;; pmul.

;; Polynomial multiply on byte vectors.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
	 UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.

;; Floating-point multiply extended (special 0 * inf handling), vector
;; and scalar forms via the VALLF iterator.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
		       (match_operand:VALLF 2 "register_operand" "w")]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_mul_<Vetype>")]
)
3061
;; vmulxq_lane_f32, and vmulx_laneq_f32

;; fmulx by a lane taken from a vector of the *other* width (64<->128);
;; the lane index is remapped for big-endian before printing.
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; fmulx by a lane of a same-width vector.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane_f64

;; V2DF fmulx by a broadcast DF scalar: always lane 0, so no endian
;; remapping is needed.
(define_insn "*aarch64_mulx_elt_to_64v2df"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
	(unspec:V2DF
	 [(match_operand:V2DF 1 "register_operand" "w")
	  (vec_duplicate:V2DF
	    (match_operand:DF 2 "register_operand" "w"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    return "fmulx\t%0.2d, %1.2d, %2.d[0]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar fmulx with the second operand taken from a vector lane.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF_DF 2 "register_operand" "w")
	   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>

;; Saturating add/sub: sqadd/uqadd/sqsub/uqsub, vector and scalar.
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; suqadd and usqadd

;; Signed/unsigned saturating accumulate of the opposite-signedness
;; operand; the destination is also a source (constraint "0").
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
	 USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
3158
;; sqmovun

;; Saturating extract-narrow, signed to unsigned: sqxtun.
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
	 UNSPEC_SQXTUN))]
  "TARGET_SIMD"
  "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

;; Saturating extract-narrow, same signedness: sqxtn/uqxtn.
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
	 SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

;; Saturating absolute value / negate: sqabs, sqneg.
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3191
;; sq<r>dmulh.

;; Saturating doubling multiply high (optionally rounding).
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

;; Vector-by-lane forms; the lane index is endian-remapped before
;; printing.  _lane selects from a 64-bit vector (<VCOND>), _laneq
;; from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar-by-lane forms.  These previously printed the lane operand as
;; %2.<v>[%3]; for the HI/SI modes involved <v> and <Vetype> both print
;; "h"/"s", so using <Vetype> keeps the output identical while matching
;; the vector patterns above.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3266
;; sqrdml[as]h.

;; ARMv8.1-A rounding saturating doubling multiply accumulate/subtract
;; high; the accumulator is operand 1 (tied, constraint "0").
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

;; Vector-by-lane form; lane from a 64-bit vector (<VCOND>).
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-by-lane form.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

;; Vector-by-lane form; lane from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-by-lane form.  The lane was previously printed as %3.<v>[%4];
;; <v> and <Vetype> coincide ("h"/"s") for the HI/SI modes here, so
;; <Vetype> keeps the output identical while matching the _lane sibling.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3356
;; vqdml[sa]l

;; Saturating doubling multiply-accumulate/subtract long: the widened
;; product is doubled via ss_ashift by 1, then added to / subtracted
;; from the tied accumulator (operand 1).
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VSD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(match_operand:VSD_HSI 3 "register_operand" "w")))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3374
;; vqdml[sa]l_lane

;; Vector-by-lane sqdmlal/sqdmlsl; the lane comes from a 64-bit vector
;; (<VCOND>) and is broadcast before the widening multiply.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; As above but the lane comes from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-by-lane form (no vec_duplicate needed for a scalar operand 2).
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar-by-lane form, lane from a 128-bit vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3470
;; vqdml[sa]l_n

;; sqdmlal/sqdmlsl with a broadcast scalar multiplier; emitted as a
;; lane-0 reference of the scalar's register.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3489
;; sqdml[as]l2

;; High-half sqdmlal2/sqdmlsl2: both sources select their high halves
;; via the shared parallel in operand 4.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 3 "register_operand" "w")
		  (match_dup 4))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expanders: build the high-half selector and defer to the insn above.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})
3537
;; vqdml[sa]l2_lane

;; High-half sqdmlal2/sqdmlsl2 by a lane from a 64-bit vector (<VCOND>).
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
		(vec_duplicate:<VHALF>
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		  ))))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; As above, lane from a 128-bit vector (<VCONQ>).
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
		(vec_duplicate:<VHALF>
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		  ))))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expanders: build the high-half selector and defer to the internal
;; insns above.
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})
3651
;; High-half sqdmlal2/sqdmlsl2 with a broadcast scalar multiplier
;; (lane 0 of operand 3's register).
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
		(vec_duplicate:<VHALF>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expanders for the *_n high-half intrinsics.
(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
3698
;; vqdmull

;; Saturating doubling multiply long: widen both sources, multiply,
;; and double the product via ss_ashift by 1.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	  (mult:<VWIDE>
	    (sign_extend:<VWIDE>
	      (match_operand:VSD_HSI 1 "register_operand" "w"))
	    (sign_extend:<VWIDE>
	      (match_operand:VSD_HSI 2 "register_operand" "w")))
	  (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
3714
3715 ;; vqdmull_lane
3716
;; SQDMULL by lane, 64-bit vector forms: operand 1's elements are each
;; multiplied by the single lane (operand 3) of operand 2, which is
;; broadcast via vec_duplicate.  The _lane variant indexes a 64-bit
;; register (<VCOND>), the _laneq variant a 128-bit register (<VCONQ>).
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* RTL lane numbers follow GCC vector-extension order; remap for the
       architectural lane numbering on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, but the lane is taken from a 128-bit (quad) register.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Remap the GCC lane number to the architectural one on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3758
;; SQDMULL by lane, scalar (SD_HSI) forms: operand 1 is a scalar, so no
;; vec_duplicate is needed around the selected lane of operand 2.  Same
;; _lane (<VCOND>) / _laneq (<VCONQ>) split as the vector forms above.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Remap the GCC lane number to the architectural one on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar SQDMULL with the lane taken from a 128-bit (quad) register.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Remap the GCC lane number to the architectural one on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3798
3799 ;; vqdmull_n
3800
;; SQDMULL by scalar ("_n"): the scalar operand 2 is broadcast to all lanes
;; with vec_duplicate and the assembly uses lane [0] of its register.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3816
3817 ;; vqdmull2
3818
3819
3820
;; SQDMULL2: saturating doubling multiply long on the high halves of two
;; quad-word vectors.  The _internal insn takes the hi-half selector as
;; operand 3; the expander below builds it and forwards.
;; NOTE(review): the type attribute here is "..._scalar_long" although both
;; multiplicands are vectors — possibly copied from the by-lane patterns;
;; confirm against the scheduling descriptions before relying on it.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; User-visible expander: supplies the hi-half lane parallel.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3851
3852 ;; vqdmull2_lane
3853
;; SQDMULL2 by lane: high half of operand 1 multiplied by the broadcast
;; lane 3 of operand 2.  Operand 4 is the hi-half selector supplied by the
;; expanders below.  _lane indexes a <VCOND> register, _laneq a <VCONQ>.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Remap the GCC lane number to the architectural one on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, with the lane taken from a 128-bit (quad) register.
(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* Remap the GCC lane number to the architectural one on big-endian.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3899
;; Expanders for SQDMULL2 by lane: build the hi-half lane parallel and
;; hand off to the corresponding _internal insn.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

;; _laneq variant: the lane index addresses a 128-bit register.
(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})
3927
3928 ;; vqdmull2_n
3929
;; SQDMULL2 by scalar ("_n"): high half of operand 1 multiplied by scalar
;; operand 2, broadcast with vec_duplicate; assembly uses lane [0].
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; User-visible expander: supplies the hi-half lane parallel.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
3959
3960 ;; vshl
3961
;; Register-controlled vector shift (SSHL/USHL/SRSHL/URSHL via the VSHL
;; unspec iterator): each element of operand 1 is shifted by the
;; corresponding (signed) element of operand 2.
;; Fix: dropped the stray ';' after the output template — in .md syntax a
;; semicolon starts a comment, so the token was dead and inconsistent with
;; every other pattern in this file.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
3972
3973
3974 ;; vqshl
3975
;; Register-controlled saturating (and optionally rounding) vector shift
;; (SQSHL/UQSHL/SQRSHL/UQRSHL via the VQSHL unspec iterator).
;; Fix: dropped the stray ';' after the output template — in .md syntax a
;; semicolon starts a comment, so the token was dead and inconsistent with
;; every other pattern in this file.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
3986
3987 ;; vshll_n
3988
;; Shift-left-long by immediate (SSHLL/USHLL).  When the shift amount
;; equals the element width the dedicated SHLL mnemonic is emitted
;; (that form has no signed/unsigned variant).
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4004
4005 ;; vshll_high_n
4006
;; Shift-left-long by immediate on the high half (SSHLL2/USHLL2), with the
;; same SHLL2 special case for a shift equal to the element width.
;; NOTE(review): operand 2 uses plain "immediate_operand" whereas the
;; non-"2" pattern above constrains it with
;; aarch64_simd_shift_imm_bitsize_<ve_mode> — confirm whether the looser
;; predicate here is intentional.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4021
4022 ;; vrshr_n
4023
;; Rounding shift right by immediate (SRSHR/URSHR via VRSHR_N); the
;; predicate restricts the shift amount to 1..element-width.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4034
4035 ;; v(r)sra_n
4036
;; Shift right and accumulate (SSRA/USRA/SRSRA/URSRA via VSRA).  Operand 1
;; is tied to the destination ("0" constraint) because the instruction
;; accumulates into it; operand 2 is the value being shifted.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
4048
4049 ;; vs<lr>i_n
4050
;; Shift and insert (SLI/SRI via VSLRI): shifts operand 2 and inserts the
;; result into operand 0, which must start as a copy of operand 1 (tied
;; with the "0" constraint) since untouched bits are preserved.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4062
4063 ;; vqshl(u)
4064
;; Saturating shift left by immediate (SQSHL/UQSHL/SQSHLU via VQSHL_N).
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4075
4076
4077 ;; vq(r)shr(u)n_n
4078
;; Saturating (rounding) shift right narrow by immediate
;; (SQSHRN/UQSHRN/SQRSHRN/UQRSHRN/SQSHRUN/SQRSHRUN via VQSHRN_N); the
;; result mode is the narrowed <VNARROWQ>.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4089
4090
4091 ;; cm(eq|ge|gt|lt|le)
4092 ;; Note, we have constraints for Dz and Z as different expanders
4093 ;; have different ideas of what should be passed to this pattern.
4094
;; Integer vector compare producing an all-ones/all-zeros mask, modelled
;; as neg of the comparison result.  Alternative 1 compares two registers
;; (operand order fixed up via <cmp_1>/<cmp_2>); alternative 2 compares
;; against zero using the immediate #0 form (ZDz constraint).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
4108
;; DImode signed compare-to-mask.  Before reload we don't know whether the
;; operands land in SIMD or general registers, so this insn clobbers CC and
;; splits after reload: into a flag-setting compare + cstoredi_neg when the
;; operands ended up in GP registers, or into the CC-free *aarch64_cm<optab>di
;; pattern below otherwise.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
4143
;; Post-reload SIMD-register form of the DImode signed compare: no CC
;; clobber, matched by the split of aarch64_cm<optab>di above.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4157
4158 ;; cm(hs|hi)
4159
;; Unsigned integer vector compare (CMHS/CMHI).  No compare-with-zero
;; alternative exists for the unsigned forms.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (UCOMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
4171
;; DImode unsigned compare-to-mask; same pre/post-reload split strategy
;; as the signed version above.  Unsigned comparisons always use CCmode.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)
4206
;; Post-reload SIMD-register form of the DImode unsigned compare.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
4218
4219 ;; cmtst
4220
4221 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4222 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4223 ;; not (neg (eq (and x y) 0))
4224 ;; which is rewritten by simplify_rtx as
4225 ;; plus (eq (and x y) 0) -1.
4226
;; CMTST (test bits): matches the canonical form produced by simplify_rtx
;; for not (neg (eq (and x y) 0)), i.e. plus (eq (and x y) 0) -1 — see the
;; section comment above.  Operands 3/4 are the zero and all-ones
;; immediate vectors of that canonical form.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(plus:<V_cmp_result>
	  (eq:<V_cmp_result>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
4241
;; DImode CMTST: same pre/post-reload split strategy as the compares
;; above — GP registers get an AND-based flag compare plus cstoredi_neg,
;; SIMD registers fall through to *aarch64_cmtstdi below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
4279
;; Post-reload SIMD-register form of DImode CMTST.
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4292
4293 ;; fcm(eq|ge|gt|le|lt)
4294
;; Floating-point vector compare to mask (FCMEQ/FCMGE/FCMGT/...).
;; Alternative 2 compares against the zero constant (YDz) using the
;; immediate-zero form of the instruction.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VALLF 1 "register_operand" "w,w")
	    (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
)
4308
4309 ;; fac(ge|gt)
4310 ;; Note we can also handle what would be fac(le|lt) by
4311 ;; generating fac(ge|gt).
4312
;; Absolute compare (FACGE/FACGT): compares the absolute values of the two
;; operands; the le/lt cases are handled by operand swapping through
;; <cmp_1>/<cmp_2> (see the section comment above).
(define_insn "*aarch64_fac<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (FAC_COMPARISONS:<V_cmp_result>
	    (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
	    (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
)
4324
4325 ;; addp
4326
;; ADDP: pairwise integer addition across the two source vectors.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar ADDP: sums the two doubleword elements of a V2DI source into a
;; single DImode result.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
4347
4348 ;; sqrt
4349
;; Vector square root.  The expander first tries the Newton-series
;; approximation (aarch64_emit_approx_sqrt, non-reciprocal form); if that
;; declines, the FSQRT insn below is used.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VDQF 0 "register_operand")
	(sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

;; Hardware FSQRT fallback.
(define_insn "*sqrt<mode>2"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
)
4366
4367 ;; Patterns for vector struct loads and stores.
4368
;; LD2 structure loads.  The OImode destination holds the two-register
;; list; the inner VQ/VALLDIF unspec only carries the element mode.

;; Plain LD2 of two vectors.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; LD2R: load one structure and replicate it to all lanes.  Memory is
;; matched as BLK since only the structure bytes are read.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; LD2 to a single lane; operand 2 supplies the unmodified lanes (tied to
;; the destination) and the lane number is endian-corrected for assembly.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Generic vec_load_lanes expander: on big-endian, the loaded register
;; list is permuted back to GCC lane order with a TBL-based reversal.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
4422
;; ST2 structure stores, mirroring the LD2 patterns above.

;; Plain ST2 of two vectors.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Generic vec_store_lanes expander: on big-endian, permute the register
;; list into memory order before storing.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4466
;; LD3 structure loads: CImode holds the three-register list; otherwise
;; these parallel the LD2 patterns above.

;; Plain LD3 of three vectors.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; LD3R: load one structure and replicate to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; LD3 to a single lane; other lanes come from operand 2 (tied to dest).
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Generic expander with big-endian register-list reversal.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})
4520
;; ST3 structure stores, mirroring the LD3 patterns above.

;; Plain ST3 of three vectors.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Generic expander with big-endian register-list reversal before store.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
4564
;; LD4 structure loads: XImode holds the four-register list; otherwise
;; these parallel the LD2/LD3 patterns above.

;; Plain LD4 of four vectors.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; LD4R: load one structure and replicate to all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; LD4 to a single lane; other lanes come from operand 2 (tied to dest).
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Generic expander with big-endian register-list reversal.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})
4618
4619 (define_insn "aarch64_simd_st4<mode>"
4620 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4621 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4623 UNSPEC_ST4))]
4624 "TARGET_SIMD"
4625 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4626 [(set_attr "type" "neon_store4_4reg<q>")]
4627 )
4628
4629 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Single-lane ST4: store lane %2 of each of the four vectors in the XI
;; register list.  The lane index is remapped via ENDIAN_LANE_N at output
;; time so the RTL-level index stays in GCC (array) order.
4630 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4631   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4632         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4633                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4634                      (match_operand:SI 2 "immediate_operand" "i")]
4635                     UNSPEC_ST4_LANE))]
4636   "TARGET_SIMD"
4637 {
4638   operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4639   return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4640 }
4641   [(set_attr "type" "neon_store4_one_lane<q>")]
4642 )
4643
;; Standard pattern name: structure store of four Q-register vectors.
;; Mirror of vec_load_lanesxi: on big-endian, permute the register list
;; first (aarch64_rev_reglistxi) and then emit the st4.
4644 (define_expand "vec_store_lanesxi<mode>"
4645   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4646         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4647                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4648                    UNSPEC_ST4))]
4649   "TARGET_SIMD"
4650 {
4651   if (BYTES_BIG_ENDIAN)
4652     {
4653       rtx tmp = gen_reg_rtx (XImode);
4654       rtx mask = aarch64_reverse_mask (<MODE>mode);
4655       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4656       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4657     }
4658   else
4659     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4660   DONE;
4661 })
4662
;; Permute each vector of a structure register list through TBL with the
;; byte mask in operand 2 (used to fix up lane order on big-endian).
;; Kept as a single insn until after reload, then split into one
;; aarch64_tbl1v16qi per constituent Q register.  The earlyclobber on
;; operand 0 prevents it overlapping the still-live input list.
4663 (define_insn_and_split "aarch64_rev_reglist<mode>"
4664 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4665         (unspec:VSTRUCT
4666                     [(match_operand:VSTRUCT 1 "register_operand" "w")
4667                      (match_operand:V16QI 2 "register_operand" "w")]
4668                     UNSPEC_REV_REGLIST))]
4669   "TARGET_SIMD"
4670   "#"
4671   "&& reload_completed"
4672   [(const_int 0)]
4673 {
4674   int i;
4675   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4676   for (i = 0; i < nregs; i++)
4677     {
4678       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4679       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4680       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4681     }
4682   DONE;
4683 }
4684   [(set_attr "type" "neon_tbl1_q")
4685    (set_attr "length" "<insn_count>")]
4686 )
4687
4688 ;; Reload patterns for AdvSIMD register list operands.
4689
;; Move expander for structure (register-list) modes.  Before reload we
;; only need to legitimize mem := mem / mem := const by forcing the source
;; into a register; the *aarch64_mov<mode> and be_mov patterns below do the
;; actual work.
4690 (define_expand "mov<mode>"
4691   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4692         (match_operand:VSTRUCT 1 "general_operand" ""))]
4693   "TARGET_SIMD"
4694 {
4695   if (can_create_pseudo_p ())
4696     {
4697       if (GET_CODE (operands[0]) != REG)
4698         operands[1] = force_reg (<MODE>mode, operands[1]);
4699     }
4700 })
4701
;; Little-endian structure moves: reg-reg is split later ("#"), while
;; memory transfers use multi-register LD1/ST1 of .16b vectors.  Not valid
;; on big-endian, where the element-preserving be_mov patterns are used.
4702 (define_insn "*aarch64_mov<mode>"
4703   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4704         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4705   "TARGET_SIMD && !BYTES_BIG_ENDIAN
4706    && (register_operand (operands[0], <MODE>mode)
4707        || register_operand (operands[1], <MODE>mode))"
4708 "@
4709    #
4710    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4711    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4712   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4713                      neon_load<nregs>_<nregs>reg_q")
4714    (set_attr "length" "<insn_count>,4,4")]
4715 )
4716
;; Big-endian single-vector load/store kept as unspecs so the element
;; order established by LD1/ST1 is not disturbed by the optimizers.
4717 (define_insn "aarch64_be_ld1<mode>"
4718   [(set (match_operand:VALLDI_F16 0        "register_operand" "=w")
4719         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4720                              "aarch64_simd_struct_operand" "Utv")]
4721                 UNSPEC_LD1))]
4722   "TARGET_SIMD"
4723   "ld1\\t{%0<Vmtype>}, %1"
4724   [(set_attr "type" "neon_load1_1reg<q>")]
4725 )
4726
;; Store counterpart of aarch64_be_ld1.
4727 (define_insn "aarch64_be_st1<mode>"
4728   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4729         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4730                 UNSPEC_ST1))]
4731   "TARGET_SIMD"
4732   "st1\\t{%1<Vmtype>}, %0"
4733   [(set_attr "type" "neon_store1_1reg<q>")]
4734 )
4735
;; Big-endian structure moves.  OImode (2 vectors) fits an LDP/STP pair;
;; CImode (3) and XImode (4) are always emitted as "#" and decomposed by
;; the define_splits below.  Lengths: one 4-byte insn per Q register for
;; the reg-reg case, a single LDP/STP (4) for the memory alternatives.
4736 (define_insn "*aarch64_be_movoi"
4737   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4738         (match_operand:OI 1 "general_operand"      " w,w,m"))]
4739   "TARGET_SIMD && BYTES_BIG_ENDIAN
4740    && (register_operand (operands[0], OImode)
4741        || register_operand (operands[1], OImode))"
4742   "@
4743    #
4744    stp\\t%q1, %R1, %0
4745    ldp\\t%q0, %R0, %1"
4746   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4747    (set_attr "length" "8,4,4")]
4748 )
4749
4750 (define_insn "*aarch64_be_movci"
4751   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4752         (match_operand:CI 1 "general_operand"      " w,w,o"))]
4753   "TARGET_SIMD && BYTES_BIG_ENDIAN
4754    && (register_operand (operands[0], CImode)
4755        || register_operand (operands[1], CImode))"
4756   "#"
4757   [(set_attr "type" "multiple")
4758    (set_attr "length" "12,4,4")]
4759 )
4760
4761 (define_insn "*aarch64_be_movxi"
4762   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4763         (match_operand:XI 1 "general_operand"      " w,w,o"))]
4764   "TARGET_SIMD && BYTES_BIG_ENDIAN
4765    && (register_operand (operands[0], XImode)
4766        || register_operand (operands[1], XImode))"
4767   "#"
4768   [(set_attr "type" "multiple")
4769    (set_attr "length" "16,4,4")]
4770 )
4771
;; Post-reload splitters for structure-mode moves.  Reg-reg copies are
;; decomposed into TImode register moves (aarch64_simd_emit_reg_reg_move
;; handles overlap direction).  On big-endian, memory transfers for CI/XI
;; are split into OImode (and for CI one trailing TImode-via-V16QI) pieces
;; so each piece maps onto the be_mov patterns above; FAIL on little-endian
;; because *aarch64_mov<mode> already covers those.
4772 (define_split
4773   [(set (match_operand:OI 0 "register_operand")
4774         (match_operand:OI 1 "register_operand"))]
4775   "TARGET_SIMD && reload_completed"
4776   [(const_int 0)]
4777 {
4778   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4779   DONE;
4780 })
4781
4782 (define_split
4783   [(set (match_operand:CI 0 "nonimmediate_operand")
4784         (match_operand:CI 1 "general_operand"))]
4785   "TARGET_SIMD && reload_completed"
4786   [(const_int 0)]
4787 {
4788   if (register_operand (operands[0], CImode)
4789       && register_operand (operands[1], CImode))
4790     {
4791       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4792       DONE;
4793     }
4794   else if (BYTES_BIG_ENDIAN)
4795     {
4796       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4797                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
4798       emit_move_insn (gen_lowpart (V16QImode,
4799                                    simplify_gen_subreg (TImode, operands[0],
4800                                                         CImode, 32)),
4801                       gen_lowpart (V16QImode,
4802                                    simplify_gen_subreg (TImode, operands[1],
4803                                                         CImode, 32)));
4804       DONE;
4805     }
4806   else
4807     FAIL;
4808 })
4809
4810 (define_split
4811   [(set (match_operand:XI 0 "nonimmediate_operand")
4812         (match_operand:XI 1 "general_operand"))]
4813   "TARGET_SIMD && reload_completed"
4814   [(const_int 0)]
4815 {
4816   if (register_operand (operands[0], XImode)
4817       && register_operand (operands[1], XImode))
4818     {
4819       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4820       DONE;
4821     }
4822   else if (BYTES_BIG_ENDIAN)
4823     {
4824       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4825                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
4826       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4827                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
4828       DONE;
4829     }
4830   else
4831     FAIL;
4832 })
4833
;; Builtin expander for LD<n>R (load single structure, replicate to all
;; lanes).  Operand 1 is the base address in a general register; wrap it
;; in a BLKmode MEM sized to <nregs> elements and hand off to the
;; aarch64_simd_ld<nregs>r insn.
;; Note: operand 1 previously carried the FP-register constraint "w";
;; constraints are ignored on expanders, but "r" matches the DI address
;; operands of the sibling ld/st expanders below.
4834 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4835   [(match_operand:VSTRUCT 0 "register_operand" "=w")
4836    (match_operand:DI 1 "register_operand" "r")
4837    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4838   "TARGET_SIMD"
4839 {
4840   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4841   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4842                      * <VSTRUCT:nregs>);
4843
4844   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4845                                                                 mem));
4846   DONE;
4847 })
4848
;; LD2/LD3/LD4 into D-register lists.  The result is modelled as the full
;; OI/CI/XI register-list mode, with each loaded D register widened to a
;; Q register by concatenating a zero upper half (vec_duplicate 0 for
;; vector element modes, plain const_int 0 for the DX scalar variants).
;; The DX variants use LD1 multi-register form with .1d arrangement since
;; there is no element interleaving to perform on 64-bit scalars.
4849 (define_insn "aarch64_ld2<mode>_dreg"
4850   [(set (match_operand:OI 0 "register_operand" "=w")
4851         (subreg:OI
4852           (vec_concat:<VRL2>
4853             (vec_concat:<VDBL>
4854              (unspec:VD
4855                 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4856                 UNSPEC_LD2)
4857              (vec_duplicate:VD (const_int 0)))
4858             (vec_concat:<VDBL>
4859              (unspec:VD [(match_dup 1)]
4860                 UNSPEC_LD2)
4861              (vec_duplicate:VD (const_int 0)))) 0))]
4862   "TARGET_SIMD"
4863   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4864   [(set_attr "type" "neon_load2_2reg<q>")]
4865 )
4866
4867 (define_insn "aarch64_ld2<mode>_dreg"
4868   [(set (match_operand:OI 0 "register_operand" "=w")
4869         (subreg:OI
4870           (vec_concat:<VRL2>
4871             (vec_concat:<VDBL>
4872              (unspec:DX
4873                 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4874                 UNSPEC_LD2)
4875              (const_int 0))
4876             (vec_concat:<VDBL>
4877              (unspec:DX [(match_dup 1)]
4878                 UNSPEC_LD2)
4879              (const_int 0))) 0))]
4880   "TARGET_SIMD"
4881   "ld1\\t{%S0.1d - %T0.1d}, %1"
4882   [(set_attr "type" "neon_load1_2reg<q>")]
4883 )
4884
4885 (define_insn "aarch64_ld3<mode>_dreg"
4886   [(set (match_operand:CI 0 "register_operand" "=w")
4887         (subreg:CI
4888          (vec_concat:<VRL3>
4889           (vec_concat:<VRL2>
4890             (vec_concat:<VDBL>
4891              (unspec:VD
4892                 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4893                 UNSPEC_LD3)
4894              (vec_duplicate:VD (const_int 0)))
4895             (vec_concat:<VDBL>
4896              (unspec:VD [(match_dup 1)]
4897                 UNSPEC_LD3)
4898              (vec_duplicate:VD (const_int 0))))
4899           (vec_concat:<VDBL>
4900              (unspec:VD [(match_dup 1)]
4901                 UNSPEC_LD3)
4902              (vec_duplicate:VD (const_int 0)))) 0))]
4903   "TARGET_SIMD"
4904   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4905   [(set_attr "type" "neon_load3_3reg<q>")]
4906 )
4907
4908 (define_insn "aarch64_ld3<mode>_dreg"
4909   [(set (match_operand:CI 0 "register_operand" "=w")
4910         (subreg:CI
4911          (vec_concat:<VRL3>
4912           (vec_concat:<VRL2>
4913             (vec_concat:<VDBL>
4914              (unspec:DX
4915                 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4916                 UNSPEC_LD3)
4917              (const_int 0))
4918             (vec_concat:<VDBL>
4919              (unspec:DX [(match_dup 1)]
4920                 UNSPEC_LD3)
4921              (const_int 0)))
4922           (vec_concat:<VDBL>
4923              (unspec:DX [(match_dup 1)]
4924                 UNSPEC_LD3)
4925              (const_int 0))) 0))]
4926   "TARGET_SIMD"
4927   "ld1\\t{%S0.1d - %U0.1d}, %1"
4928   [(set_attr "type" "neon_load1_3reg<q>")]
4929 )
4930
4931 (define_insn "aarch64_ld4<mode>_dreg"
4932   [(set (match_operand:XI 0 "register_operand" "=w")
4933         (subreg:XI
4934          (vec_concat:<VRL4>
4935            (vec_concat:<VRL2>
4936              (vec_concat:<VDBL>
4937                (unspec:VD
4938                  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4939                  UNSPEC_LD4)
4940                (vec_duplicate:VD (const_int 0)))
4941               (vec_concat:<VDBL>
4942                 (unspec:VD [(match_dup 1)]
4943                           UNSPEC_LD4)
4944                 (vec_duplicate:VD (const_int 0))))
4945            (vec_concat:<VRL2>
4946              (vec_concat:<VDBL>
4947                (unspec:VD [(match_dup 1)]
4948                          UNSPEC_LD4)
4949                (vec_duplicate:VD (const_int 0)))
4950              (vec_concat:<VDBL>
4951                (unspec:VD [(match_dup 1)]
4952                          UNSPEC_LD4)
4953                (vec_duplicate:VD (const_int 0))))) 0))]
4954   "TARGET_SIMD"
4955   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4956   [(set_attr "type" "neon_load4_4reg<q>")]
4957 )
4958
4959 (define_insn "aarch64_ld4<mode>_dreg"
4960   [(set (match_operand:XI 0 "register_operand" "=w")
4961         (subreg:XI
4962          (vec_concat:<VRL4>
4963            (vec_concat:<VRL2>
4964              (vec_concat:<VDBL>
4965                (unspec:DX
4966                  [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4967                  UNSPEC_LD4)
4968                (const_int 0))
4969               (vec_concat:<VDBL>
4970                 (unspec:DX [(match_dup 1)]
4971                           UNSPEC_LD4)
4972                 (const_int 0)))
4973            (vec_concat:<VRL2>
4974              (vec_concat:<VDBL>
4975                (unspec:DX [(match_dup 1)]
4976                          UNSPEC_LD4)
4977                (const_int 0))
4978              (vec_concat:<VDBL>
4979                (unspec:DX [(match_dup 1)]
4980                          UNSPEC_LD4)
4981                (const_int 0)))) 0))]
4982   "TARGET_SIMD"
4983   "ld1\\t{%S0.1d - %V0.1d}, %1"
4984   [(set_attr "type" "neon_load1_4reg<q>")]
4985 )
4986
;; Builtin expanders for the ld<n> intrinsics.  Each wraps the address in
;; operand 1 in a MEM of the appropriate size/mode and forwards to the
;; matching insn pattern: _dreg for D-register element modes (BLKmode MEM,
;; <nregs> * 8 bytes), be_ld1/plain move for single vectors, and the
;; simd_ld<n> insns for Q-register element modes.
4987 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4988  [(match_operand:VSTRUCT 0 "register_operand" "=w")
4989   (match_operand:DI 1 "register_operand" "r")
4990   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4991   "TARGET_SIMD"
4992 {
4993   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4994   set_mem_size (mem, <VSTRUCT:nregs> * 8);
4995
4996   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4997   DONE;
4998 })
4999
5000 (define_expand "aarch64_ld1<VALL_F16:mode>"
5001  [(match_operand:VALL_F16 0 "register_operand")
5002   (match_operand:DI 1 "register_operand")]
5003   "TARGET_SIMD"
5004 {
5005   machine_mode mode = <VALL_F16:MODE>mode;
5006   rtx mem = gen_rtx_MEM (mode, operands[1]);
5007
5008   if (BYTES_BIG_ENDIAN)
5009     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5010   else
5011     emit_move_insn (operands[0], mem);
5012   DONE;
5013 })
5014
5015 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5016  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5017   (match_operand:DI 1 "register_operand" "r")
5018   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5019   "TARGET_SIMD"
5020 {
5021   machine_mode mode = <VSTRUCT:MODE>mode;
5022   rtx mem = gen_rtx_MEM (mode, operands[1]);
5023
5024   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5025   DONE;
5026 })
5027
;; Builtin expander for the single-lane ld<n>_lane intrinsics.  Checks the
;; lane index against the element count, builds a BLKmode MEM of
;; <nregs> elements at the address in operand 1, and forwards to the
;; vec_load_lanes*_lane insn (operand 2 supplies the merged-in old value).
;; Note: operand 1 previously carried the FP-register constraint "w";
;; constraints are ignored on expanders, but "r" matches the DI address
;; operands of the sibling ld/st expanders in this file.
5028 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5029   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5030         (match_operand:DI 1 "register_operand" "r")
5031         (match_operand:VSTRUCT 2 "register_operand" "0")
5032         (match_operand:SI 3 "immediate_operand" "i")
5033         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5034   "TARGET_SIMD"
5035 {
5036   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5037   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5038                      * <VSTRUCT:nregs>);
5039
5040   aarch64_simd_lane_bounds (operands[3], 0,
5041                             GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5042                             NULL);
5043   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5044         operands[0], mem, operands[2], operands[3]));
5045   DONE;
5046 })
5047
5048 ;; Expanders for builtins to extract vector registers from large
5049 ;; opaque integer modes.
5050
5051 ;; D-register list.
5052
;; Extract D-register <part> from a structure register list.  Each entry
;; occupies a 16-byte (Q-register) slot, hence the offset of part * 16;
;; the D value is the low half of that Q slot, taken via gen_lowpart.
5053 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5054  [(match_operand:VDC 0 "register_operand" "=w")
5055   (match_operand:VSTRUCT 1 "register_operand" "w")
5056   (match_operand:SI 2 "immediate_operand" "i")]
5057   "TARGET_SIMD"
5058 {
5059   int part = INTVAL (operands[2]);
5060   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5061   int offset = part * 16;
5062
5063   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5064   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5065   DONE;
5066 })
5067
5068 ;; Q-register list.
5069
;; Extract Q-register <part> from a structure register list: a plain
;; subreg move at byte offset part * 16.
5070 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5071  [(match_operand:VQ 0 "register_operand" "=w")
5072   (match_operand:VSTRUCT 1 "register_operand" "w")
5073   (match_operand:SI 2 "immediate_operand" "i")]
5074   "TARGET_SIMD"
5075 {
5076   int part = INTVAL (operands[2]);
5077   int offset = part * 16;
5078
5079   emit_move_insn (operands[0],
5080                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5081   DONE;
5082 })
5083
5084 ;; Permuted-store expanders for neon intrinsics.
5085
5086 ;; Permute instructions
5087
5088 ;; vec_perm support
5089
;; Standard pattern: constant permutation.  Defer to
;; aarch64_expand_vec_perm_const, which picks the best sequence
;; (dup/ext/rev/zip/uzp/trn/tbl); FAIL lets the middle end fall back.
5090 (define_expand "vec_perm_const<mode>"
5091   [(match_operand:VALL_F16 0 "register_operand")
5092    (match_operand:VALL_F16 1 "register_operand")
5093    (match_operand:VALL_F16 2 "register_operand")
5094    (match_operand:<V_cmp_result> 3)]
5095   "TARGET_SIMD"
5096 {
5097   if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5098                                      operands[2], operands[3]))
5099     DONE;
5100   else
5101     FAIL;
5102 })
5103
;; Standard pattern: variable permutation with a register selector;
;; byte-element vectors only (VB), expanded to TBL by
;; aarch64_expand_vec_perm.
5104 (define_expand "vec_perm<mode>"
5105   [(match_operand:VB 0 "register_operand")
5106    (match_operand:VB 1 "register_operand")
5107    (match_operand:VB 2 "register_operand")
5108    (match_operand:VB 3 "register_operand")]
5109   "TARGET_SIMD"
5110 {
5111   aarch64_expand_vec_perm (operands[0], operands[1],
5112                            operands[2], operands[3]);
5113   DONE;
5114 })
5115
;; Table lookup (TBL) and table lookup-with-fallback (TBX) patterns.
;; The table is 1-4 consecutive Q registers (V16QI/OI/CI/XI); the index
;; vector selects bytes from it.  TBL writes zero for out-of-range
;; indices; TBX leaves the destination byte unchanged, hence operand 1 is
;; tied to operand 0 ("0") in the tbx patterns.
5116 (define_insn "aarch64_tbl1<mode>"
5117   [(set (match_operand:VB 0 "register_operand" "=w")
5118         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5119                     (match_operand:VB 2 "register_operand" "w")]
5120                    UNSPEC_TBL))]
5121   "TARGET_SIMD"
5122   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5123   [(set_attr "type" "neon_tbl1<q>")]
5124 )
5125
5126 ;; Two source registers.
5127
5128 (define_insn "aarch64_tbl2v16qi"
5129   [(set (match_operand:V16QI 0 "register_operand" "=w")
5130         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5131                        (match_operand:V16QI 2 "register_operand" "w")]
5132                       UNSPEC_TBL))]
5133   "TARGET_SIMD"
5134   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5135   [(set_attr "type" "neon_tbl2_q")]
5136 )
5137
5138 (define_insn "aarch64_tbl3<mode>"
5139   [(set (match_operand:VB 0 "register_operand" "=w")
5140         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5141                     (match_operand:VB 2 "register_operand" "w")]
5142                       UNSPEC_TBL))]
5143   "TARGET_SIMD"
5144   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5145   [(set_attr "type" "neon_tbl3")]
5146 )
5147
5148 (define_insn "aarch64_tbx4<mode>"
5149   [(set (match_operand:VB 0 "register_operand" "=w")
5150         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5151                     (match_operand:OI 2 "register_operand" "w")
5152                     (match_operand:VB 3 "register_operand" "w")]
5153                       UNSPEC_TBX))]
5154   "TARGET_SIMD"
5155   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5156   [(set_attr "type" "neon_tbl4")]
5157 )
5158
5159 ;; Three source registers.
5160
5161 (define_insn "aarch64_qtbl3<mode>"
5162   [(set (match_operand:VB 0 "register_operand" "=w")
5163         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5164                     (match_operand:VB 2 "register_operand" "w")]
5165                       UNSPEC_TBL))]
5166   "TARGET_SIMD"
5167   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5168   [(set_attr "type" "neon_tbl3")]
5169 )
5170
5171 (define_insn "aarch64_qtbx3<mode>"
5172   [(set (match_operand:VB 0 "register_operand" "=w")
5173         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5174                     (match_operand:CI 2 "register_operand" "w")
5175                     (match_operand:VB 3 "register_operand" "w")]
5176                       UNSPEC_TBX))]
5177   "TARGET_SIMD"
5178   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5179   [(set_attr "type" "neon_tbl3")]
5180 )
5181
5182 ;; Four source registers.
5183
5184 (define_insn "aarch64_qtbl4<mode>"
5185   [(set (match_operand:VB 0 "register_operand" "=w")
5186         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5187                     (match_operand:VB 2 "register_operand" "w")]
5188                       UNSPEC_TBL))]
5189   "TARGET_SIMD"
5190   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5191   [(set_attr "type" "neon_tbl4")]
5192 )
5193
5194 (define_insn "aarch64_qtbx4<mode>"
5195   [(set (match_operand:VB 0 "register_operand" "=w")
5196         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5197                     (match_operand:XI 2 "register_operand" "w")
5198                     (match_operand:VB 3 "register_operand" "w")]
5199                       UNSPEC_TBX))]
5200   "TARGET_SIMD"
5201   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5202   [(set_attr "type" "neon_tbl4")]
5203 )
5204
;; Concatenate two V16QI vectors into an OI register pair (used to build
;; TBL tables).  Emitted as "#" and split after reload, when hard regnos
;; are known, by aarch64_split_combinev16qi.
5205 (define_insn_and_split "aarch64_combinev16qi"
5206   [(set (match_operand:OI 0 "register_operand" "=w")
5207         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5208                     (match_operand:V16QI 2 "register_operand" "w")]
5209                    UNSPEC_CONCAT))]
5210   "TARGET_SIMD"
5211   "#"
5212   "&& reload_completed"
5213   [(const_int 0)]
5214 {
5215   aarch64_split_combinev16qi (operands);
5216   DONE;
5217 }
5218 [(set_attr "type" "multiple")]
5219 )
5220
;; Two-input permutes: ZIP/UZP/TRN variants via the PERMUTE iterator
;; (perm_insn is the mnemonic root, perm_hilo the 1/2 suffix).
5221 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5222   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5223         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5224                           (match_operand:VALL_F16 2 "register_operand" "w")]
5225          PERMUTE))]
5226   "TARGET_SIMD"
5227   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5228   [(set_attr "type" "neon_permute<q>")]
5229 )
5230
5231 ;; Note immediate (third) operand is lane index not byte index.
;; EXT extracts a vector from a pair at a byte offset; the lane index in
;; operand 3 is scaled to bytes at output time.
5232 (define_insn "aarch64_ext<mode>"
5233   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5234         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5235                           (match_operand:VALL_F16 2 "register_operand" "w")
5236                           (match_operand:SI 3 "immediate_operand" "i")]
5237          UNSPEC_EXT))]
5238   "TARGET_SIMD"
5239 {
5240   operands[3] = GEN_INT (INTVAL (operands[3])
5241       * GET_MODE_UNIT_SIZE (<MODE>mode));
5242   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5243 }
5244   [(set_attr "type" "neon_ext<q>")]
5245 )
5246
;; Element reversal within 16/32/64-bit containers (REV16/REV32/REV64).
5247 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5248   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5249         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5250                     REVERSE))]
5251   "TARGET_SIMD"
5252   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5253   [(set_attr "type" "neon_rev<q>")]
5254 )
5255
;; ST2/ST3/ST4 from D-register lists.  The source is passed as the full
;; OI/CI/XI register-list mode; the UNSPEC_VSTRUCTDUMMY carries the
;; element mode so the iterator can pick the arrangement.  The DX
;; (64-bit scalar) variants use multi-register ST1 with .1d arrangement,
;; as there is no interleaving to perform.
5256 (define_insn "aarch64_st2<mode>_dreg"
5257   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5258         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5259                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260                    UNSPEC_ST2))]
5261   "TARGET_SIMD"
5262   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5263   [(set_attr "type" "neon_store2_2reg")]
5264 )
5265
5266 (define_insn "aarch64_st2<mode>_dreg"
5267   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5268         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5269                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270                    UNSPEC_ST2))]
5271   "TARGET_SIMD"
5272   "st1\\t{%S1.1d - %T1.1d}, %0"
5273   [(set_attr "type" "neon_store1_2reg")]
5274 )
5275
5276 (define_insn "aarch64_st3<mode>_dreg"
5277   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5278         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5279                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5280                    UNSPEC_ST3))]
5281   "TARGET_SIMD"
5282   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5283   [(set_attr "type" "neon_store3_3reg")]
5284 )
5285
5286 (define_insn "aarch64_st3<mode>_dreg"
5287   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5288         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5289                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5290                    UNSPEC_ST3))]
5291   "TARGET_SIMD"
5292   "st1\\t{%S1.1d - %U1.1d}, %0"
5293   [(set_attr "type" "neon_store1_3reg")]
5294 )
5295
5296 (define_insn "aarch64_st4<mode>_dreg"
5297   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5298         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5299                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5300                    UNSPEC_ST4))]
5301   "TARGET_SIMD"
5302   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5303   [(set_attr "type" "neon_store4_4reg")]
5304 )
5305
5306 (define_insn "aarch64_st4<mode>_dreg"
5307   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5308         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5309                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5310                    UNSPEC_ST4))]
5311   "TARGET_SIMD"
5312   "st1\\t{%S1.1d - %V1.1d}, %0"
5313   [(set_attr "type" "neon_store1_4reg")]
5314 )
5315
;; Builtin expanders for the st<n> intrinsics.  Operand 0 holds the store
;; address in a general register; build the MEM (BLKmode for D-register
;; and lane variants, struct mode for Q-register variants) and forward to
;; the matching insn.  The lane variant remaps the lane index inside the
;; vec_store_lanes*_lane insn it calls.
5316 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5317  [(match_operand:DI 0 "register_operand" "r")
5318   (match_operand:VSTRUCT 1 "register_operand" "w")
5319   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5320   "TARGET_SIMD"
5321 {
5322   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5323   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5324
5325   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5326   DONE;
5327 })
5328
5329 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5330  [(match_operand:DI 0 "register_operand" "r")
5331   (match_operand:VSTRUCT 1 "register_operand" "w")
5332   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5333   "TARGET_SIMD"
5334 {
5335   machine_mode mode = <VSTRUCT:MODE>mode;
5336   rtx mem = gen_rtx_MEM (mode, operands[0]);
5337
5338   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5339   DONE;
5340 })
5341
5342 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5343  [(match_operand:DI 0 "register_operand" "r")
5344   (match_operand:VSTRUCT 1 "register_operand" "w")
5345   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5346   (match_operand:SI 2 "immediate_operand")]
5347   "TARGET_SIMD"
5348 {
5349   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5350   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5351                      * <VSTRUCT:nregs>);
5352
5353   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5354                 mem, operands[1], operands[2]));
5355   DONE;
5356 })
5357
;; Builtin expander for st1: on big-endian, route through the
;; element-order-preserving be_st1 unspec; otherwise a plain vector store.
5358 (define_expand "aarch64_st1<VALL_F16:mode>"
5359  [(match_operand:DI 0 "register_operand")
5360   (match_operand:VALL_F16 1 "register_operand")]
5361   "TARGET_SIMD"
5362 {
5363   machine_mode mode = <VALL_F16:MODE>mode;
5364   rtx mem = gen_rtx_MEM (mode, operands[0]);
5365
5366   if (BYTES_BIG_ENDIAN)
5367     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5368   else
5369     emit_move_insn (mem, operands[1]);
5370   DONE;
5371 })
5372
5373 ;; Expander for builtins to insert vector registers into large
5374 ;; opaque integer modes.
5375
5376 ;; Q-register list. We don't need a D-reg inserter as we zero
5377 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5378
;; Copy the whole list (operand 1) into operand 0, then overwrite the
;; 16-byte slot selected by operand 3 with the Q vector in operand 2.
5379 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5380  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5381   (match_operand:VSTRUCT 1 "register_operand" "0")
5382   (match_operand:VQ 2 "register_operand" "w")
5383   (match_operand:SI 3 "immediate_operand" "i")]
5384   "TARGET_SIMD"
5385 {
5386   int part = INTVAL (operands[3]);
5387   int offset = part * 16;
5388
5389   emit_move_insn (operands[0], operands[1]);
5390   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5391                   operands[2]);
5392   DONE;
5393 })
5394
5395 ;; Standard pattern name vec_init<mode>.
5396
;; Build a vector from a PARALLEL of element values (operand 1); all the
;; strategy selection lives in aarch64_expand_vector_init.
5397 (define_expand "vec_init<mode>"
5398   [(match_operand:VALL_F16 0 "register_operand" "")
5399    (match_operand 1 "" "")]
5400   "TARGET_SIMD"
5401 {
5402   aarch64_expand_vector_init (operands[0], operands[1]);
5403   DONE;
5404 })
5405
;; Load one element from memory and replicate it to every lane (LD1R);
;; matched when combine builds a vec_duplicate of a memory scalar.
5406 (define_insn "*aarch64_simd_ld1r<mode>"
5407   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5408         (vec_duplicate:VALL_F16
5409           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5410   "TARGET_SIMD"
5411   "ld1r\\t{%0.<Vtype>}, %1"
5412   [(set_attr "type" "neon_load1_all_lanes")]
5413 )
5414
;; Reciprocal estimate/step instructions.  frecpe gives the initial FP
;; reciprocal estimate, frecps the Newton-Raphson step; the scalar
;; frecp<suffix> pattern covers frecpe/frecpx on GPF scalars; urecpe is
;; the unsigned integer reciprocal estimate.
5415 (define_insn "aarch64_frecpe<mode>"
5416   [(set (match_operand:VDQF 0 "register_operand" "=w")
5417         (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5418                     UNSPEC_FRECPE))]
5419   "TARGET_SIMD"
5420   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5421   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
5422 )
5423
5424 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5425   [(set (match_operand:GPF 0 "register_operand" "=w")
5426         (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5427                     FRECP))]
5428   "TARGET_SIMD"
5429   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5430   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
5431 )
5432
5433 (define_insn "aarch64_frecps<mode>"
5434   [(set (match_operand:VALLF 0 "register_operand" "=w")
5435         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
5436                       (match_operand:VALLF 2 "register_operand" "w")]
5437                       UNSPEC_FRECPS))]
5438   "TARGET_SIMD"
5439   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5440   [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
5441 )
5442
5443 (define_insn "aarch64_urecpe<mode>"
5444   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5445         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5446                 UNSPEC_URECPE))]
5447  "TARGET_SIMD"
5448  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5449   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5450
5451 ;; Standard pattern name vec_extract<mode>.
5452
;; Extract element operand 2 from vector operand 1; simply forwards to
;; the aarch64_get_lane insn.
5453 (define_expand "vec_extract<mode>"
5454   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5455    (match_operand:VALL_F16 1 "register_operand" "")
5456    (match_operand:SI 2 "immediate_operand" "")]
5457   "TARGET_SIMD"
5458 {
5459     emit_insn
5460       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5461     DONE;
5462 })
5463
5464 ;; aes
5465
;; AESE/AESD round: operand 1 (the state) is tied to the destination;
;; operand 2 supplies the round key.
5466 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5467   [(set (match_operand:V16QI 0 "register_operand" "=w")
5468         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5469                        (match_operand:V16QI 2 "register_operand" "w")]
5470          CRYPTO_AES))]
5471   "TARGET_SIMD && TARGET_CRYPTO"
5472   "aes<aes_op>\\t%0.16b, %2.16b"
5473   [(set_attr "type" "crypto_aese")]
5474 )
5475
5476 ;; When AES/AESMC fusion is enabled we want the register allocation to
5477 ;; look like:
5478 ;;    AESE Vn, _
5479 ;;    AESMC Vn, Vn
5480 ;; So prefer to tie operand 1 to operand 0 when fusing.
5481
;; AESMC/AESIMC: the first alternative (tied "0") is only enabled when
;; AESE/AESMC fusion is on, steering the allocator toward the fusible
;; same-register form; otherwise any source register is fine.
5482 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5483   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5484         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5485          CRYPTO_AESMC))]
5486   "TARGET_SIMD && TARGET_CRYPTO"
5487   "aes<aesmc_op>\\t%0.16b, %1.16b"
5488   [(set_attr "type" "crypto_aesmc")
5489    (set_attr_alternative "enabled"
5490      [(if_then_else (match_test
5491                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5492                      (const_string "yes" )
5493                      (const_string "no"))
5494       (const_string "yes")])]
5495 )
5496
5497 ;; sha1
5498
;; SHA1 hash and schedule-update instructions.  Patterns with a "0"
;; constraint accumulate into the destination register.
5499 (define_insn "aarch64_crypto_sha1hsi"
5500   [(set (match_operand:SI 0 "register_operand" "=w")
5501         (unspec:SI [(match_operand:SI 1
5502                        "register_operand" "w")]
5503          UNSPEC_SHA1H))]
5504   "TARGET_SIMD && TARGET_CRYPTO"
5505   "sha1h\\t%s0, %s1"
5506   [(set_attr "type" "crypto_sha1_fast")]
5507 )
5508
5509 (define_insn "aarch64_crypto_sha1su1v4si"
5510   [(set (match_operand:V4SI 0 "register_operand" "=w")
5511         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5512                       (match_operand:V4SI 2 "register_operand" "w")]
5513          UNSPEC_SHA1SU1))]
5514   "TARGET_SIMD && TARGET_CRYPTO"
5515   "sha1su1\\t%0.4s, %2.4s"
5516   [(set_attr "type" "crypto_sha1_fast")]
5517 )
5518
5519 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5520   [(set (match_operand:V4SI 0 "register_operand" "=w")
5521         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5522                       (match_operand:SI 2 "register_operand" "w")
5523                       (match_operand:V4SI 3 "register_operand" "w")]
5524          CRYPTO_SHA1))]
5525   "TARGET_SIMD && TARGET_CRYPTO"
5526   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5527   [(set_attr "type" "crypto_sha1_slow")]
5528 )
5529
5530 (define_insn "aarch64_crypto_sha1su0v4si"
5531   [(set (match_operand:V4SI 0 "register_operand" "=w")
5532         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5533                       (match_operand:V4SI 2 "register_operand" "w")
5534                       (match_operand:V4SI 3 "register_operand" "w")]
5535          UNSPEC_SHA1SU0))]
5536   "TARGET_SIMD && TARGET_CRYPTO"
5537   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5538   [(set_attr "type" "crypto_sha1_xor")]
5539 )
5540
5541 ;; sha256
5542
;; SHA256H/SHA256H2 hash update: operand 1 accumulates into the
;; destination ("0"); operands 2 and 3 are the other hash state and the
;; schedule word.
5543 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5544   [(set (match_operand:V4SI 0 "register_operand" "=w")
5545         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5546                       (match_operand:V4SI 2 "register_operand" "w")
5547                       (match_operand:V4SI 3 "register_operand" "w")]
5548          CRYPTO_SHA256))]
5549   "TARGET_SIMD && TARGET_CRYPTO"
5550   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5551   [(set_attr "type" "crypto_sha256_slow")]
5552 )
5553
;; SHA256SU0 schedule update: operand 1 accumulates into the destination.
;; Fixed condition string: was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other crypto pattern here.
5554 (define_insn "aarch64_crypto_sha256su0v4si"
5555   [(set (match_operand:V4SI 0 "register_operand" "=w")
5556         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5557                       (match_operand:V4SI 2 "register_operand" "w")]
5558          UNSPEC_SHA256SU0))]
5559   "TARGET_SIMD && TARGET_CRYPTO"
5560   "sha256su0\\t%0.4s, %2.4s"
5561   [(set_attr "type" "crypto_sha256_fast")]
5562 )
5563
;; SHA256SU1 schedule update: operand 1 accumulates into the destination.
;; Fixed condition string: was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other crypto pattern here.
5564 (define_insn "aarch64_crypto_sha256su1v4si"
5565   [(set (match_operand:V4SI 0 "register_operand" "=w")
5566         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5567                       (match_operand:V4SI 2 "register_operand" "w")
5568                       (match_operand:V4SI 3 "register_operand" "w")]
5569          UNSPEC_SHA256SU1))]
5570   "TARGET_SIMD && TARGET_CRYPTO"
5571   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5572   [(set_attr "type" "crypto_sha256_slow")]
5573 )
5574
5575 ;; pmull
5576
;; Polynomial multiply long: PMULL multiplies the low 64-bit polynomial
;; elements, PMULL2 the high halves of the V2DI inputs; both produce a
;; 128-bit (TImode) product.
5577 (define_insn "aarch64_crypto_pmulldi"
5578   [(set (match_operand:TI 0 "register_operand" "=w")
5579         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
5580                      (match_operand:DI 2 "register_operand" "w")]
5581                     UNSPEC_PMULL))]
5582  "TARGET_SIMD && TARGET_CRYPTO"
5583  "pmull\\t%0.1q, %1.1d, %2.1d"
5584   [(set_attr "type" "neon_mul_d_long")]
5585 )
5586
5587 (define_insn "aarch64_crypto_pmullv2di"
5588  [(set (match_operand:TI 0 "register_operand" "=w")
5589        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5590                    (match_operand:V2DI 2 "register_operand" "w")]
5591                   UNSPEC_PMULL2))]
5592   "TARGET_SIMD && TARGET_CRYPTO"
5593   "pmull2\\t%0.1q, %1.2d, %2.2d"
5594   [(set_attr "type" "neon_mul_d_long")]
5595 )