]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
[AArch64][5/10] ARMv8.2-A FP16 lane vector intrinsics
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Vector move expander for all vector modes (including FP16 vectors).
;; A mem := mem move is not directly representable, so when the
;; destination is memory, force the source into a register first.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
30
;; Misaligned vector move expander.  Must never FAIL, so legalize the
;; one case the insn patterns cannot handle (both operands non-register).
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
43
;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: source in a general register (DUP from GP) or source
;; already in a SIMD register (DUP from element 0).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
)
54
;; Duplicate a scalar FP value (already in a SIMD register) into every
;; lane of a floating-point vector.  No GP-register alternative here.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
63
;; Broadcast one lane of a vector into all lanes of a same-width vector.
;; The lane number is remapped for endianness before printing.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
78
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit <-> 128-bit, via VSWAP_WIDTH); the lane number is remapped
;; for endianness in the source vector's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
94
;; Move for 64-bit (D-register) vector modes.  Alternatives cover
;; load, store, SIMD reg-reg (via ORR), SIMD->GP (UMOV), GP->SIMD (INS),
;; GP-reg move, and vector immediate.  At least one operand must be a
;; register or the insn does not match.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\\t%d0, %1";
     case 1: return "str\\t%d1, %0";
     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
     case 3: return "umov\t%0, %1.d[0]";
     case 4: return "ins\t%0.d[0], %1";
     case 5: return "mov\t%0, %1";
     case 6:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
		     mov_reg, neon_move<q>")]
)
122
;; Move for 128-bit (Q-register) vector modes.  The GP<->SIMD and GP-GP
;; alternatives emit "#" and are split after reload into two 64-bit
;; moves (see the define_splits below); their length is 8 bytes.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\\t%q0, %1";
    case 1:
	return "str\\t%q1, %0";
    case 2:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3:
    case 4:
    case 5:
	return "#";
    case 6:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple, multiple,\
		     neon_move<q>")
   (set_attr "length" "4,4,4,8,8,8,4")]
)
155
;; Load a pair of adjacent 64-bit vectors with a single LDP.  The second
;; address must be exactly the first address plus the mode size, which
;; the insn condition checks structurally with rtx_equal_p.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)
169
;; Store a pair of adjacent 64-bit vectors with a single STP; mirror of
;; load_pair above, with the same adjacent-address condition.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
183
;; After reload, split a 128-bit vector move between two general
;; registers into a pair of DImode GP moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})
195
;; After reload, split a 128-bit vector move that crosses between the
;; FP/SIMD and general register files (either direction).
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
207
;; Expand a 128-bit cross-file move as two 64-bit half moves.
;; GP -> SIMD: move the low half then the high half into the vector.
;; SIMD -> GP: extract the low and high halves with vec_selects built
;; from endian-aware lane masks.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        /* Lane masks selecting the low and high halves of the vector.  */
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
242
;; Move the low 64-bit half of a 128-bit vector to a general register
;; (UMOV from d[0]).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
253
;; Move the high 64-bit half of a 128-bit vector to a general register
;; (UMOV from d[1]).  Only valid after reload.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
264
;; Vector OR-NOT: operand 0 = operand 2 | ~operand 1.  Note the RTL has
;; the NOT on operand 1, so the ORN assembly swaps the operand order.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
273
;; Vector AND-NOT (bit clear): operand 0 = operand 2 & ~operand 1.
;; As with ORN, the assembly operand order is swapped relative to RTL.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
282
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)
291
;; Vector integer subtraction.
(define_insn "sub<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)
300
;; Vector integer multiplication (byte/half/single element sizes only;
;; there is no vector MUL for 64-bit elements).
(define_insn "mul<mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
309
;; Byte-swap each element via the element-size-appropriate REV variant
;; (Vrevsuff selects REV16/REV32/REV64 to match the element width).
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)
317
;; Reverse the bits within each byte (RBIT); byte-vector modes only.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
326
;; Count trailing zeros, synthesized as: byte-swap each element, then
;; bit-reverse each byte (viewing the vector as bytes), which together
;; reverse the full element bit order, then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
340
;; Multiply a vector by one broadcast lane of another same-width vector
;; ((F)MUL by element).  Lane number is endianness-adjusted.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
356
;; Multiply by element where the lane comes from a vector of the
;; opposite register width; lane remapped in the source vector's mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
373
;; Multiply a vector by a duplicated scalar already held in a SIMD
;; register: use the by-element form with lane 0.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
384
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
	(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
392
;; Reciprocal square-root step (FRSQRTS), the Newton-iteration step
;; instruction that pairs with FRSQRTE.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
	(unspec:VHSDF_SDF [(match_operand:VHSDF_SDF 1 "register_operand" "w")
			   (match_operand:VHSDF_SDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
401
;; Expand the reciprocal-square-root optab by emitting the approximate
;; sequence (estimate + refinement steps) from aarch64.c.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
411
;; Scalar DF multiply where one operand is a selected lane of a V2DF
;; vector; printed as the by-element FMUL form.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
426
;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)
434
;; Vector integer absolute value (standard optab pattern; the combiner
;; may merge this with other operations, unlike aarch64_abs below).
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
442
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.  Hence it is kept opaque as an UNSPEC.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
455
;; Signed absolute difference: |op1 - op2| matched from abs(minus) RTL
;; and emitted as a single SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
465
;; Signed absolute difference and accumulate: op0 = |op1 - op2| + op3,
;; with the accumulator tied to the output register ("0" constraint).
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
476
;; Floating-point absolute difference (FABD), vector and scalar modes.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
	(abs:VHSDF_SDF
	  (minus:VHSDF_SDF
	    (match_operand:VHSDF_SDF 1 "register_operand" "w")
	    (match_operand:VHSDF_SDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
487
;; Vector bitwise AND.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
496
;; Vector bitwise OR.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
505
;; Vector bitwise exclusive OR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
514
;; Vector bitwise NOT (one's complement).
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
522
;; Insert a scalar into one lane of an integer vector.  Operand 2 is a
;; one-hot merge mask; its log2 gives the lane, which is remapped for
;; endianness.  Alternatives: from GP reg (INS), from SIMD reg lane 0
;; (INS), or directly from memory (LD1 to a single lane).
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
)
548
;; Copy one lane of a vector into a lane of another same-width vector
;; (INS element-to-element).  Operand 2 is the one-hot destination-lane
;; mask; operand 4 is the source lane.  Both are endianness-adjusted.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_merge:VALL
	    (vec_duplicate:VALL
	      (vec_select:<VEL>
		(match_operand:VALL 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
569
;; As the lane-copy pattern above, but the source lane comes from a
;; vector of the opposite register width; the source lane number is
;; remapped in that vector's mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
	(vec_merge:VALL
	    (vec_duplicate:VALL
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
591
;; Vector logical shift right by an immediate duplicated across lanes.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
600
;; Vector arithmetic shift right by an immediate duplicated across lanes.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
609
;; Vector shift left by an immediate duplicated across lanes.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
618
;; Vector shift left by a per-lane register amount (SSHL).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
627
;; USHL: per-lane register shift where negative amounts shift right
;; (logical).  Kept as an unspec because the direction is data-dependent.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
637
;; SSHL: per-lane register shift where negative amounts shift right
;; (arithmetic).  Unspec form, mirroring the unsigned variant above.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
647
;; Shift-left optab expander.  A constant in-range amount becomes an
;; immediate SHL; anything else (out-of-range constant, memory, or
;; register) is forced to a register, duplicated across lanes, and
;; emitted as a register SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* Left shifts of exactly the element width are invalid as
	 immediates, hence the strict '<' bound.  */
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
694
;; Logical shift-right optab expander.  A constant amount in
;; (0, bit_width] becomes an immediate USHR (right shifts by the full
;; element width are valid immediates); otherwise the amount is negated
;; and fed to the register USHL form, which shifts right for negative
;; amounts.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
741
;; Arithmetic shift-right optab expander.  Same structure as lshr3
;; above, but uses the immediate SSHR / register-negated SSHL forms.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
788
;; Vector-by-vector shift left: maps directly onto register SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
799
;; Vector-by-vector arithmetic shift right: negate the per-lane amounts
;; and use SSHL (which shifts right for negative amounts).
;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})
815
;; DI vector shift
;; Intrinsic expander for scalar-DI arithmetic shift right that also
;; accepts a shift of 64, which the standard ashrdi3 pattern does not.
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
832
;; Vector-by-vector logical shift right: negate the per-lane amounts
;; and use USHL (which shifts right logically for negative amounts).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})
845
;; Intrinsic expander for scalar-DI logical shift right that also
;; accepts a shift of 64: the result is then simply zero.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
859
;; vec_set optab for integer vectors: convert the lane index into the
;; one-hot merge mask expected by aarch64_simd_vec_set.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					    GEN_INT (elem), operands[0]));
    DONE;
  }
)
872
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper); on big-endian
;; the byte order means a left shift (SHL) gives the required effect.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
888
;; Insert a DI scalar into one lane of a V2DI vector, from either a GP
;; register or a SIMD register (lane 0).  Operand 2 is the one-hot lane
;; mask, endianness-adjusted as in the other vec_set patterns.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)
912
;; vec_set optab for V2DI: convert the lane index to a one-hot mask.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
925
;; Insert a scalar FP value (in a SIMD register) into one lane of an
;; FP vector via INS from element 0.  Operand 2 is the one-hot lane
;; mask, endianness-adjusted.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
942
;; vec_set optab for FP vectors: convert the lane index to a one-hot
;; mask for the insn pattern above.
(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
955
956
;; Multiply-accumulate: op0 = op2 * op3 + op1, with the accumulator
;; tied to the output register.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
967
;; Multiply-accumulate by one broadcast lane of a same-width vector
;; (MLA by element); lane number is endianness-adjusted.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
985
986 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
987 [(set (match_operand:VDQHS 0 "register_operand" "=w")
988 (plus:VDQHS
989 (mult:VDQHS
990 (vec_duplicate:VDQHS
991 (vec_select:<VEL>
992 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
993 (parallel [(match_operand:SI 2 "immediate_operand")])))
994 (match_operand:VDQHS 3 "register_operand" "w"))
995 (match_operand:VDQHS 4 "register_operand" "0")))]
996 "TARGET_SIMD"
997 {
998 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
999 INTVAL (operands[2])));
1000 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1001 }
1002 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1003 )
1004
1005 (define_insn "aarch64_mls<mode>"
1006 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1007 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1008 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1009 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1010 "TARGET_SIMD"
1011 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1012 [(set_attr "type" "neon_mla_<Vetype><q>")]
1013 )
1014
1015 (define_insn "*aarch64_mls_elt<mode>"
1016 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1017 (minus:VDQHS
1018 (match_operand:VDQHS 4 "register_operand" "0")
1019 (mult:VDQHS
1020 (vec_duplicate:VDQHS
1021 (vec_select:<VEL>
1022 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1023 (parallel [(match_operand:SI 2 "immediate_operand")])))
1024 (match_operand:VDQHS 3 "register_operand" "w"))))]
1025 "TARGET_SIMD"
1026 {
1027 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1028 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1029 }
1030 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1031 )
1032
1033 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1034 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1035 (minus:VDQHS
1036 (match_operand:VDQHS 4 "register_operand" "0")
1037 (mult:VDQHS
1038 (vec_duplicate:VDQHS
1039 (vec_select:<VEL>
1040 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))))]
1043 "TARGET_SIMD"
1044 {
1045 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[2])));
1047 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048 }
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1050 )
1051
1052 ;; Max/Min operations.
;; Element-wise signed/unsigned min/max for byte/half/single-int vectors,
;; mapping directly to a single SMAX/SMIN/UMAX/UMIN instruction.
1053 (define_insn "<su><maxmin><mode>3"
1054 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1055 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1056 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1057 "TARGET_SIMD"
1058 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1059 [(set_attr "type" "neon_minmax<q>")]
1060 )
1061
;; There is no V2DI min/max instruction, so synthesize it as a compare
;; followed by a conditional select: pick the code's comparison, then
;; emit vcond (op1 <cmp> op2) ? op1 : op2.
1062 (define_expand "<su><maxmin>v2di3"
1063 [(set (match_operand:V2DI 0 "register_operand" "")
1064 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1065 (match_operand:V2DI 2 "register_operand" "")))]
1066 "TARGET_SIMD"
1067 {
1068 enum rtx_code cmp_operator;
1069 rtx cmp_fmt;
1070
1071 switch (<CODE>)
1072 {
1073 case UMIN:
1074 cmp_operator = LTU;
1075 break;
1076 case SMIN:
1077 cmp_operator = LT;
1078 break;
1079 case UMAX:
1080 cmp_operator = GTU;
1081 break;
1082 case SMAX:
1083 cmp_operator = GT;
1084 break;
1085 default:
1086 gcc_unreachable ();
1087 }
1088
1089 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1090 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1091 operands[2], cmp_fmt, operands[1], operands[2]));
1092 DONE;
1093 })
1094
1095 ;; Pairwise Integer Max/Min operations.
1096 (define_insn "aarch64_<maxmin_uns>p<mode>"
1097 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1098 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1099 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1100 MAXMINV))]
1101 "TARGET_SIMD"
1102 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1103 [(set_attr "type" "neon_minmax<q>")]
1104 )
1105
1106 ;; Pairwise FP Max/Min operations.
1107 (define_insn "aarch64_<maxmin_uns>p<mode>"
1108 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1109 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1110 (match_operand:VHSDF 2 "register_operand" "w")]
1111 FMAXMINV))]
1112 "TARGET_SIMD"
1113 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1114 [(set_attr "type" "neon_minmax<q>")]
1115 )
1116
1117 ;; vec_concat gives a new vector with the low elements from operand 1, and
1118 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1119 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1120 ;; What that means, is that the RTL descriptions of the below patterns
1121 ;; need to change depending on endianness.
1122
1123 ;; Move to the low architectural bits of the register.
1124 ;; On little-endian this is { operand, zeroes }
1125 ;; On big-endian this is { zeroes, operand }
1126
;; Little-endian move-to-low-quad for modes with more than two elements:
;; result is { operand1, zeroes }.  Three alternatives: SIMD-to-SIMD dup,
;; GP-to-FP fmov, and GP dup.
1127 (define_insn "move_lo_quad_internal_<mode>"
1128 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1129 (vec_concat:VQ_NO2E
1130 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1131 (vec_duplicate:<VHALF> (const_int 0))))]
1132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1133 "@
1134 dup\\t%d0, %1.d[0]
1135 fmov\\t%d0, %1
1136 dup\\t%d0, %1"
1137 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1138 (set_attr "simd" "yes,*,yes")
1139 (set_attr "fp" "*,yes,*")
1140 (set_attr "length" "4")]
1141 )
1142
;; Same, for the two-element modes, where the zero half is a plain
;; const_int rather than a vec_duplicate.
1143 (define_insn "move_lo_quad_internal_<mode>"
1144 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1145 (vec_concat:VQ_2E
1146 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1147 (const_int 0)))]
1148 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1149 "@
1150 dup\\t%d0, %1.d[0]
1151 fmov\\t%d0, %1
1152 dup\\t%d0, %1"
1153 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1154 (set_attr "simd" "yes,*,yes")
1155 (set_attr "fp" "*,yes,*")
1156 (set_attr "length" "4")]
1157 )
1158
;; Big-endian variants: the RTL halves of the vec_concat are swapped
;; ({ zeroes, operand }) but the emitted instructions are identical.
1159 (define_insn "move_lo_quad_internal_be_<mode>"
1160 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1161 (vec_concat:VQ_NO2E
1162 (vec_duplicate:<VHALF> (const_int 0))
1163 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1164 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1165 "@
1166 dup\\t%d0, %1.d[0]
1167 fmov\\t%d0, %1
1168 dup\\t%d0, %1"
1169 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1170 (set_attr "simd" "yes,*,yes")
1171 (set_attr "fp" "*,yes,*")
1172 (set_attr "length" "4")]
1173 )
1174
1175 (define_insn "move_lo_quad_internal_be_<mode>"
1176 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1177 (vec_concat:VQ_2E
1178 (const_int 0)
1179 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1180 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1181 "@
1182 dup\\t%d0, %1.d[0]
1183 fmov\\t%d0, %1
1184 dup\\t%d0, %1"
1185 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1186 (set_attr "simd" "yes,*,yes")
1187 (set_attr "fp" "*,yes,*")
1188 (set_attr "length" "4")]
1189 )
1190
;; Dispatch to the endianness-appropriate internal pattern above.
1191 (define_expand "move_lo_quad_<mode>"
1192 [(match_operand:VQ 0 "register_operand")
1193 (match_operand:VQ 1 "register_operand")]
1194 "TARGET_SIMD"
1195 {
1196 if (BYTES_BIG_ENDIAN)
1197 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1198 else
1199 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1200 DONE;
1201 }
1202 )
1203
1204 ;; Move operand1 to the high architectural bits of the register, keeping
1205 ;; the low architectural bits of operand2.
1206 ;; For little-endian this is { operand2, operand1 }
1207 ;; For big-endian this is { operand1, operand2 }
1208
;; Little-endian: keep the low half of operand 0 (selected via the lo-half
;; PARALLEL in operand 2) and insert operand 1 into the high half with INS.
1209 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1210 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1211 (vec_concat:VQ
1212 (vec_select:<VHALF>
1213 (match_dup 0)
1214 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1215 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1216 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1217 "@
1218 ins\\t%0.d[1], %1.d[0]
1219 ins\\t%0.d[1], %1"
1220 [(set_attr "type" "neon_ins")]
1221 )
1222
;; Big-endian: same instruction, with the vec_concat halves swapped in
;; the RTL to reflect the reversed lane-to-bit mapping.
1223 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1224 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1225 (vec_concat:VQ
1226 (match_operand:<VHALF> 1 "register_operand" "w,r")
1227 (vec_select:<VHALF>
1228 (match_dup 0)
1229 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1231 "@
1232 ins\\t%0.d[1], %1.d[0]
1233 ins\\t%0.d[1], %1"
1234 [(set_attr "type" "neon_ins")]
1235 )
1236
;; Build the lo-half PARALLEL and dispatch on endianness.
1237 (define_expand "move_hi_quad_<mode>"
1238 [(match_operand:VQ 0 "register_operand" "")
1239 (match_operand:<VHALF> 1 "register_operand" "")]
1240 "TARGET_SIMD"
1241 {
1242 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1243 if (BYTES_BIG_ENDIAN)
1244 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1245 operands[1], p));
1246 else
1247 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1248 operands[1], p));
1249 DONE;
1250 })
1251
1252 ;; Narrowing operations.
1253
1254 ;; For doubles.
;; Truncate each element of a 128-bit vector to half width (XTN),
;; producing a 64-bit result vector.
1255 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1256 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1257 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1258 "TARGET_SIMD"
1259 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1260 [(set_attr "type" "neon_shift_imm_narrow_q")]
1261 )
1262
;; Pack two 64-bit vectors: first concatenate them into a temporary
;; 128-bit register (lo/hi halves swapped for big-endian so the element
;; order comes out right), then narrow with a single XTN.
1263 (define_expand "vec_pack_trunc_<mode>"
1264 [(match_operand:<VNARROWD> 0 "register_operand" "")
1265 (match_operand:VDN 1 "register_operand" "")
1266 (match_operand:VDN 2 "register_operand" "")]
1267 "TARGET_SIMD"
1268 {
1269 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1270 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1271 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1272
1273 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1274 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1275 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1276 DONE;
1277 })
1278
1279 ;; For quads.
1280
;; Pack two 128-bit vectors into one via XTN + XTN2.  The output is
;; earlyclobber ("=&w") because it is written before both inputs are
;; fully consumed; the operand order swaps on big-endian.
1281 (define_insn "vec_pack_trunc_<mode>"
1282 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1283 (vec_concat:<VNARROWQ2>
1284 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1285 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1286 "TARGET_SIMD"
1287 {
1288 if (BYTES_BIG_ENDIAN)
1289 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1290 else
1291 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1292 }
1293 [(set_attr "type" "multiple")
1294 (set_attr "length" "8")]
1295 )
1296
1297 ;; Widening operations.
1298
;; Sign/zero extend the low half of a 128-bit vector to full-width
;; elements using a shift-left-long by zero (SHLL/USHLL #0).
1299 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1302 (match_operand:VQW 1 "register_operand" "w")
1303 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1304 )))]
1305 "TARGET_SIMD"
1306 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1307 [(set_attr "type" "neon_shift_imm_long")]
1308 )
1309
;; Same for the high half, via the "2" (upper-half) instruction form.
1310 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1312 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1313 (match_operand:VQW 1 "register_operand" "w")
1314 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1315 )))]
1316 "TARGET_SIMD"
1317 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1318 [(set_attr "type" "neon_shift_imm_long")]
1319 )
1320
;; Standard expanders: build the half-selection PARALLEL (hi = true /
;; lo = false) and emit the matching insn above.
1321 (define_expand "vec_unpack<su>_hi_<mode>"
1322 [(match_operand:<VWIDE> 0 "register_operand" "")
1323 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1324 "TARGET_SIMD"
1325 {
1326 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1327 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1328 operands[1], p));
1329 DONE;
1330 }
1331 )
1332
1333 (define_expand "vec_unpack<su>_lo_<mode>"
1334 [(match_operand:<VWIDE> 0 "register_operand" "")
1335 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1336 "TARGET_SIMD"
1337 {
1338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1339 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1340 operands[1], p));
1341 DONE;
1342 }
1343 )
1344
1345 ;; Widening arithmetic.
1346
;; Widening multiply-accumulate on the low halves of two 128-bit vectors:
;; acc + extend(lo(op2)) * extend(lo(op4)).  Both selects share the same
;; lo-half PARALLEL (match_dup 3).
1347 (define_insn "*aarch64_<su>mlal_lo<mode>"
1348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1349 (plus:<VWIDE>
1350 (mult:<VWIDE>
1351 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 2 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 4 "register_operand" "w")
1356 (match_dup 3))))
1357 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1358 "TARGET_SIMD"
1359 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1360 [(set_attr "type" "neon_mla_<Vetype>_long")]
1361 )
1362
;; High-half variant, using the "2" instruction form.
1363 (define_insn "*aarch64_<su>mlal_hi<mode>"
1364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1365 (plus:<VWIDE>
1366 (mult:<VWIDE>
1367 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1368 (match_operand:VQW 2 "register_operand" "w")
1369 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1370 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1371 (match_operand:VQW 4 "register_operand" "w")
1372 (match_dup 3))))
1373 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1374 "TARGET_SIMD"
1375 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1376 [(set_attr "type" "neon_mla_<Vetype>_long")]
1377 )
1378
;; Widening multiply-subtract, low halves: acc - extend * extend.
1379 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1381 (minus:<VWIDE>
1382 (match_operand:<VWIDE> 1 "register_operand" "0")
1383 (mult:<VWIDE>
1384 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1385 (match_operand:VQW 2 "register_operand" "w")
1386 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1387 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1388 (match_operand:VQW 4 "register_operand" "w")
1389 (match_dup 3))))))]
1390 "TARGET_SIMD"
1391 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1392 [(set_attr "type" "neon_mla_<Vetype>_long")]
1393 )
1394
;; Widening multiply-subtract, high halves.
1395 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1397 (minus:<VWIDE>
1398 (match_operand:<VWIDE> 1 "register_operand" "0")
1399 (mult:<VWIDE>
1400 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1401 (match_operand:VQW 2 "register_operand" "w")
1402 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1403 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1404 (match_operand:VQW 4 "register_operand" "w")
1405 (match_dup 3))))))]
1406 "TARGET_SIMD"
1407 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1408 [(set_attr "type" "neon_mla_<Vetype>_long")]
1409 )
1410
;; 64-bit-source forms: the whole D-register input is extended, no
;; half-selection needed.
1411 (define_insn "*aarch64_<su>mlal<mode>"
1412 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1413 (plus:<VWIDE>
1414 (mult:<VWIDE>
1415 (ANY_EXTEND:<VWIDE>
1416 (match_operand:VD_BHSI 1 "register_operand" "w"))
1417 (ANY_EXTEND:<VWIDE>
1418 (match_operand:VD_BHSI 2 "register_operand" "w")))
1419 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1420 "TARGET_SIMD"
1421 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1422 [(set_attr "type" "neon_mla_<Vetype>_long")]
1423 )
1424
1425 (define_insn "*aarch64_<su>mlsl<mode>"
1426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1427 (minus:<VWIDE>
1428 (match_operand:<VWIDE> 1 "register_operand" "0")
1429 (mult:<VWIDE>
1430 (ANY_EXTEND:<VWIDE>
1431 (match_operand:VD_BHSI 2 "register_operand" "w"))
1432 (ANY_EXTEND:<VWIDE>
1433 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1434 "TARGET_SIMD"
1435 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1436 [(set_attr "type" "neon_mla_<Vetype>_long")]
1437 )
1438
;; Widening multiply of the low halves: extend(lo(op1)) * extend(lo(op2))
;; via SMULL/UMULL.
1439 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 1 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 2 "register_operand" "w")
1446 (match_dup 3)))))]
1447 "TARGET_SIMD"
1448 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1449 [(set_attr "type" "neon_mul_<Vetype>_long")]
1450 )
1451
;; Standard vec_widen_mult_lo expander: build the lo-half PARALLEL and
;; emit the insn above.
1452 (define_expand "vec_widen_<su>mult_lo_<mode>"
1453 [(match_operand:<VWIDE> 0 "register_operand" "")
1454 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1455 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1456 "TARGET_SIMD"
1457 {
1458 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1459 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1460 operands[1],
1461 operands[2], p));
1462 DONE;
1463 }
1464 )
1465
;; Widening multiply of the high halves via SMULL2/UMULL2.
1466 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1468 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1469 (match_operand:VQW 1 "register_operand" "w")
1470 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1471 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1472 (match_operand:VQW 2 "register_operand" "w")
1473 (match_dup 3)))))]
1474 "TARGET_SIMD"
1475 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1476 [(set_attr "type" "neon_mul_<Vetype>_long")]
1477 )
1478
1479 (define_expand "vec_widen_<su>mult_hi_<mode>"
1480 [(match_operand:<VWIDE> 0 "register_operand" "")
1481 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1482 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1483 "TARGET_SIMD"
1484 {
1485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1486 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1487 operands[1],
1488 operands[2], p));
1489 DONE;
1490
1491 }
1492 )
1493
1494 ;; FP vector operations.
1495 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1496 ;; double-precision (64-bit) floating-point data types and arithmetic as
1497 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1498 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1499 ;;
1500 ;; Floating-point operations can raise an exception. Vectorizing such
1501 ;; operations are safe because of reasons explained below.
1502 ;;
1503 ;; ARMv8 permits an extension to enable trapped floating-point
1504 ;; exception handling, however this is an optional feature. In the
1505 ;; event of a floating-point exception being raised by vectorised
1506 ;; code then:
1507 ;; 1. If trapped floating-point exceptions are available, then a trap
1508 ;; will be taken when any lane raises an enabled exception. A trap
1509 ;; handler may determine which lane raised the exception.
1510 ;; 2. Alternatively a sticky exception flag is set in the
1511 ;; floating-point status register (FPSR). Software may explicitly
1512 ;; test the exception flags, in which case the tests will either
1513 ;; prevent vectorisation, allowing precise identification of the
1514 ;; failing operation, or if tested outside of vectorisable regions
1515 ;; then the specific operation and lane are not of interest.
1516
1517 ;; FP arithmetic operations.
1518
;; Element-wise FP add/sub/mul for half/single/double vector modes,
;; each a single instruction.
1519 (define_insn "add<mode>3"
1520 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1521 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1522 (match_operand:VHSDF 2 "register_operand" "w")))]
1523 "TARGET_SIMD"
1524 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1525 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1526 )
1527
1528 (define_insn "sub<mode>3"
1529 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1530 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1531 (match_operand:VHSDF 2 "register_operand" "w")))]
1532 "TARGET_SIMD"
1533 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1534 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1535 )
1536
1537 (define_insn "mul<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540 (match_operand:VHSDF 2 "register_operand" "w")))]
1541 "TARGET_SIMD"
1542 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1544 )
1545
;; FP division: first try the Newton-Raphson approximation sequence
;; (aarch64_emit_approx_div, enabled by the relevant tuning/flags);
;; otherwise fall through to the plain FDIV insn below.
1546 (define_expand "div<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549 (match_operand:VHSDF 2 "register_operand" "w")))]
1550 "TARGET_SIMD"
1551 {
1552 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1553 DONE;
1554
1555 operands[1] = force_reg (<MODE>mode, operands[1]);
1556 })
1557
1558 (define_insn "*div<mode>3"
1559 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1560 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1561 (match_operand:VHSDF 2 "register_operand" "w")))]
1562 "TARGET_SIMD"
1563 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1564 [(set_attr "type" "neon_fp_div_<stype><q>")]
1565 )
1566
;; FP negate and absolute value, single instruction each.
1567 (define_insn "neg<mode>2"
1568 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1570 "TARGET_SIMD"
1571 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1572 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1573 )
1574
1575 (define_insn "abs<mode>2"
1576 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1577 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1578 "TARGET_SIMD"
1579 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1580 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1581 )
1582
;; Fused multiply-add: op0 = op1 * op2 + op3, with the accumulator tied
;; to the output (FMLA).
1583 (define_insn "fma<mode>4"
1584 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1585 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1586 (match_operand:VHSDF 2 "register_operand" "w")
1587 (match_operand:VHSDF 3 "register_operand" "0")))]
1588 "TARGET_SIMD"
1589 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1590 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1591 )
1592
;; FMLA with one multiplicand broadcast from a lane of a same-width
;; vector; ENDIAN_LANE_N corrects the lane index for big-endian.
1593 (define_insn "*aarch64_fma4_elt<mode>"
1594 [(set (match_operand:VDQF 0 "register_operand" "=w")
1595 (fma:VDQF
1596 (vec_duplicate:VDQF
1597 (vec_select:<VEL>
1598 (match_operand:VDQF 1 "register_operand" "<h_con>")
1599 (parallel [(match_operand:SI 2 "immediate_operand")])))
1600 (match_operand:VDQF 3 "register_operand" "w")
1601 (match_operand:VDQF 4 "register_operand" "0")))]
1602 "TARGET_SIMD"
1603 {
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1605 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1606 }
1607 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1608 )
1609
;; As above, with the lane taken from the swapped-width vector mode.
1610 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1611 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1612 (fma:VDQSF
1613 (vec_duplicate:VDQSF
1614 (vec_select:<VEL>
1615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1616 (parallel [(match_operand:SI 2 "immediate_operand")])))
1617 (match_operand:VDQSF 3 "register_operand" "w")
1618 (match_operand:VDQSF 4 "register_operand" "0")))]
1619 "TARGET_SIMD"
1620 {
1621 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1622 INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1624 }
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1626 )
1627
;; FMLA with one multiplicand duplicated from a scalar register; lane 0
;; is used directly, so no endian correction is needed.
1628 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1629 [(set (match_operand:VMUL 0 "register_operand" "=w")
1630 (fma:VMUL
1631 (vec_duplicate:VMUL
1632 (match_operand:<VEL> 1 "register_operand" "w"))
1633 (match_operand:VMUL 2 "register_operand" "w")
1634 (match_operand:VMUL 3 "register_operand" "0")))]
1635 "TARGET_SIMD"
1636 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1637 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1638 )
1639
;; Scalar DF result computed as a by-lane FMLA from a V2DF lane.
1640 (define_insn "*aarch64_fma4_elt_to_64v2df"
1641 [(set (match_operand:DF 0 "register_operand" "=w")
1642 (fma:DF
1643 (vec_select:DF
1644 (match_operand:V2DF 1 "register_operand" "w")
1645 (parallel [(match_operand:SI 2 "immediate_operand")]))
1646 (match_operand:DF 3 "register_operand" "w")
1647 (match_operand:DF 4 "register_operand" "0")))]
1648 "TARGET_SIMD"
1649 {
1650 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1651 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1652 }
1653 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1654 )
1655
;; Fused multiply-subtract: op0 = op3 - op1 * op2, expressed as
;; fma (op1, -op2, op3) and emitted as FMLS.
1656 (define_insn "fnma<mode>4"
1657 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1658 (fma:VHSDF
1659 (match_operand:VHSDF 1 "register_operand" "w")
1660 (neg:VHSDF
1661 (match_operand:VHSDF 2 "register_operand" "w"))
1662 (match_operand:VHSDF 3 "register_operand" "0")))]
1663 "TARGET_SIMD"
1664 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1666 )
1667
;; FMLS with one multiplicand broadcast from a lane of a same-width
;; vector (the negation is on the full-vector multiplicand).
1668 (define_insn "*aarch64_fnma4_elt<mode>"
1669 [(set (match_operand:VDQF 0 "register_operand" "=w")
1670 (fma:VDQF
1671 (neg:VDQF
1672 (match_operand:VDQF 3 "register_operand" "w"))
1673 (vec_duplicate:VDQF
1674 (vec_select:<VEL>
1675 (match_operand:VDQF 1 "register_operand" "<h_con>")
1676 (parallel [(match_operand:SI 2 "immediate_operand")])))
1677 (match_operand:VDQF 4 "register_operand" "0")))]
1678 "TARGET_SIMD"
1679 {
1680 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1681 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1682 }
1683 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1684 )
1685
;; As above, with the lane taken from the swapped-width vector mode.
1686 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1687 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1688 (fma:VDQSF
1689 (neg:VDQSF
1690 (match_operand:VDQSF 3 "register_operand" "w"))
1691 (vec_duplicate:VDQSF
1692 (vec_select:<VEL>
1693 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQSF 4 "register_operand" "0")))]
1696 "TARGET_SIMD"
1697 {
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1699 INTVAL (operands[2])));
1700 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1701 }
1702 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1703 )
1704
;; FMLS with one multiplicand duplicated from a scalar register (lane 0).
1705 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1706 [(set (match_operand:VMUL 0 "register_operand" "=w")
1707 (fma:VMUL
1708 (neg:VMUL
1709 (match_operand:VMUL 2 "register_operand" "w"))
1710 (vec_duplicate:VMUL
1711 (match_operand:<VEL> 1 "register_operand" "w"))
1712 (match_operand:VMUL 3 "register_operand" "0")))]
1713 "TARGET_SIMD"
1714 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1715 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1716 )
1717
;; Scalar DF result computed as a by-lane FMLS from a V2DF lane.
1718 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1719 [(set (match_operand:DF 0 "register_operand" "=w")
1720 (fma:DF
1721 (vec_select:DF
1722 (match_operand:V2DF 1 "register_operand" "w")
1723 (parallel [(match_operand:SI 2 "immediate_operand")]))
1724 (neg:DF
1725 (match_operand:DF 3 "register_operand" "w"))
1726 (match_operand:DF 4 "register_operand" "0")))]
1727 "TARGET_SIMD"
1728 {
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1730 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1731 }
1732 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1733 )
1734
1735 ;; Vector versions of the floating-point frint patterns.
1736 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1737 (define_insn "<frint_pattern><mode>2"
1738 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1739 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1740 FRINT))]
1741 "TARGET_SIMD"
1742 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1743 [(set_attr "type" "neon_fp_round_<stype><q>")]
1744 )
1745
1746 ;; Vector versions of the fcvt standard patterns.
1747 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer: the inner unspec is
;; the rounding (FCVT variant), the outer FIXUORS the fix conversion.
1748 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1749 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1750 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1751 [(match_operand:VHSDF 1 "register_operand" "w")]
1752 FCVT)))]
1753 "TARGET_SIMD"
1754 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1755 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1756 )
1757
;; Combine (x * 2^n) followed by truncating convert-to-int into a single
;; fixed-point FCVTZS/FCVTZU with #fbits, when the multiplier is a
;; power-of-2 constant within the element's bit range.
1758 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1759 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1760 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1761 [(mult:VDQF
1762 (match_operand:VDQF 1 "register_operand" "w")
1763 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1764 UNSPEC_FRINTZ)))]
1765 "TARGET_SIMD
1766 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1767 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1768 {
1769 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1770 char buf[64];
1771 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1772 output_asm_insn (buf, operands);
1773 return "";
1774 }
1775 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1776 )
1777
;; Standard fix/fixuns expanders; both use the truncating (FRINTZ)
;; rounding and are matched by the l<fcvt_pattern> insn above.
1778 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1779 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1780 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1781 [(match_operand:VHSDF 1 "register_operand")]
1782 UNSPEC_FRINTZ)))]
1783 "TARGET_SIMD"
1784 {})
1785
1786 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1787 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1788 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1789 [(match_operand:VHSDF 1 "register_operand")]
1790 UNSPEC_FRINTZ)))]
1791 "TARGET_SIMD"
1792 {})
1793
;; ftrunc = round toward zero (FRINTZ), kept in FP.
1794 (define_expand "ftrunc<VHSDF:mode>2"
1795 [(set (match_operand:VHSDF 0 "register_operand")
1796 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
1797 UNSPEC_FRINTZ))]
1798 "TARGET_SIMD"
1799 {})
1800
;; Signed/unsigned integer to FP conversion (SCVTF/UCVTF).
1801 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1802 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1803 (FLOATUORS:VHSDF
1804 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1805 "TARGET_SIMD"
1806 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1807 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1808 )
1809
1810 ;; Conversions between vectors of floats and doubles.
1811 ;; Contains a mix of patterns to match standard pattern names
1812 ;; and those for intrinsics.
1813
1814 ;; Float widening operations.
1815
;; Widen the low half of a HF/SF 128-bit vector to the next wider FP
;; mode with FCVTL.
1816 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1818 (float_extend:<VWIDE> (vec_select:<VHALF>
1819 (match_operand:VQ_HSF 1 "register_operand" "w")
1820 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1821 )))]
1822 "TARGET_SIMD"
1823 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1824 [(set_attr "type" "neon_fp_cvt_widen_s")]
1825 )
1826
1827 ;; Convert between fixed-point and floating-point (vector modes)
1828
;; FP -> fixed-point with #fbits fractional bits (FCVTZS/FCVTZU form).
1829 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1830 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1831 (unspec:<VHSDF:FCVT_TARGET>
1832 [(match_operand:VHSDF 1 "register_operand" "w")
1833 (match_operand:SI 2 "immediate_operand" "i")]
1834 FCVT_F2FIXED))]
1835 "TARGET_SIMD"
1836 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1837 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
1838 )
1839
;; Fixed-point -> FP with #fbits fractional bits (SCVTF/UCVTF form).
1840 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1841 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1842 (unspec:<VDQ_HSDI:FCVT_TARGET>
1843 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1844 (match_operand:SI 2 "immediate_operand" "i")]
1845 FCVT_FIXED2F))]
1846 "TARGET_SIMD"
1847 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1848 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1849 )
1850
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns their behavior is as required.

;; Standard-name expander: widen the low architectural half via the
;; matching aarch64_simd_vec_unpacks_lo_<mode> insn ('false' selects the
;; low-half lane PARALLEL).
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
1871
;; Widen the high architectural half of a full-width HF/SF vector.
;; Operand 2 must be the high-half lane PARALLEL; FCVTL2 reads the high
;; 64 bits of the source register.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1882
;; Standard-name expander: widen the high architectural half ('true'
;; selects the high-half lane PARALLEL).  "hi" always means the higher
;; architectural lanes here, regardless of endianness (see note above).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    /* Emit the _hi_ insn, not _lo_: the lo pattern's vect_par_cnst_lo_half
       predicate would reject the high-half PARALLEL built above, leaving
       an unrecognizable insn.  */
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Whole-vector widening conversion for 64-bit FP vectors (e.g. V2SF->V2DF,
;; V4HF->V4SF): every lane of the D-reg source is widened with FCVTL.
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1902
;; Float narrowing operations.

;; Narrow a full-width FP vector into a 64-bit result (e.g. V2DF->V2SF)
;; with FCVTN.
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; FCVTN2: narrow operand 2 into the high half of the destination while
;; keeping operand 1 (tied to the output) in the low half.  Little-endian
;; form: GCC lane order puts the kept half first in the vec_concat.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Big-endian form of the above: the vec_concat arms are swapped because
;; GCC element numbering is reversed relative to architectural lanes.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Dispatch to the _le/_be variant matching the target endianness.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
1949
;; Pack two V2DF operands into one V4SF: FCVTN narrows one operand into
;; the low half, FCVTN2 the other into the high half.  The lo/hi roles of
;; operands 1 and 2 swap on big-endian (see the vec_unpacks note above).
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)

;; Scalar variant: build a V2DF from the two DF inputs (via move_lo/hi_quad),
;; then narrow the pair with a single FCVTN.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
1991
1992 ;; FP Max/Min
1993 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1994 ;; expression like:
1995 ;; a = (b < c) ? b : c;
1996 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1997 ;; either explicitly or indirectly via -ffast-math.
1998 ;;
1999 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2000 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2001 ;; operand will be returned when both operands are zero (i.e. they may not
2002 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2003 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2004 ;; NaNs.
2005
;; smax/smin standard names: map to FMAXNM/FMINNM (the NaN-propagation
;; caveats are described in the comment above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Intrinsics-level max/min variants (FMAX/FMIN/FMAXNM/FMINNM), kept as an
;; unspec so their exact NaN/zero semantics are opaque to the optimizers.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
(define_insn "<fmaxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FMAXMIN))]
  "TARGET_SIMD"
  "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2035
2036 ;; 'across lanes' add.
2037
;; Integer add reduction to a scalar: ADDV leaves the sum in lane 0 of a
;; vector scratch, then the (endian-corrected) lane is extracted.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Pairwise FP add; used below to build FP reductions.
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

;; ADDV/ADDP across-lanes add for the modes the instruction supports.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; V2SI has no ADDV; a single pairwise ADDP gives the sum in every lane.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; Two-element FP vectors reduce directly to a scalar with one FADDP.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

;; V4SF reduction: two rounds of pairwise FADDP put the total in every
;; lane; extract the (endian-corrected) lane 0.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2102
;; Count leading sign bits (CLS) per lane.
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Count leading zeros (CLZ) per lane.
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Population count (CNT); only byte-element vectors exist in hardware.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2126
2127 ;; 'across lanes' max and min ops.
2128
2129 ;; Template for outputting a scalar, so we can create __builtins which can be
2130 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min reduction to scalar: F(MAX|MIN)(NM)?V into a vector scratch,
;; then extract the endian-corrected lane 0.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
		FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Integer across-lanes max/min ([SU](MAX|MIN)V).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; V2SI has no across-lanes form; a single pairwise op suffices.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

;; FP across-lanes max/min (F(MAX|MIN)(NM)?[PV] via <vp>).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQF 0 "register_operand" "=w")
       (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
		    FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
)
2188
2189 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2190 ;; allocation.
2191 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2192 ;; to select.
2193 ;;
2194 ;; Thus our BSL is of the form:
2195 ;; op0 = bsl (mask, op2, op3)
2196 ;; We can use any of:
2197 ;;
2198 ;; if (op0 = mask)
2199 ;; bsl mask, op1, op2
2200 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2201 ;; bit op0, op2, mask
2202 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2203 ;; bif op0, op1, mask
2204 ;;
2205 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2206 ;; Some forms of straight-line code may generate the equivalent form
2207 ;; in *aarch64_simd_bsl<mode>_alt.
2208
;; Bitwise select expressed canonically as (((op2 ^ op3) & mask) ^ op3):
;; each result bit comes from op2 where the mask bit is 1, from op3 where
;; it is 0.  The three alternatives tie the output to the mask, op3 or op2
;; respectively, selecting BSL, BIT or BIF.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
	     (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:<V_cmp_result> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.

(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
	      (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:VSDQ_I_DI 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; Public BSL expander.  FP modes are punned to the equivalent integer
;; mode (<V_cmp_result>) since the bitwise insn above is integer-only,
;; with the result punned back afterwards.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_cmp_result> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
      tmp = gen_reg_rtx (<V_cmp_result>mode);
    }
  operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
							  operands[1],
							  operands[2],
							  operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2275
;; Integer vector conditional select: op0 = (op4 CODE op5) ? op1 : op2.
;; A compare builds an all-ones/all-zeros mask which feeds a BSL, or is
;; moved directly when op1/op2 are -1/0.
(define_expand "aarch64_vcond_internal<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
     and desirable for other comparisons if it results in FOO ? -1 : 0
     (this allows direct use of the comparison result without a bsl).  */
  if (code == NE
      || (code != EQ
	  && op1 == CONST0_RTX (<V_cmp_result>mode)
	  && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
    {
      /* Swap the select arms, so the comparison must be inverted.  */
      op1 = operands[2];
      op2 = operands[1];
      switch (code)
        {
        case LE: code = GT; break;
        case LT: code = GE; break;
        case GE: code = LT; break;
        case GT: code = LE; break;
        /* No case EQ.  */
        case NE: code = EQ; break;
        case LTU: code = GEU; break;
        case LEU: code = GTU; break;
        case GTU: code = LEU; break;
        case GEU: code = LTU; break;
        default: gcc_unreachable ();
        }
    }

  /* Make sure we can handle the last operand.  */
  switch (code)
    {
    case NE:
      /* Normalized to EQ above.  */
      gcc_unreachable ();

    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      /* These instructions have a form taking an immediate zero.  */
      if (operands[5] == CONST0_RTX (<MODE>mode))
	break;
      /* Fall through, as may need to load into register.  */
    default:
      if (!REG_P (operands[5]))
	operands[5] = force_reg (<MODE>mode, operands[5]);
      break;
    }

  /* Emit the compare.  LTU/LEU have no direct instruction, so they are
     implemented by swapping the operands of GTU/GEU.  */
  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
      break;

    /* NE has been normalized to EQ above.  */
    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
      break;

    default:
      gcc_unreachable ();
    }

    /* If we have (a = (b CMP c) ? -1 : 0);
       Then we can simply move the generated mask.  */

    if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
	&& op2 == CONST0_RTX (<V_cmp_result>mode))
      emit_move_insn (operands[0], mask);
    else
      {
	if (!REG_P (op1))
	  op1 = force_reg (<MODE>mode, op1);
	if (!REG_P (op2))
	  op2 = force_reg (<MODE>mode, op2);
	emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
					       op1, op2));
      }

  DONE;
})
2400
;; FP vector conditional select.  FCM* instructions return false for
;; unordered lanes, so unordered comparisons (UN*) are implemented by
;; emitting the inverse ordered compare and swapping the BSL data operands.
(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
  [(set (match_operand:VDQF_COND 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF 4 "register_operand")
	     (match_operand:VDQF 5 "nonmemory_operand")])
	  (match_operand:VDQF_COND 1 "nonmemory_operand")
	  (match_operand:VDQF_COND 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complimentary_comparison) (rtx, rtx, rtx);

  /* A comparison against zero can use the immediate-zero FCM form;
     anything else must be in a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
	operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
    }

  /* Pick the compare to emit (base) and its operand-swapped complement;
     'inverse' records that the complement plus swapped operands is needed
     to express the requested relation.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_aarch64_cmge<VDQF:mode>;
      complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_aarch64_cmgt<VDQF:mode>;
      complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_aarch64_cmeq<VDQF:mode>;
      complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
      break;
    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 Note that there also exist direct comparison against 0 forms,
	 so catch those as a special case.  */
      if (use_zero_form)
	{
	  inverse = 0;
	  switch (GET_CODE (operands[3]))
	    {
	    case LT:
	      base_comparison = gen_aarch64_cmlt<VDQF:mode>;
	      break;
	    case LE:
	      base_comparison = gen_aarch64_cmle<VDQF:mode>;
	      break;
	    default:
	      /* Do nothing, other zero form cases already have the correct
		 base_comparison.  */
	      break;
	    }
	}

      if (!inverse)
	emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* FCM returns false for lanes which are unordered, so if we use
	 the inverse of the comparison we actually want to emit, then
	 swap the operands to BSL, we will end up with the correct result.
	 Note that a NE NaN and NaN NE b are true for all a, b.

	 Our transformations are:
	 a GE b -> !(b GT a)
	 a GT b -> !(b GE a)
	 a LE b -> !(a GT b)
	 a LT b -> !(a GE b)
	 a NE b -> !(a EQ b)  */

      if (inverse)
	emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
	emit_insn (complimentary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b ||  b > a).  combining these comparisons give us
	 true iff !(a != b && a ORDERED b), swapping the operands to BSL
	 will then give us (a == b ||  a UNORDERED b) as intended.  */

      emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
	 Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  if (swap_bsl_operands)
    {
      op1 = operands[2];
      op2 = operands[1];
    }

    /* If we have (a = (b CMP c) ? -1 : 0);
       Then we can simply move the generated mask.  */

    if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
	&& op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
      emit_move_insn (operands[0], mask);
    else
      {
	if (!REG_P (op1))
	  op1 = force_reg (<VDQF_COND:MODE>mode, op1);
	if (!REG_P (op2))
	  op2 = force_reg (<VDQF_COND:MODE>mode, op2);
	emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
							 op1, op2));
      }

  DONE;
})
2583
;; Standard vcond name (signed / FP): defer to the internal expander above.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
					       operands[2], operands[3],
					       operands[4], operands[5]));
  DONE;
})

;; Mixed-mode vcond: FP comparison selecting between integer vectors of
;; the corresponding compare-result mode.
(define_expand "vcond<v_cmp_result><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand")
	(if_then_else:<V_cmp_result>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF 4 "register_operand")
	     (match_operand:VDQF 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_result> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
						operands[0], operands[1],
						operands[2], operands[3],
						operands[4], operands[5]));
  DONE;
})

;; Unsigned variant: the internal expander handles LTU/GEU etc. itself.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
					       operands[2], operands[3],
					       operands[4], operands[5]));
  DONE;
})
2632
2633 ;; Patterns for AArch64 SIMD Intrinsics.
2634
2635 ;; Lane extraction with sign extension to general purpose register.
;; Lane extraction with sign extension to general purpose register (SMOV).
;; Operand 2 is a GCC lane number; it is flipped to the architectural lane
;; at output time.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction with zero extension to a 32-bit GP register (UMOV).
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)

;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Destination may be a GP reg (UMOV), a SIMD reg (DUP) or memory (ST1).
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
2689
2690 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2691 ;; dest vector.
2692
;; Build a Q-reg from a D-reg value plus a zero upper half.  Since writing
;; a D register implicitly zeroes the top 64 bits, plain mov/fmov/ldr do
;; the whole job.  Little-endian form (zero half second in GCC order).
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)

;; Big-endian form of the above: the vec_concat arms are swapped.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
        (vec_concat:<VDBL>
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)

;; Combine two D-reg values into one Q-reg.  On big-endian the operands
;; are pre-swapped so the internal pattern's vec_concat matches GCC's
;; element numbering.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx op1, op2;
  if (BYTES_BIG_ENDIAN)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  else
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
  DONE;
}
)

;; Kept as a single insn until after reload (note the earlyclobber output),
;; then split into the concrete move sequence by aarch64_split_simd_combine.
(define_insn_and_split "aarch64_combine_internal<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
	(vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
			   (match_operand:VDC 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (BYTES_BIG_ENDIAN)
    aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
  else
    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; Intrinsics entry point: fill the low then the high quad of operand 0.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)
2775
2776 ;; <su><addsub>l<q>.
2777
;; Widening add/sub of the high halves of two Q-reg vectors
;; ([SU](ADD|SUB)L2); operand 3 is the shared high-half lane PARALLEL.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; Likewise for the low halves ([SU](ADD|SUB)L).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)


;; Intrinsics expanders for the "2" (high-half) forms: each builds the
;; high-half lane PARALLEL and defers to the _hi_internal insn above.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Whole-D-reg widening add/sub ([SU](ADD|SUB)L on 64-bit vectors).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
2863
;; <su><addsub>w<q>.

;; Widening sum for the vectorizer, Q-register input: accumulate the
;; sign-extension of all lanes of operand 1 into operand 2 using a
;; SADDW on the low half followed by a SADDW2 on the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; As above for a D-register input: a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned variant of widen_ssum for Q-register input (UADDW/UADDW2).
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned variant for a D-register input (single UADDW).
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; [US]ADDW / [US]SUBW with a D-register narrow operand.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; [US]ADDW / [US]SUBW taking the LOW half of a Q-register narrow operand.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; [US]ADDW2 / [US]SUBW2 taking the HIGH half of a Q-register operand.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
			(ANY_EXTEND:<VWIDE>
			  (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
)

;; Intrinsic expanders for the "2" (high-half) widening forms; each
;; builds the hi-half selector and defers to the matching internal insn.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})
3004
;; <su><r>h<addsub>.

;; Halving add/subtract, optionally rounding:
;; [US]HADD, [US]RHADD, [US]HSUB.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		         HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.

;; Narrowing high-half add/subtract, optionally rounding:
;; [R]ADDHN, [R]SUBHN — result is the narrow low half.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
		           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; [R]ADDHN2 / [R]SUBHN2 — writes the narrow result into the high half
;; of operand 0; operand 1 (tied to operand 0) supplies the low half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
		            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

;; Polynomial (carry-less) multiply on byte vectors.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.

;; FMULX: floating-point multiply extended (0 * inf returns 2.0 with
;; the correct sign instead of NaN); vector and scalar forms.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
	(unspec:VHSDF_SDF
	  [(match_operand:VHSDF_SDF 1 "register_operand" "w")
	   (match_operand:VHSDF_SDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
3064
;; vmulxq_lane_f32, and vmulx_laneq_f32

;; FMULX by one lane of a vector of the *other* width (lane index is
;; re-mapped for big-endian before printing).
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; FMULX by one lane of a vector of the same width.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
3103
;; vmulxq_lane

;; FMULX by a scalar broadcast to every lane; always selects element 0
;; of the scalar register.
;; NOTE: a stray ';' used to follow the output-template string; in md
;; syntax ';' starts a comment so it was inert, but it is removed here
;; to avoid confusion.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	 [(match_operand:VHSDF 1 "register_operand" "w")
	  (vec_duplicate:VHSDF
	    (match_operand:<VEL> 2 "register_operand" "w"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
3117
;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar FMULX of a scalar by one extracted lane of a vector.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF_DF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
;; <su>q<addsub>

;; Saturating add/subtract: [US]QADD / [US]QSUB.
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; suqadd and usqadd

;; Signed/unsigned saturating accumulate of the opposite-signedness
;; operand; operand 1 is the accumulator, tied to operand 0.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)

;; sqmovun

;; SQXTUN: signed saturating extract of an unsigned narrow result.
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

;; [US]QXTN: saturating extract narrow.
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			    SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

;; Saturating unary ops: SQABS / SQNEG.
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

;; Saturating doubling multiply high half, optionally rounding:
;; SQDMULH / SQRDMULH.
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

;; Vector form multiplying by one lane of a D-register vector.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Vector form multiplying by one lane of a Q-register vector.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3237
;; Scalar form multiplying by one lane of a D-register vector.
;; FIX: the lane reference used the register-prefix attribute "<v>"
;; ("%2.<v>[%3]"); use the element-type attribute "<Vetype>" as every
;; sibling lane pattern does.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3252
;; Scalar form multiplying by one lane of a Q-register vector.
;; FIX: the lane reference used the register-prefix attribute "<v>"
;; ("%2.<v>[%3]"); use the element-type attribute "<Vetype>" as every
;; sibling lane pattern does.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3267
;; sqrdml[as]h.

;; ARMv8.1-A rounding doubling multiply accumulate/subtract:
;; SQRDMLAH / SQRDMLSH.  Operand 1 is the accumulator, tied to the
;; destination.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

;; Vector form, multiplying by one lane of a D-register vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar form, multiplying by one lane of a D-register vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

;; Vector form, multiplying by one lane of a Q-register vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3339
;; Scalar form, multiplying by one lane of a Q-register vector.
;; FIX: the lane reference used the register-prefix attribute "<v>"
;; ("%3.<v>[%4]"); use the element-type attribute "<Vetype>" for
;; consistency with the _lane sibling above and every other lane
;; pattern in this file.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3357
;; vqdml[sa]l

;; Saturating doubling multiply accumulate/subtract long:
;; SQDMLAL / SQDMLSL.  Operand 1 is the wide accumulator, tied to
;; the destination; the product is doubled via ss_ashift by 1.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; vqdml[sa]l_lane

;; Vector form, multiplying by one lane of a D-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Vector form, multiplying by one lane of a Q-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar form, multiplying by one lane of a D-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar form, multiplying by one lane of a Q-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n

;; Vector form, multiplying by a scalar broadcast to all lanes
;; (printed as element 0 of the scalar register).
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3490
;; sqdml[as]l2

;; High-half SQDMLAL2 / SQDMLSL2.  Operand 4 selects the high-half
;; lanes of both narrow inputs (built by the expanders below).
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 3 "register_operand" "w")
		     (match_dup 4))))
	     (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Intrinsic expanders: build the hi-half selector and defer to the
;; internal insn above.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane

;; High-half form multiplying by one lane of a D-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		   ))))
	     (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; High-half form multiplying by one lane of a Q-register vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		   ))))
	     (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Intrinsic expanders for the lane/laneq high-half forms.
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
						        operands[2], operands[3],
						        operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
						        operands[2], operands[3],
						        operands[4], p));
  DONE;
})

;; High-half form multiplying by a broadcast scalar.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	     (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
3699
3700 ;; vqdmull
3701
;; Signed saturating doubling multiply long: sign-extend both inputs to the
;; wide mode, multiply, then double with saturation (ss_ashift by 1).
3702 (define_insn "aarch64_sqdmull<mode>"
3703 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3704 (ss_ashift:<VWIDE>
3705 (mult:<VWIDE>
3706 (sign_extend:<VWIDE>
3707 (match_operand:VSD_HSI 1 "register_operand" "w"))
3708 (sign_extend:<VWIDE>
3709 (match_operand:VSD_HSI 2 "register_operand" "w")))
3710 (const_int 1)))]
3711 "TARGET_SIMD"
3712 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3713 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3714 )
3715
3716 ;; vqdmull_lane
3717
;; sqdmull by one lane of a vector (lane form: 64-bit index register,
;; laneq form: 128-bit index register).  The selected lane is duplicated
;; across all lanes of the second multiplicand.  ENDIAN_LANE_N flips the
;; GCC lane number into the architectural one for big-endian assembly.
3718 (define_insn "aarch64_sqdmull_lane<mode>"
3719 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3720 (ss_ashift:<VWIDE>
3721 (mult:<VWIDE>
3722 (sign_extend:<VWIDE>
3723 (match_operand:VD_HSI 1 "register_operand" "w"))
3724 (sign_extend:<VWIDE>
3725 (vec_duplicate:VD_HSI
3726 (vec_select:<VEL>
3727 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3728 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3729 ))
3730 (const_int 1)))]
3731 "TARGET_SIMD"
3732 {
3733 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3734 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3735 }
3736 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3737 )
3738
;; As above, but the lane index selects from a 128-bit (<VCONQ>) register.
3739 (define_insn "aarch64_sqdmull_laneq<mode>"
3740 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3741 (ss_ashift:<VWIDE>
3742 (mult:<VWIDE>
3743 (sign_extend:<VWIDE>
3744 (match_operand:VD_HSI 1 "register_operand" "w"))
3745 (sign_extend:<VWIDE>
3746 (vec_duplicate:VD_HSI
3747 (vec_select:<VEL>
3748 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3749 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3750 ))
3751 (const_int 1)))]
3752 "TARGET_SIMD"
3753 {
3754 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3755 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3756 }
3757 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3758 )
3759
;; Scalar (SD_HSI) variant of sqdmull_lane: no vec_duplicate since the
;; first multiplicand is a scalar register.
3760 (define_insn "aarch64_sqdmull_lane<mode>"
3761 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3762 (ss_ashift:<VWIDE>
3763 (mult:<VWIDE>
3764 (sign_extend:<VWIDE>
3765 (match_operand:SD_HSI 1 "register_operand" "w"))
3766 (sign_extend:<VWIDE>
3767 (vec_select:<VEL>
3768 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3769 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3770 ))
3771 (const_int 1)))]
3772 "TARGET_SIMD"
3773 {
3774 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3775 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3776 }
3777 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3778 )
3779
;; Scalar variant of sqdmull_laneq (lane taken from a 128-bit register).
3780 (define_insn "aarch64_sqdmull_laneq<mode>"
3781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782 (ss_ashift:<VWIDE>
3783 (mult:<VWIDE>
3784 (sign_extend:<VWIDE>
3785 (match_operand:SD_HSI 1 "register_operand" "w"))
3786 (sign_extend:<VWIDE>
3787 (vec_select:<VEL>
3788 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3790 ))
3791 (const_int 1)))]
3792 "TARGET_SIMD"
3793 {
3794 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3795 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3796 }
3797 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3798 )
3799
3800 ;; vqdmull_n
3801
;; sqdmull by a scalar duplicated across all lanes (the "_n" intrinsic
;; form); always uses lane 0 of operand 2 in the assembly output.
3802 (define_insn "aarch64_sqdmull_n<mode>"
3803 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3804 (ss_ashift:<VWIDE>
3805 (mult:<VWIDE>
3806 (sign_extend:<VWIDE>
3807 (match_operand:VD_HSI 1 "register_operand" "w"))
3808 (sign_extend:<VWIDE>
3809 (vec_duplicate:VD_HSI
3810 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3811 )
3812 (const_int 1)))]
3813 "TARGET_SIMD"
3814 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3815 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3816 )
3817
3818 ;; vqdmull2
3819
3820
3821
;; sqdmull2: saturating doubling multiply long of the high halves of two
;; 128-bit vectors.  Operand 3 (the hi-half selector) is supplied by the
;; expander below.
;; NOTE(review): both multiplicands are full vectors here, yet the type
;; attribute says "_scalar_"; "neon_sat_mul_<Vetype>_long" looks like the
;; intended scheduling class — confirm before changing.
3822 (define_insn "aarch64_sqdmull2<mode>_internal"
3823 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3824 (ss_ashift:<VWIDE>
3825 (mult:<VWIDE>
3826 (sign_extend:<VWIDE>
3827 (vec_select:<VHALF>
3828 (match_operand:VQ_HSI 1 "register_operand" "w")
3829 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3830 (sign_extend:<VWIDE>
3831 (vec_select:<VHALF>
3832 (match_operand:VQ_HSI 2 "register_operand" "w")
3833 (match_dup 3)))
3834 )
3835 (const_int 1)))]
3836 "TARGET_SIMD"
3837 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3838 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3839 )
3840
;; Expander: builds the hi-half parallel and forwards to the _internal insn.
3841 (define_expand "aarch64_sqdmull2<mode>"
3842 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3843 (match_operand:VQ_HSI 1 "register_operand" "w")
3844 (match_operand:VQ_HSI 2 "register_operand" "w")]
3845 "TARGET_SIMD"
3846 {
3847 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3848 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3849 operands[2], p));
3850 DONE;
3851 })
3852
3853 ;; vqdmull2_lane
3854
;; sqdmull2 by one lane: high half of operand 1 multiplied by a duplicated
;; lane of operand 2 (lane form: <VCOND> register, laneq form: <VCONQ>).
;; Operand 4 (hi-half selector) is supplied by the expanders below;
;; ENDIAN_LANE_N converts the GCC lane number for big-endian assembly.
3855 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3856 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3857 (ss_ashift:<VWIDE>
3858 (mult:<VWIDE>
3859 (sign_extend:<VWIDE>
3860 (vec_select:<VHALF>
3861 (match_operand:VQ_HSI 1 "register_operand" "w")
3862 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3863 (sign_extend:<VWIDE>
3864 (vec_duplicate:<VHALF>
3865 (vec_select:<VEL>
3866 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3867 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3868 ))
3869 (const_int 1)))]
3870 "TARGET_SIMD"
3871 {
3872 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3873 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3874 }
3875 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3876 )
3877
;; laneq variant: the lane index selects from a 128-bit (<VCONQ>) register.
3878 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3879 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3880 (ss_ashift:<VWIDE>
3881 (mult:<VWIDE>
3882 (sign_extend:<VWIDE>
3883 (vec_select:<VHALF>
3884 (match_operand:VQ_HSI 1 "register_operand" "w")
3885 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3886 (sign_extend:<VWIDE>
3887 (vec_duplicate:<VHALF>
3888 (vec_select:<VEL>
3889 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3890 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3891 ))
3892 (const_int 1)))]
3893 "TARGET_SIMD"
3894 {
3895 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3896 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3897 }
3898 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3899 )
3900
;; Expander: builds the hi-half parallel for the lane _internal pattern.
3901 (define_expand "aarch64_sqdmull2_lane<mode>"
3902 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3903 (match_operand:VQ_HSI 1 "register_operand" "w")
3904 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3905 (match_operand:SI 3 "immediate_operand" "i")]
3906 "TARGET_SIMD"
3907 {
3908 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3909 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3910 operands[2], operands[3],
3911 p));
3912 DONE;
3913 })
3914
;; Expander: builds the hi-half parallel for the laneq _internal pattern.
3915 (define_expand "aarch64_sqdmull2_laneq<mode>"
3916 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3917 (match_operand:VQ_HSI 1 "register_operand" "w")
3918 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3919 (match_operand:SI 3 "immediate_operand" "i")]
3920 "TARGET_SIMD"
3921 {
3922 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3923 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3924 operands[2], operands[3],
3925 p));
3926 DONE;
3927 })
3928
3929 ;; vqdmull2_n
3930
;; sqdmull2 by a scalar duplicated across all lanes ("_n" form); the high
;; half of operand 1 is widened and multiplied by lane 0 of operand 2.
;; Operand 3 (hi-half selector) is supplied by the expander below.
3931 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3932 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3933 (ss_ashift:<VWIDE>
3934 (mult:<VWIDE>
3935 (sign_extend:<VWIDE>
3936 (vec_select:<VHALF>
3937 (match_operand:VQ_HSI 1 "register_operand" "w")
3938 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3939 (sign_extend:<VWIDE>
3940 (vec_duplicate:<VHALF>
3941 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3942 )
3943 (const_int 1)))]
3944 "TARGET_SIMD"
3945 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3946 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3947 )
3948
;; Expander: builds the hi-half parallel and forwards to the _internal insn.
3949 (define_expand "aarch64_sqdmull2_n<mode>"
3950 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3951 (match_operand:VQ_HSI 1 "register_operand" "w")
3952 (match_operand:<VEL> 2 "register_operand" "w")]
3953 "TARGET_SIMD"
3954 {
3955 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3956 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
3957 operands[2], p));
3958 DONE;
3959 })
3960
3961 ;; vshl
3962
;; Vector shift by a vector of per-lane shift amounts; <sur> selects the
;; signed/unsigned and plain/rounding VSHL variants.
;; (Fix: dropped the stray ";" that followed the output template — in md
;; syntax ";" begins a comment, so it was harmless but misleading noise.)
3963 (define_insn "aarch64_<sur>shl<mode>"
3964 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3965 (unspec:VSDQ_I_DI
3966 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3967 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3968 VSHL))]
3969 "TARGET_SIMD"
3970 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3971 [(set_attr "type" "neon_shift_reg<q>")]
3972 )
3973
3974
3975 ;; vqshl
3976
;; Saturating (optionally rounding) vector shift by a vector of per-lane
;; shift amounts; <sur> and <r> select among the VQSHL variants.
;; (Fix: dropped the stray ";" that followed the output template — in md
;; syntax ";" begins a comment, so it was harmless but misleading noise.)
3977 (define_insn "aarch64_<sur>q<r>shl<mode>"
3978 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3979 (unspec:VSDQ_I
3980 [(match_operand:VSDQ_I 1 "register_operand" "w")
3981 (match_operand:VSDQ_I 2 "register_operand" "w")]
3982 VQSHL))]
3983 "TARGET_SIMD"
3984 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3985 [(set_attr "type" "neon_sat_shift_reg<q>")]
)
3987
3988 ;; vshll_n
3989
;; Shift-left long by immediate.  When the shift equals the element width
;; only the plain SHLL encoding exists, so that mnemonic is emitted;
;; otherwise the <sur>-selected (s/u) variant is used.
3990 (define_insn "aarch64_<sur>shll_n<mode>"
3991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3992 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3993 (match_operand:SI 2
3994 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3995 VSHLL))]
3996 "TARGET_SIMD"
3997 {
3998 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
3999 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4000 else
4001 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4002 }
4003 [(set_attr "type" "neon_shift_imm_long")]
4004 )
4005
4006 ;; vshll_high_n
4007
;; Shift-left long, high half, by immediate (shll2 when the shift equals
;; the element width, <sur>shll2 otherwise).
;; NOTE(review): operand 2 uses plain "immediate_operand" whereas shll_n
;; above uses the range-checked aarch64_simd_shift_imm_bitsize predicate —
;; looks inconsistent; confirm whether out-of-range immediates can reach
;; this pattern before tightening.
4008 (define_insn "aarch64_<sur>shll2_n<mode>"
4009 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4010 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4011 (match_operand:SI 2 "immediate_operand" "i")]
4012 VSHLL))]
4013 "TARGET_SIMD"
4014 {
4015 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4016 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4017 else
4018 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4019 }
4020 [(set_attr "type" "neon_shift_imm_long")]
4021 )
4022
4023 ;; vrshr_n
4024
;; (Rounding) shift right by immediate; VRSHR_N selects the s/u and
;; plain/rounding variants via <sur>.
4025 (define_insn "aarch64_<sur>shr_n<mode>"
4026 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4027 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4028 (match_operand:SI 2
4029 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4030 VRSHR_N))]
4031 "TARGET_SIMD"
4032 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4033 [(set_attr "type" "neon_sat_shift_imm<q>")]
4034 )
4035
4036 ;; v(r)sra_n
4037
;; Shift right by immediate and accumulate; operand 1 is tied to the
;; destination ("0") because sra accumulates into it.
4038 (define_insn "aarch64_<sur>sra_n<mode>"
4039 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4040 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4041 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4042 (match_operand:SI 3
4043 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4044 VSRA))]
4045 "TARGET_SIMD"
4046 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4047 [(set_attr "type" "neon_shift_acc<q>")]
4048 )
4049
4050 ;; vs<lr>i_n
4051
;; Shift left/right and insert (sli/sri); operand 1 is tied to the
;; destination since only the shifted-in bits of operand 2 are written.
4052 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4053 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4054 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4055 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4056 (match_operand:SI 3
4057 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4058 VSLRI))]
4059 "TARGET_SIMD"
4060 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4061 [(set_attr "type" "neon_shift_imm<q>")]
4062 )
4063
4064 ;; vqshl(u)
4065
;; Saturating shift left by immediate; <sur>/<u> cover the signed,
;; unsigned and signed-to-unsigned (sqshlu) VQSHL_N variants.
4066 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4067 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4068 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4069 (match_operand:SI 2
4070 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4071 VQSHL_N))]
4072 "TARGET_SIMD"
4073 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4074 [(set_attr "type" "neon_sat_shift_imm<q>")]
4075 )
4076
4077
4078 ;; vq(r)shr(u)n_n
4079
;; Saturating (rounding) shift right narrow by immediate; result is the
;; narrow (<VNARROWQ>) mode of the input.
4080 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4081 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4082 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4083 (match_operand:SI 2
4084 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4085 VQSHRN_N))]
4086 "TARGET_SIMD"
4087 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4088 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4089 )
4090
4091
4092 ;; cm(eq|ge|gt|lt|le)
4093 ;; Note, we have constraints for Dz and Z as different expanders
4094 ;; have different ideas of what should be passed to this pattern.
4095
;; Integer compare producing an all-ones/all-zeros mask (neg of the
;; comparison result).  Alternative 2 compares against zero (ZDz).
4096 (define_insn "aarch64_cm<optab><mode>"
4097 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4098 (neg:<V_cmp_result>
4099 (COMPARISONS:<V_cmp_result>
4100 (match_operand:VDQ_I 1 "register_operand" "w,w")
4101 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4102 )))]
4103 "TARGET_SIMD"
4104 "@
4105 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4106 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4107 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4108 )
4109
;; DI-mode signed compare.  Kept as "#" until after reload; the split
;; either emits a GP-register compare+cset sequence (clobbering CC) or
;; falls through to the CC-free *aarch64_cm<optab>di pattern below.
4110 (define_insn_and_split "aarch64_cm<optab>di"
4111 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4112 (neg:DI
4113 (COMPARISONS:DI
4114 (match_operand:DI 1 "register_operand" "w,w,r")
4115 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4116 )))
4117 (clobber (reg:CC CC_REGNUM))]
4118 "TARGET_SIMD"
4119 "#"
4120 "reload_completed"
4121 [(set (match_operand:DI 0 "register_operand")
4122 (neg:DI
4123 (COMPARISONS:DI
4124 (match_operand:DI 1 "register_operand")
4125 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4126 )))]
4127 {
4128 /* If we are in the general purpose register file,
4129 we split to a sequence of comparison and store. */
4130 if (GP_REGNUM_P (REGNO (operands[0]))
4131 && GP_REGNUM_P (REGNO (operands[1])))
4132 {
4133 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4134 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4135 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4136 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4137 DONE;
4138 }
4139 /* Otherwise, we expand to a similar pattern which does not
4140 clobber CC_REGNUM. */
4141 }
4142 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4143 )
4144
;; SIMD-register form of the DI compare, matched only after reload (once
;; the split above has decided the register file).
4145 (define_insn "*aarch64_cm<optab>di"
4146 [(set (match_operand:DI 0 "register_operand" "=w,w")
4147 (neg:DI
4148 (COMPARISONS:DI
4149 (match_operand:DI 1 "register_operand" "w,w")
4150 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4151 )))]
4152 "TARGET_SIMD && reload_completed"
4153 "@
4154 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4155 cm<optab>\t%d0, %d1, #0"
4156 [(set_attr "type" "neon_compare, neon_compare_zero")]
4157 )
4158
4159 ;; cm(hs|hi)
4160
;; Unsigned compare producing an all-ones/all-zeros mask; no
;; compare-with-zero alternative exists for the unsigned forms.
4161 (define_insn "aarch64_cm<optab><mode>"
4162 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4163 (neg:<V_cmp_result>
4164 (UCOMPARISONS:<V_cmp_result>
4165 (match_operand:VDQ_I 1 "register_operand" "w")
4166 (match_operand:VDQ_I 2 "register_operand" "w")
4167 )))]
4168 "TARGET_SIMD"
4169 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4170 [(set_attr "type" "neon_compare<q>")]
4171 )
4172
;; DI-mode unsigned compare; same post-reload split strategy as the
;; signed version above, but uses plain CCmode for the GP sequence.
4173 (define_insn_and_split "aarch64_cm<optab>di"
4174 [(set (match_operand:DI 0 "register_operand" "=w,r")
4175 (neg:DI
4176 (UCOMPARISONS:DI
4177 (match_operand:DI 1 "register_operand" "w,r")
4178 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4179 )))
4180 (clobber (reg:CC CC_REGNUM))]
4181 "TARGET_SIMD"
4182 "#"
4183 "reload_completed"
4184 [(set (match_operand:DI 0 "register_operand")
4185 (neg:DI
4186 (UCOMPARISONS:DI
4187 (match_operand:DI 1 "register_operand")
4188 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4189 )))]
4190 {
4191 /* If we are in the general purpose register file,
4192 we split to a sequence of comparison and store. */
4193 if (GP_REGNUM_P (REGNO (operands[0]))
4194 && GP_REGNUM_P (REGNO (operands[1])))
4195 {
4196 machine_mode mode = CCmode;
4197 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4198 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4199 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4200 DONE;
4201 }
4202 /* Otherwise, we expand to a similar pattern which does not
4203 clobber CC_REGNUM. */
4204 }
4205 [(set_attr "type" "neon_compare,multiple")]
4206 )
4207
;; SIMD-register form of the unsigned DI compare, post-reload only.
4208 (define_insn "*aarch64_cm<optab>di"
4209 [(set (match_operand:DI 0 "register_operand" "=w")
4210 (neg:DI
4211 (UCOMPARISONS:DI
4212 (match_operand:DI 1 "register_operand" "w")
4213 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4214 )))]
4215 "TARGET_SIMD && reload_completed"
4216 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4217 [(set_attr "type" "neon_compare")]
4218 )
4219
4220 ;; cmtst
4221
4222 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4223 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4224 ;; not (neg (eq (and x y) 0))
4225 ;; which is rewritten by simplify_rtx as
4226 ;; plus (eq (and x y) 0) -1.
4227
;; cmtst: per the file comment above, matched as (eq (and x y) 0) + -1,
;; the form simplify_rtx produces, rather than neg (ne ...).
4228 (define_insn "aarch64_cmtst<mode>"
4229 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4230 (plus:<V_cmp_result>
4231 (eq:<V_cmp_result>
4232 (and:VDQ_I
4233 (match_operand:VDQ_I 1 "register_operand" "w")
4234 (match_operand:VDQ_I 2 "register_operand" "w"))
4235 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4236 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4237 ]
4238 "TARGET_SIMD"
4239 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4240 [(set_attr "type" "neon_tst<q>")]
4241 )
4242
;; DI-mode cmtst: like the cm<optab>di patterns, kept as "#" until after
;; reload, then split either to a GP tst+cset sequence (clobbering CC) or
;; to the CC-free *aarch64_cmtstdi pattern below.
4243 (define_insn_and_split "aarch64_cmtstdi"
4244 [(set (match_operand:DI 0 "register_operand" "=w,r")
4245 (neg:DI
4246 (ne:DI
4247 (and:DI
4248 (match_operand:DI 1 "register_operand" "w,r")
4249 (match_operand:DI 2 "register_operand" "w,r"))
4250 (const_int 0))))
4251 (clobber (reg:CC CC_REGNUM))]
4252 "TARGET_SIMD"
4253 "#"
4254 "reload_completed"
4255 [(set (match_operand:DI 0 "register_operand")
4256 (neg:DI
4257 (ne:DI
4258 (and:DI
4259 (match_operand:DI 1 "register_operand")
4260 (match_operand:DI 2 "register_operand"))
4261 (const_int 0))))]
4262 {
4263 /* If we are in the general purpose register file,
4264 we split to a sequence of comparison and store. */
4265 if (GP_REGNUM_P (REGNO (operands[0]))
4266 && GP_REGNUM_P (REGNO (operands[1])))
4267 {
4268 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4269 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4270 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4271 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4272 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4273 DONE;
4274 }
4275 /* Otherwise, we expand to a similar pattern which does not
4276 clobber CC_REGNUM. */
4277 }
4278 [(set_attr "type" "neon_tst,multiple")]
4279 )
4280
;; SIMD-register form of DI cmtst (no reload_completed condition here,
;; unlike the *cm<optab>di patterns).
4281 (define_insn "*aarch64_cmtstdi"
4282 [(set (match_operand:DI 0 "register_operand" "=w")
4283 (neg:DI
4284 (ne:DI
4285 (and:DI
4286 (match_operand:DI 1 "register_operand" "w")
4287 (match_operand:DI 2 "register_operand" "w"))
4288 (const_int 0))))]
4289 "TARGET_SIMD"
4290 "cmtst\t%d0, %d1, %d2"
4291 [(set_attr "type" "neon_tst")]
4292 )
4293
4294 ;; fcm(eq|ge|gt|le|lt)
4295
;; Floating-point compare producing an all-ones/all-zeros mask;
;; alternative 2 compares against zero (YDz).
4296 (define_insn "aarch64_cm<optab><mode>"
4297 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4298 (neg:<V_cmp_result>
4299 (COMPARISONS:<V_cmp_result>
4300 (match_operand:VHSDF_SDF 1 "register_operand" "w,w")
4301 (match_operand:VHSDF_SDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4302 )))]
4303 "TARGET_SIMD"
4304 "@
4305 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4306 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4307 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4308 )
4309
4310 ;; fac(ge|gt)
4311 ;; Note we can also handle what would be fac(le|lt) by
4312 ;; generating fac(ge|gt).
4313
;; Absolute compare (facge/facgt): compares |op1| against |op2|; the
;; cmp_1/cmp_2 attributes swap operands so fac(le|lt) map onto fac(ge|gt).
4314 (define_insn "aarch64_fac<optab><mode>"
4315 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4316 (neg:<V_cmp_result>
4317 (FAC_COMPARISONS:<V_cmp_result>
4318 (abs:VHSDF_SDF
4319 (match_operand:VHSDF_SDF 1 "register_operand" "w"))
4320 (abs:VHSDF_SDF
4321 (match_operand:VHSDF_SDF 2 "register_operand" "w"))
4322 )))]
4323 "TARGET_SIMD"
4324 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4325 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4326 )
4327
4328 ;; addp
4329
;; Pairwise add of two 64-bit integer vectors.
4330 (define_insn "aarch64_addp<mode>"
4331 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4332 (unspec:VD_BHSI
4333 [(match_operand:VD_BHSI 1 "register_operand" "w")
4334 (match_operand:VD_BHSI 2 "register_operand" "w")]
4335 UNSPEC_ADDP))]
4336 "TARGET_SIMD"
4337 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4338 [(set_attr "type" "neon_reduc_add<q>")]
4339 )
4340
;; Scalar pairwise add: sums the two DI elements of a V2DI into a D reg.
4341 (define_insn "aarch64_addpdi"
4342 [(set (match_operand:DI 0 "register_operand" "=w")
4343 (unspec:DI
4344 [(match_operand:V2DI 1 "register_operand" "w")]
4345 UNSPEC_ADDP))]
4346 "TARGET_SIMD"
4347 "addp\t%d0, %1.2d"
4348 [(set_attr "type" "neon_reduc_add")]
4349 )
4350
4351 ;; sqrt
4352
;; Vector square root.  The expander first tries the Newton-series
;; approximation (aarch64_emit_approx_sqrt); if that declines, the plain
;; fsqrt insn below is matched instead.
4353 (define_expand "sqrt<mode>2"
4354 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4355 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4356 "TARGET_SIMD"
4357 {
4358 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4359 DONE;
4360 })
4361
;; Hardware fsqrt instruction form.
4362 (define_insn "*sqrt<mode>2"
4363 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4364 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4365 "TARGET_SIMD"
4366 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4367 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4368 )
4369
4370 ;; Patterns for vector struct loads and stores.
4371
;; Two-register structure load: fills an OI (2x128-bit) register pair.
;; The inner VQ unspec only carries the element mode for <Vtype>.
4372 (define_insn "aarch64_simd_ld2<mode>"
4373 [(set (match_operand:OI 0 "register_operand" "=w")
4374 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4375 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4376 UNSPEC_LD2))]
4377 "TARGET_SIMD"
4378 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4379 [(set_attr "type" "neon_load2_2reg<q>")]
4380 )
4381
;; ld2r: load one 2-element structure and replicate to all lanes.
4382 (define_insn "aarch64_simd_ld2r<mode>"
4383 [(set (match_operand:OI 0 "register_operand" "=w")
4384 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4385 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4386 UNSPEC_LD2_DUP))]
4387 "TARGET_SIMD"
4388 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4389 [(set_attr "type" "neon_load2_all_lanes<q>")]
4390 )
4391
;; ld2 to a single lane; operand 2 is the pass-through register pair and
;; ENDIAN_LANE_N flips the lane number for big-endian assembly.
4392 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4393 [(set (match_operand:OI 0 "register_operand" "=w")
4394 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4395 (match_operand:OI 2 "register_operand" "0")
4396 (match_operand:SI 3 "immediate_operand" "i")
4397 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4398 UNSPEC_LD2_LANE))]
4399 "TARGET_SIMD"
4400 {
4401 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4402 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4403 }
4404 [(set_attr "type" "neon_load2_one_lane")]
4405 )
4406
;; Standard-name expander: on big-endian, load to a temp and permute the
;; register list back to GCC lane order via a tbl-based mask.
4407 (define_expand "vec_load_lanesoi<mode>"
4408 [(set (match_operand:OI 0 "register_operand" "=w")
4409 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4410 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4411 UNSPEC_LD2))]
4412 "TARGET_SIMD"
4413 {
4414 if (BYTES_BIG_ENDIAN)
4415 {
4416 rtx tmp = gen_reg_rtx (OImode);
4417 rtx mask = aarch64_reverse_mask (<MODE>mode);
4418 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4419 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4420 }
4421 else
4422 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4423 DONE;
4424 })
4425
;; Two-register structure store, mirror of aarch64_simd_ld2.
4426 (define_insn "aarch64_simd_st2<mode>"
4427 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4428 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4429 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4430 UNSPEC_ST2))]
4431 "TARGET_SIMD"
4432 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4433 [(set_attr "type" "neon_store2_2reg<q>")]
4434 )
4435
4436 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; st2 of a single lane.
4437 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4438 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4439 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4440 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4441 (match_operand:SI 2 "immediate_operand" "i")]
4442 UNSPEC_ST2_LANE))]
4443 "TARGET_SIMD"
4444 {
4445 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4446 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4447 }
4448 [(set_attr "type" "neon_store2_one_lane<q>")]
4449 )
4450
;; Standard-name expander: on big-endian, permute the register list into
;; memory order before the st2.
4451 (define_expand "vec_store_lanesoi<mode>"
4452 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4453 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4454 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4455 UNSPEC_ST2))]
4456 "TARGET_SIMD"
4457 {
4458 if (BYTES_BIG_ENDIAN)
4459 {
4460 rtx tmp = gen_reg_rtx (OImode);
4461 rtx mask = aarch64_reverse_mask (<MODE>mode);
4462 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4463 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4464 }
4465 else
4466 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4467 DONE;
4468 })
4469
;; Three-register structure load into a CI (3x128-bit) register list;
;; same scheme as the ld2/st2 patterns above.
4470 (define_insn "aarch64_simd_ld3<mode>"
4471 [(set (match_operand:CI 0 "register_operand" "=w")
4472 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4473 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4474 UNSPEC_LD3))]
4475 "TARGET_SIMD"
4476 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4477 [(set_attr "type" "neon_load3_3reg<q>")]
4478 )
4479
;; ld3r: load one 3-element structure and replicate to all lanes.
4480 (define_insn "aarch64_simd_ld3r<mode>"
4481 [(set (match_operand:CI 0 "register_operand" "=w")
4482 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4483 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4484 UNSPEC_LD3_DUP))]
4485 "TARGET_SIMD"
4486 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4487 [(set_attr "type" "neon_load3_all_lanes<q>")]
4488 )
4489
;; ld3 to a single lane; operand 2 is the pass-through register list.
4490 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4491 [(set (match_operand:CI 0 "register_operand" "=w")
4492 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4493 (match_operand:CI 2 "register_operand" "0")
4494 (match_operand:SI 3 "immediate_operand" "i")
4495 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4496 UNSPEC_LD3_LANE))]
4497 "TARGET_SIMD"
4498 {
4499 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4500 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4501 }
4502 [(set_attr "type" "neon_load3_one_lane")]
4503 )
4504
;; Standard-name expander with the big-endian register-list permute.
4505 (define_expand "vec_load_lanesci<mode>"
4506 [(set (match_operand:CI 0 "register_operand" "=w")
4507 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4508 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4509 UNSPEC_LD3))]
4510 "TARGET_SIMD"
4511 {
4512 if (BYTES_BIG_ENDIAN)
4513 {
4514 rtx tmp = gen_reg_rtx (CImode);
4515 rtx mask = aarch64_reverse_mask (<MODE>mode);
4516 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4517 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4518 }
4519 else
4520 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4521 DONE;
4522 })
4523
;; Three-register structure store, mirror of aarch64_simd_ld3.
4524 (define_insn "aarch64_simd_st3<mode>"
4525 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4526 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4527 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4528 UNSPEC_ST3))]
4529 "TARGET_SIMD"
4530 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4531 [(set_attr "type" "neon_store3_3reg<q>")]
4532 )
4533
4534 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; st3 of a single lane.
4535 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4536 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4537 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4538 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4539 (match_operand:SI 2 "immediate_operand" "i")]
4540 UNSPEC_ST3_LANE))]
4541 "TARGET_SIMD"
4542 {
4543 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4544 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4545 }
4546 [(set_attr "type" "neon_store3_one_lane<q>")]
4547 )
4548
;; Standard-name expander with the big-endian register-list permute.
4549 (define_expand "vec_store_lanesci<mode>"
4550 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4551 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4552 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4553 UNSPEC_ST3))]
4554 "TARGET_SIMD"
4555 {
4556 if (BYTES_BIG_ENDIAN)
4557 {
4558 rtx tmp = gen_reg_rtx (CImode);
4559 rtx mask = aarch64_reverse_mask (<MODE>mode);
4560 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4561 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4562 }
4563 else
4564 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4565 DONE;
4566 })
4567
;; Four-register structure load into an XI (4x128-bit) register list;
;; same scheme as the ld2/st2 and ld3/st3 patterns above.
4568 (define_insn "aarch64_simd_ld4<mode>"
4569 [(set (match_operand:XI 0 "register_operand" "=w")
4570 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4571 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 UNSPEC_LD4))]
4573 "TARGET_SIMD"
4574 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4575 [(set_attr "type" "neon_load4_4reg<q>")]
4576 )
4577
;; ld4r: load one 4-element structure and replicate to all lanes.
4578 (define_insn "aarch64_simd_ld4r<mode>"
4579 [(set (match_operand:XI 0 "register_operand" "=w")
4580 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4581 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4582 UNSPEC_LD4_DUP))]
4583 "TARGET_SIMD"
4584 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4585 [(set_attr "type" "neon_load4_all_lanes<q>")]
4586 )
4587
;; ld4 to a single lane; operand 2 is the pass-through register list.
4588 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4589 [(set (match_operand:XI 0 "register_operand" "=w")
4590 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4591 (match_operand:XI 2 "register_operand" "0")
4592 (match_operand:SI 3 "immediate_operand" "i")
4593 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4594 UNSPEC_LD4_LANE))]
4595 "TARGET_SIMD"
4596 {
4597 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4598 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4599 }
4600 [(set_attr "type" "neon_load4_one_lane")]
4601 )
4602
;; Standard-name expander with the big-endian register-list permute.
4603 (define_expand "vec_load_lanesxi<mode>"
4604 [(set (match_operand:XI 0 "register_operand" "=w")
4605 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4606 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4607 UNSPEC_LD4))]
4608 "TARGET_SIMD"
4609 {
4610 if (BYTES_BIG_ENDIAN)
4611 {
4612 rtx tmp = gen_reg_rtx (XImode);
4613 rtx mask = aarch64_reverse_mask (<MODE>mode);
4614 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4615 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4616 }
4617 else
4618 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4619 DONE;
4620 })
4621
;; Four-register structure store, mirror of aarch64_simd_ld4.
4622 (define_insn "aarch64_simd_st4<mode>"
4623 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4624 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4626 UNSPEC_ST4))]
4627 "TARGET_SIMD"
4628 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4629 [(set_attr "type" "neon_store4_4reg<q>")]
4630 )
4631
4632 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; st4 of a single lane.
4633 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4634 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4635 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4636 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4637 (match_operand:SI 2 "immediate_operand" "i")]
4638 UNSPEC_ST4_LANE))]
4639 "TARGET_SIMD"
4640 {
4641 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4642 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4643 }
4644 [(set_attr "type" "neon_store4_one_lane<q>")]
4645 )
4646
;; Standard-name expander with the big-endian register-list permute.
4647 (define_expand "vec_store_lanesxi<mode>"
4648 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4649 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4650 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4651 UNSPEC_ST4))]
4652 "TARGET_SIMD"
4653 {
4654 if (BYTES_BIG_ENDIAN)
4655 {
4656 rtx tmp = gen_reg_rtx (XImode);
4657 rtx mask = aarch64_reverse_mask (<MODE>mode);
4658 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4659 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4660 }
4661 else
4662 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4663 DONE;
4664 })
4665
;; Permute every vector of a VSTRUCT register list through the V16QI
;; byte mask in operand 2, one TBL per constituent Q register.  Used by
;; the big-endian lane-load/store expanders above to translate between
;; architectural and GCC lane order.  Kept as a single unspec until
;; after reload (note the earlyclobber "=&w": the destination must not
;; overlap the source list, since the per-register TBLs would otherwise
;; clobber not-yet-read inputs), then split into <insn_count>/4 TBLs.
4666 (define_insn_and_split "aarch64_rev_reglist<mode>"
4667 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4668 (unspec:VSTRUCT
4669 [(match_operand:VSTRUCT 1 "register_operand" "w")
4670 (match_operand:V16QI 2 "register_operand" "w")]
4671 UNSPEC_REV_REGLIST))]
4672 "TARGET_SIMD"
4673 "#"
4674 "&& reload_completed"
4675 [(const_int 0)]
4676 {
4677 int i;
4678 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4679 for (i = 0; i < nregs; i++)
4680 {
4681 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4682 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4683 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4684 }
4685 DONE;
4686 }
4687 [(set_attr "type" "neon_tbl1_q")
4688 (set_attr "length" "<insn_count>")]
4689 )
4690
4691 ;; Reload patterns for AdvSIMD register list operands.
4692
;; Move expander for the opaque structure modes (OI/CI/XI).  Before
;; reload, legitimize mem := mem / mem := non-reg by forcing the source
;; into a register; after reload (when pseudos cannot be created) the
;; operands pass through unchanged.
4693 (define_expand "mov<mode>"
4694 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4695 (match_operand:VSTRUCT 1 "general_operand" ""))]
4696 "TARGET_SIMD"
4697 {
4698 if (can_create_pseudo_p ())
4699 {
4700 if (GET_CODE (operands[0]) != REG)
4701 operands[1] = force_reg (<MODE>mode, operands[1]);
4702 }
4703 })
4704
;; Little-endian structure moves: reg-reg moves are split later
;; (alternative 0, "#"), while memory transfers use multi-register
;; ST1/LD1 over the whole list.  Big-endian uses the *aarch64_be_mov*
;; patterns further down instead.
4705 (define_insn "*aarch64_mov<mode>"
4706 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4707 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4708 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4709 && (register_operand (operands[0], <MODE>mode)
4710 || register_operand (operands[1], <MODE>mode))"
4711 "@
4712 #
4713 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4714 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4715 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4716 neon_load<nregs>_<nregs>reg_q")
4717 (set_attr "length" "<insn_count>,4,4")]
4718 )
4719
;; Endianness-safe single-vector load: LD1 loads elements in array
;; order regardless of endianness, so this is used on big-endian where
;; a plain LDR would byte-swap the lane layout.
4720 (define_insn "aarch64_be_ld1<mode>"
4721 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4722 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4723 "aarch64_simd_struct_operand" "Utv")]
4724 UNSPEC_LD1))]
4725 "TARGET_SIMD"
4726 "ld1\\t{%0<Vmtype>}, %1"
4727 [(set_attr "type" "neon_load1_1reg<q>")]
4728 )
4729
;; Endianness-safe single-vector store; the ST1 counterpart of
;; aarch64_be_ld1 above.
4730 (define_insn "aarch64_be_st1<mode>"
4731 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4732 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4733 UNSPEC_ST1))]
4734 "TARGET_SIMD"
4735 "st1\\t{%1<Vmtype>}, %0"
4736 [(set_attr "type" "neon_store1_1reg<q>")]
4737 )
4738
;; Big-endian OImode (2 x Q) move: reg-reg is split later ("#"), and
;; memory transfers use STP/LDP of the two Q registers, which preserve
;; the in-register byte layout (unlike multi-register ST1/LD1 element
;; loads used on little-endian).
4739 (define_insn "*aarch64_be_movoi"
4740 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4741 (match_operand:OI 1 "general_operand" " w,w,m"))]
4742 "TARGET_SIMD && BYTES_BIG_ENDIAN
4743 && (register_operand (operands[0], OImode)
4744 || register_operand (operands[1], OImode))"
4745 "@
4746 #
4747 stp\\t%q1, %R1, %0
4748 ldp\\t%q0, %R0, %1"
4749 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4750 (set_attr "length" "8,4,4")]
4751 )
4752
;; Big-endian CImode (3 x Q) move: all alternatives emit "#" and are
;; decomposed by the CImode define_split below.  The "o" (offsettable)
;; memory constraint is needed because the split addresses the pieces
;; at byte offsets within the operand.
4753 (define_insn "*aarch64_be_movci"
4754 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4755 (match_operand:CI 1 "general_operand" " w,w,o"))]
4756 "TARGET_SIMD && BYTES_BIG_ENDIAN
4757 && (register_operand (operands[0], CImode)
4758 || register_operand (operands[1], CImode))"
4759 "#"
4760 [(set_attr "type" "multiple")
4761 (set_attr "length" "12,4,4")]
4762 )
4763
;; Big-endian XImode (4 x Q) move; as for CImode, always split.
4764 (define_insn "*aarch64_be_movxi"
4765 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4766 (match_operand:XI 1 "general_operand" " w,w,o"))]
4767 "TARGET_SIMD && BYTES_BIG_ENDIAN
4768 && (register_operand (operands[0], XImode)
4769 || register_operand (operands[1], XImode))"
4770 "#"
4771 [(set_attr "type" "multiple")
4772 (set_attr "length" "16,4,4")]
4773 )
4774
;; Post-reload split of an OImode reg-reg move into two TImode moves
;; (aarch64_simd_emit_reg_reg_move orders the copies to cope with
;; overlapping source/destination lists).
4775 (define_split
4776 [(set (match_operand:OI 0 "register_operand")
4777 (match_operand:OI 1 "register_operand"))]
4778 "TARGET_SIMD && reload_completed"
4779 [(const_int 0)]
4780 {
4781 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4782 DONE;
4783 })
4784
;; Post-reload split of CImode moves.  reg-reg becomes three TImode
;; moves; on big-endian a memory transfer is decomposed into an OImode
;; move of the first 32 bytes plus a TImode move of the last 16 (the
;; V16QI lowpart keeps LD1/ST1-compatible lane order).  Otherwise FAIL:
;; little-endian memory moves are handled by *aarch64_mov<mode>.
4785 (define_split
4786 [(set (match_operand:CI 0 "nonimmediate_operand")
4787 (match_operand:CI 1 "general_operand"))]
4788 "TARGET_SIMD && reload_completed"
4789 [(const_int 0)]
4790 {
4791 if (register_operand (operands[0], CImode)
4792 && register_operand (operands[1], CImode))
4793 {
4794 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4795 DONE;
4796 }
4797 else if (BYTES_BIG_ENDIAN)
4798 {
4799 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4800 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4801 emit_move_insn (gen_lowpart (V16QImode,
4802 simplify_gen_subreg (TImode, operands[0],
4803 CImode, 32)),
4804 gen_lowpart (V16QImode,
4805 simplify_gen_subreg (TImode, operands[1],
4806 CImode, 32)));
4807 DONE;
4808 }
4809 else
4810 FAIL;
4811 })
4812
;; Post-reload split of XImode moves: four TImode reg-reg moves, or on
;; big-endian two OImode memory moves at offsets 0 and 32.
4813 (define_split
4814 [(set (match_operand:XI 0 "nonimmediate_operand")
4815 (match_operand:XI 1 "general_operand"))]
4816 "TARGET_SIMD && reload_completed"
4817 [(const_int 0)]
4818 {
4819 if (register_operand (operands[0], XImode)
4820 && register_operand (operands[1], XImode))
4821 {
4822 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4823 DONE;
4824 }
4825 else if (BYTES_BIG_ENDIAN)
4826 {
4827 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4828 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4829 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4830 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4831 DONE;
4832 }
4833 else
4834 FAIL;
4835 })
4836
;; Expander for the vld<n>_dup intrinsics (LD2R/LD3R/LD4R): wrap the
;; pointer in operand 1 in a BLKmode MEM sized as <nregs> elements of
;; the element mode, then emit the matching load-replicate insn.
4837 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4838 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4839 (match_operand:DI 1 "register_operand" "w")
4840 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841 "TARGET_SIMD"
4842 {
4843 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4844 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4845 * <VSTRUCT:nregs>);
4846
4847 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4848 mem));
4849 DONE;
4850 })
4851
;; D-register structure loads.  Each ldN result occupies the low half
;; of a Q register, so the RTL models every loaded D vector widened to
;; its double-width mode with a zeroed high half (vec_concat with a
;; zero vec_duplicate, or a plain zero const_int for the DImode/DFmode
;; "DX" variants), then reinterprets the whole concatenation as the
;; opaque structure mode via a subreg.
;;
;; LD2, 64-bit vector elements (VD).
4852 (define_insn "aarch64_ld2<mode>_dreg"
4853 [(set (match_operand:OI 0 "register_operand" "=w")
4854 (subreg:OI
4855 (vec_concat:<VRL2>
4856 (vec_concat:<VDBL>
4857 (unspec:VD
4858 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4859 UNSPEC_LD2)
4860 (vec_duplicate:VD (const_int 0)))
4861 (vec_concat:<VDBL>
4862 (unspec:VD [(match_dup 1)]
4863 UNSPEC_LD2)
4864 (vec_duplicate:VD (const_int 0)))) 0))]
4865 "TARGET_SIMD"
4866 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4867 [(set_attr "type" "neon_load2_2reg<q>")]
4868 )
4869
;; LD2 for single-element 64-bit modes (DX: DI/DF): no element
;; interleaving is possible, so a two-register LD1 is sufficient.
4870 (define_insn "aarch64_ld2<mode>_dreg"
4871 [(set (match_operand:OI 0 "register_operand" "=w")
4872 (subreg:OI
4873 (vec_concat:<VRL2>
4874 (vec_concat:<VDBL>
4875 (unspec:DX
4876 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4877 UNSPEC_LD2)
4878 (const_int 0))
4879 (vec_concat:<VDBL>
4880 (unspec:DX [(match_dup 1)]
4881 UNSPEC_LD2)
4882 (const_int 0))) 0))]
4883 "TARGET_SIMD"
4884 "ld1\\t{%S0.1d - %T0.1d}, %1"
4885 [(set_attr "type" "neon_load1_2reg<q>")]
4886 )
4887
;; LD3, 64-bit vector elements (VD).
4888 (define_insn "aarch64_ld3<mode>_dreg"
4889 [(set (match_operand:CI 0 "register_operand" "=w")
4890 (subreg:CI
4891 (vec_concat:<VRL3>
4892 (vec_concat:<VRL2>
4893 (vec_concat:<VDBL>
4894 (unspec:VD
4895 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4896 UNSPEC_LD3)
4897 (vec_duplicate:VD (const_int 0)))
4898 (vec_concat:<VDBL>
4899 (unspec:VD [(match_dup 1)]
4900 UNSPEC_LD3)
4901 (vec_duplicate:VD (const_int 0))))
4902 (vec_concat:<VDBL>
4903 (unspec:VD [(match_dup 1)]
4904 UNSPEC_LD3)
4905 (vec_duplicate:VD (const_int 0)))) 0))]
4906 "TARGET_SIMD"
4907 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4908 [(set_attr "type" "neon_load3_3reg<q>")]
4909 )
4910
;; LD3 for single-element 64-bit modes (DX): three-register LD1.
4911 (define_insn "aarch64_ld3<mode>_dreg"
4912 [(set (match_operand:CI 0 "register_operand" "=w")
4913 (subreg:CI
4914 (vec_concat:<VRL3>
4915 (vec_concat:<VRL2>
4916 (vec_concat:<VDBL>
4917 (unspec:DX
4918 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4919 UNSPEC_LD3)
4920 (const_int 0))
4921 (vec_concat:<VDBL>
4922 (unspec:DX [(match_dup 1)]
4923 UNSPEC_LD3)
4924 (const_int 0)))
4925 (vec_concat:<VDBL>
4926 (unspec:DX [(match_dup 1)]
4927 UNSPEC_LD3)
4928 (const_int 0))) 0))]
4929 "TARGET_SIMD"
4930 "ld1\\t{%S0.1d - %U0.1d}, %1"
4931 [(set_attr "type" "neon_load1_3reg<q>")]
4932 )
4933
;; LD4, 64-bit vector elements (VD).
4934 (define_insn "aarch64_ld4<mode>_dreg"
4935 [(set (match_operand:XI 0 "register_operand" "=w")
4936 (subreg:XI
4937 (vec_concat:<VRL4>
4938 (vec_concat:<VRL2>
4939 (vec_concat:<VDBL>
4940 (unspec:VD
4941 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4942 UNSPEC_LD4)
4943 (vec_duplicate:VD (const_int 0)))
4944 (vec_concat:<VDBL>
4945 (unspec:VD [(match_dup 1)]
4946 UNSPEC_LD4)
4947 (vec_duplicate:VD (const_int 0))))
4948 (vec_concat:<VRL2>
4949 (vec_concat:<VDBL>
4950 (unspec:VD [(match_dup 1)]
4951 UNSPEC_LD4)
4952 (vec_duplicate:VD (const_int 0)))
4953 (vec_concat:<VDBL>
4954 (unspec:VD [(match_dup 1)]
4955 UNSPEC_LD4)
4956 (vec_duplicate:VD (const_int 0))))) 0))]
4957 "TARGET_SIMD"
4958 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4959 [(set_attr "type" "neon_load4_4reg<q>")]
4960 )
4961
;; LD4 for single-element 64-bit modes (DX): four-register LD1.
4962 (define_insn "aarch64_ld4<mode>_dreg"
4963 [(set (match_operand:XI 0 "register_operand" "=w")
4964 (subreg:XI
4965 (vec_concat:<VRL4>
4966 (vec_concat:<VRL2>
4967 (vec_concat:<VDBL>
4968 (unspec:DX
4969 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4970 UNSPEC_LD4)
4971 (const_int 0))
4972 (vec_concat:<VDBL>
4973 (unspec:DX [(match_dup 1)]
4974 UNSPEC_LD4)
4975 (const_int 0)))
4976 (vec_concat:<VRL2>
4977 (vec_concat:<VDBL>
4978 (unspec:DX [(match_dup 1)]
4979 UNSPEC_LD4)
4980 (const_int 0))
4981 (vec_concat:<VDBL>
4982 (unspec:DX [(match_dup 1)]
4983 UNSPEC_LD4)
4984 (const_int 0)))) 0))]
4985 "TARGET_SIMD"
4986 "ld1\\t{%S0.1d - %V0.1d}, %1"
4987 [(set_attr "type" "neon_load1_4reg<q>")]
4988 )
4989
;; vld<n> intrinsic expander for 64-bit (D-register) vector modes:
;; build a BLKmode MEM of <nregs> * 8 bytes around the pointer and
;; defer to the matching _dreg pattern above.
4990 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4991 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4992 (match_operand:DI 1 "register_operand" "r")
4993 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4994 "TARGET_SIMD"
4995 {
4996 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4997 set_mem_size (mem, <VSTRUCT:nregs> * 8);
4998
4999 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5000 DONE;
5001 })
5002
;; vld1 intrinsic expander: an ordinary vector move on little-endian,
;; or the lane-order-preserving aarch64_be_ld1 on big-endian.
5003 (define_expand "aarch64_ld1<VALL_F16:mode>"
5004 [(match_operand:VALL_F16 0 "register_operand")
5005 (match_operand:DI 1 "register_operand")]
5006 "TARGET_SIMD"
5007 {
5008 machine_mode mode = <VALL_F16:MODE>mode;
5009 rtx mem = gen_rtx_MEM (mode, operands[1]);
5010
5011 if (BYTES_BIG_ENDIAN)
5012 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5013 else
5014 emit_move_insn (operands[0], mem);
5015 DONE;
5016 })
5017
;; vld<n> intrinsic expander for 128-bit (Q-register) vector modes;
;; the MEM keeps the structure mode and the simd_ld<n> insn is used
;; directly.
5018 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5019 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5020 (match_operand:DI 1 "register_operand" "r")
5021 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022 "TARGET_SIMD"
5023 {
5024 machine_mode mode = <VSTRUCT:MODE>mode;
5025 rtx mem = gen_rtx_MEM (mode, operands[1]);
5026
5027 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5028 DONE;
5029 })
5030
;; vld<n>_lane intrinsic expander: check the lane index against the
;; element count of the vector mode, then emit the single-lane
;; structure load insn.  Operand 2 supplies the lanes that are not
;; overwritten.
5031 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5032 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5033 (match_operand:DI 1 "register_operand" "w")
5034 (match_operand:VSTRUCT 2 "register_operand" "0")
5035 (match_operand:SI 3 "immediate_operand" "i")
5036 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037 "TARGET_SIMD"
5038 {
5039 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5040 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5041 * <VSTRUCT:nregs>);
5042
5043 aarch64_simd_lane_bounds (operands[3], 0,
5044 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5045 NULL);
5046 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5047 operands[0], mem, operands[2], operands[3]));
5048 DONE;
5049 })
5050
5051 ;; Expanders for builtins to extract vector registers from large
5052 ;; opaque integer modes.
5053
5054 ;; D-register list.
5055
;; Extract D-register number <part> from a structure value: take the
;; 128-bit subreg at byte offset part * 16 into a temporary, then
;; return its low 64 bits as the D-vector mode.
5056 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5057 [(match_operand:VDC 0 "register_operand" "=w")
5058 (match_operand:VSTRUCT 1 "register_operand" "w")
5059 (match_operand:SI 2 "immediate_operand" "i")]
5060 "TARGET_SIMD"
5061 {
5062 int part = INTVAL (operands[2]);
5063 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5064 int offset = part * 16;
5065
5066 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5067 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5068 DONE;
5069 })
5070
5071 ;; Q-register list.
5072
;; Extract Q-register number <part> from a structure value as a plain
;; subreg move at byte offset part * 16.
5073 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5074 [(match_operand:VQ 0 "register_operand" "=w")
5075 (match_operand:VSTRUCT 1 "register_operand" "w")
5076 (match_operand:SI 2 "immediate_operand" "i")]
5077 "TARGET_SIMD"
5078 {
5079 int part = INTVAL (operands[2]);
5080 int offset = part * 16;
5081
5082 emit_move_insn (operands[0],
5083 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5084 DONE;
5085 })
5086
5087 ;; Permuted-store expanders for neon intrinsics.
5088
5089 ;; Permute instructions
5090
5091 ;; vec_perm support
5092
;; Constant permutation: succeed only if the constant selector in
;; operand 3 can be matched to a native permute sequence by
;; aarch64_expand_vec_perm_const; otherwise FAIL so the middle end
;; falls back to a generic expansion.
5093 (define_expand "vec_perm_const<mode>"
5094 [(match_operand:VALL_F16 0 "register_operand")
5095 (match_operand:VALL_F16 1 "register_operand")
5096 (match_operand:VALL_F16 2 "register_operand")
5097 (match_operand:<V_cmp_result> 3)]
5098 "TARGET_SIMD"
5099 {
5100 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5101 operands[2], operands[3]))
5102 DONE;
5103 else
5104 FAIL;
5105 })
5106
;; Variable permutation on byte vectors (VB) with a register selector;
;; expanded by aarch64_expand_vec_perm (TBL based).
5107 (define_expand "vec_perm<mode>"
5108 [(match_operand:VB 0 "register_operand")
5109 (match_operand:VB 1 "register_operand")
5110 (match_operand:VB 2 "register_operand")
5111 (match_operand:VB 3 "register_operand")]
5112 "TARGET_SIMD"
5113 {
5114 aarch64_expand_vec_perm (operands[0], operands[1],
5115 operands[2], operands[3]);
5116 DONE;
5117 })
5118
;; TBL with a one-register (16-byte) table.  Out-of-range selector
;; bytes yield zero in the result.
5119 (define_insn "aarch64_tbl1<mode>"
5120 [(set (match_operand:VB 0 "register_operand" "=w")
5121 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5122 (match_operand:VB 2 "register_operand" "w")]
5123 UNSPEC_TBL))]
5124 "TARGET_SIMD"
5125 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5126 [(set_attr "type" "neon_tbl1<q>")]
5127 )
5128
5129 ;; Two source registers.
5130
;; TBL with a two-register table passed as an OImode register pair.
5131 (define_insn "aarch64_tbl2v16qi"
5132 [(set (match_operand:V16QI 0 "register_operand" "=w")
5133 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5134 (match_operand:V16QI 2 "register_operand" "w")]
5135 UNSPEC_TBL))]
5136 "TARGET_SIMD"
5137 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5138 [(set_attr "type" "neon_tbl2_q")]
5139 )
5140
;; Two-register-table TBL for 8- or 16-byte results (%S0/%S2 print the
;; first register of the operand so the same template serves both).
5141 (define_insn "aarch64_tbl3<mode>"
5142 [(set (match_operand:VB 0 "register_operand" "=w")
5143 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5144 (match_operand:VB 2 "register_operand" "w")]
5145 UNSPEC_TBL))]
5146 "TARGET_SIMD"
5147 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5148 [(set_attr "type" "neon_tbl3")]
5149 )
5150
;; Two-register-table TBX: like TBL but out-of-range selector bytes
;; leave the corresponding destination byte unchanged, hence operand 1
;; is tied to the output ("0").
5151 (define_insn "aarch64_tbx4<mode>"
5152 [(set (match_operand:VB 0 "register_operand" "=w")
5153 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5154 (match_operand:OI 2 "register_operand" "w")
5155 (match_operand:VB 3 "register_operand" "w")]
5156 UNSPEC_TBX))]
5157 "TARGET_SIMD"
5158 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5159 [(set_attr "type" "neon_tbl4")]
5160 )
5161
5162 ;; Three source registers.
5163
;; Three-register-table TBL (table in a CImode register triple).
5164 (define_insn "aarch64_qtbl3<mode>"
5165 [(set (match_operand:VB 0 "register_operand" "=w")
5166 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5167 (match_operand:VB 2 "register_operand" "w")]
5168 UNSPEC_TBL))]
5169 "TARGET_SIMD"
5170 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5171 [(set_attr "type" "neon_tbl3")]
5172 )
5173
;; Three-register-table TBX; operand 1 tied to the output as above.
5174 (define_insn "aarch64_qtbx3<mode>"
5175 [(set (match_operand:VB 0 "register_operand" "=w")
5176 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5177 (match_operand:CI 2 "register_operand" "w")
5178 (match_operand:VB 3 "register_operand" "w")]
5179 UNSPEC_TBX))]
5180 "TARGET_SIMD"
5181 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5182 [(set_attr "type" "neon_tbl3")]
5183 )
5184
5185 ;; Four source registers.
5186
;; Four-register-table TBL (table in an XImode register quad).
5187 (define_insn "aarch64_qtbl4<mode>"
5188 [(set (match_operand:VB 0 "register_operand" "=w")
5189 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5190 (match_operand:VB 2 "register_operand" "w")]
5191 UNSPEC_TBL))]
5192 "TARGET_SIMD"
5193 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5194 [(set_attr "type" "neon_tbl4")]
5195 )
5196
;; Four-register-table TBX; operand 1 tied to the output as above.
5197 (define_insn "aarch64_qtbx4<mode>"
5198 [(set (match_operand:VB 0 "register_operand" "=w")
5199 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5200 (match_operand:XI 2 "register_operand" "w")
5201 (match_operand:VB 3 "register_operand" "w")]
5202 UNSPEC_TBX))]
5203 "TARGET_SIMD"
5204 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5205 [(set_attr "type" "neon_tbl4")]
5206 )
5207
;; Combine two V16QI values into an OImode register pair for use as a
;; two-register TBL table.  Kept opaque until after reload, then split
;; by aarch64_split_combinev16qi into the actual register moves.
5208 (define_insn_and_split "aarch64_combinev16qi"
5209 [(set (match_operand:OI 0 "register_operand" "=w")
5210 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5211 (match_operand:V16QI 2 "register_operand" "w")]
5212 UNSPEC_CONCAT))]
5213 "TARGET_SIMD"
5214 "#"
5215 "&& reload_completed"
5216 [(const_int 0)]
5217 {
5218 aarch64_split_combinev16qi (operands);
5219 DONE;
5220 }
5221 [(set_attr "type" "multiple")]
5222 )
5223
;; Structured permutes: ZIP/UZP/TRN, low/high halves, selected by the
;; PERMUTE iterator via perm_insn/perm_hilo.
5224 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5225 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5226 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5227 (match_operand:VALL_F16 2 "register_operand" "w")]
5228 PERMUTE))]
5229 "TARGET_SIMD"
5230 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5231 [(set_attr "type" "neon_permute<q>")]
5232 )
5233
;; EXT: extract a vector from a pair of concatenated vectors.
5234 ;; Note immediate (third) operand is lane index not byte index.
;; The output routine scales it by the element size, since the EXT
;; instruction itself takes a byte index.
5235 (define_insn "aarch64_ext<mode>"
5236 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5237 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5238 (match_operand:VALL_F16 2 "register_operand" "w")
5239 (match_operand:SI 3 "immediate_operand" "i")]
5240 UNSPEC_EXT))]
5241 "TARGET_SIMD"
5242 {
5243 operands[3] = GEN_INT (INTVAL (operands[3])
5244 * GET_MODE_UNIT_SIZE (<MODE>mode));
5245 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5246 }
5247 [(set_attr "type" "neon_ext<q>")]
5248 )
5249
;; REV16/REV32/REV64 element reversal; the variant comes from the
;; REVERSE iterator's rev_op attribute.
5250 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5251 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5252 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5253 REVERSE))]
5254 "TARGET_SIMD"
5255 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5256 [(set_attr "type" "neon_rev<q>")]
5257 )
5258
;; D-register structure stores, mirroring the ldN_dreg patterns above:
;; the VD variants emit interleaving ST2/ST3/ST4, while the DX
;; (DImode/DFmode) variants need no interleaving and use a
;; multi-register ST1 of .1d vectors instead.
;;
;; ST2, 64-bit vector elements (VD).
5259 (define_insn "aarch64_st2<mode>_dreg"
5260 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5261 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5262 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 UNSPEC_ST2))]
5264 "TARGET_SIMD"
5265 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5266 [(set_attr "type" "neon_store2_2reg")]
5267 )
5268
;; ST2 for single-element 64-bit modes (DX): two-register ST1.
5269 (define_insn "aarch64_st2<mode>_dreg"
5270 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5271 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5272 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5273 UNSPEC_ST2))]
5274 "TARGET_SIMD"
5275 "st1\\t{%S1.1d - %T1.1d}, %0"
5276 [(set_attr "type" "neon_store1_2reg")]
5277 )
5278
;; ST3, 64-bit vector elements (VD).
5279 (define_insn "aarch64_st3<mode>_dreg"
5280 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5281 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5282 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5283 UNSPEC_ST3))]
5284 "TARGET_SIMD"
5285 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5286 [(set_attr "type" "neon_store3_3reg")]
5287 )
5288
;; ST3 for single-element 64-bit modes (DX): three-register ST1.
5289 (define_insn "aarch64_st3<mode>_dreg"
5290 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5291 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5292 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5293 UNSPEC_ST3))]
5294 "TARGET_SIMD"
5295 "st1\\t{%S1.1d - %U1.1d}, %0"
5296 [(set_attr "type" "neon_store1_3reg")]
5297 )
5298
;; ST4, 64-bit vector elements (VD).
5299 (define_insn "aarch64_st4<mode>_dreg"
5300 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5301 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5302 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5303 UNSPEC_ST4))]
5304 "TARGET_SIMD"
5305 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5306 [(set_attr "type" "neon_store4_4reg")]
5307 )
5308
;; ST4 for single-element 64-bit modes (DX): four-register ST1.
5309 (define_insn "aarch64_st4<mode>_dreg"
5310 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5311 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5312 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5313 UNSPEC_ST4))]
5314 "TARGET_SIMD"
5315 "st1\\t{%S1.1d - %V1.1d}, %0"
5316 [(set_attr "type" "neon_store1_4reg")]
5317 )
5318
;; vst<n> intrinsic expander for 64-bit (D-register) vector modes,
;; mirroring aarch64_ld<nregs><VDC:mode> above.
5319 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5320 [(match_operand:DI 0 "register_operand" "r")
5321 (match_operand:VSTRUCT 1 "register_operand" "w")
5322 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5323 "TARGET_SIMD"
5324 {
5325 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5326 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5327
5328 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5329 DONE;
5330 })
5331
;; vst<n> intrinsic expander for 128-bit (Q-register) vector modes.
5332 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5333 [(match_operand:DI 0 "register_operand" "r")
5334 (match_operand:VSTRUCT 1 "register_operand" "w")
5335 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5336 "TARGET_SIMD"
5337 {
5338 machine_mode mode = <VSTRUCT:MODE>mode;
5339 rtx mem = gen_rtx_MEM (mode, operands[0]);
5340
5341 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5342 DONE;
5343 })
5344
;; vst<n>_lane intrinsic expander: store lane operand 2 of each vector
;; in the register list.  Note: unlike the load-lane expander, no
;; aarch64_simd_lane_bounds check is performed here before emitting
;; the single-lane store insn.
5345 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5346 [(match_operand:DI 0 "register_operand" "r")
5347 (match_operand:VSTRUCT 1 "register_operand" "w")
5348 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5349 (match_operand:SI 2 "immediate_operand")]
5350 "TARGET_SIMD"
5351 {
5352 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5353 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5354 * <VSTRUCT:nregs>);
5355
5356 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5357 mem, operands[1], operands[2]));
5358 DONE;
5359 })
5360
;; vst1 intrinsic expander: plain vector move on little-endian, or the
;; lane-order-preserving aarch64_be_st1 on big-endian.
5361 (define_expand "aarch64_st1<VALL_F16:mode>"
5362 [(match_operand:DI 0 "register_operand")
5363 (match_operand:VALL_F16 1 "register_operand")]
5364 "TARGET_SIMD"
5365 {
5366 machine_mode mode = <VALL_F16:MODE>mode;
5367 rtx mem = gen_rtx_MEM (mode, operands[0]);
5368
5369 if (BYTES_BIG_ENDIAN)
5370 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5371 else
5372 emit_move_insn (mem, operands[1]);
5373 DONE;
5374 })
5375
5376 ;; Expander for builtins to insert vector registers into large
5377 ;; opaque integer modes.
5378
5379 ;; Q-register list. We don't need a D-reg inserter as we zero
5380 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5381
;; Copy the whole structure value from operand 1 to operand 0, then
;; overwrite Q-register number <part> (operand 3) with operand 2 via a
;; subreg store at byte offset part * 16.
5382 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5383 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5384 (match_operand:VSTRUCT 1 "register_operand" "0")
5385 (match_operand:VQ 2 "register_operand" "w")
5386 (match_operand:SI 3 "immediate_operand" "i")]
5387 "TARGET_SIMD"
5388 {
5389 int part = INTVAL (operands[3]);
5390 int offset = part * 16;
5391
5392 emit_move_insn (operands[0], operands[1]);
5393 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5394 operands[2]);
5395 DONE;
5396 })
5397
5398 ;; Standard pattern name vec_init<mode>.
5399
;; Initialize a vector from the PARALLEL of element values in
;; operand 1; all the work is in aarch64_expand_vector_init.
5400 (define_expand "vec_init<mode>"
5401 [(match_operand:VALL_F16 0 "register_operand" "")
5402 (match_operand 1 "" "")]
5403 "TARGET_SIMD"
5404 {
5405 aarch64_expand_vector_init (operands[0], operands[1]);
5406 DONE;
5407 })
5408
;; LD1R: load one element from memory and replicate it to every lane
;; of the destination (matched from a vec_duplicate of a memory load).
5409 (define_insn "*aarch64_simd_ld1r<mode>"
5410 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5411 (vec_duplicate:VALL_F16
5412 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5413 "TARGET_SIMD"
5414 "ld1r\\t{%0.<Vtype>}, %1"
5415 [(set_attr "type" "neon_load1_all_lanes")]
5416 )
5417
;; FRECPE: floating-point reciprocal estimate, elementwise.
5418 (define_insn "aarch64_frecpe<mode>"
5419 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5420 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5421 UNSPEC_FRECPE))]
5422 "TARGET_SIMD"
5423 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5424 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5425 )
5426
;; Scalar FRECPE/FRECPX on SFmode/DFmode, selected by the FRECP
;; iterator's frecp_suffix.
5427 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5428 [(set (match_operand:GPF 0 "register_operand" "=w")
5429 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5430 FRECP))]
5431 "TARGET_SIMD"
5432 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5433 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
5434 )
5435
;; FRECPS: Newton-Raphson reciprocal step, vector and scalar forms.
5436 (define_insn "aarch64_frecps<mode>"
5437 [(set (match_operand:VHSDF_SDF 0 "register_operand" "=w")
5438 (unspec:VHSDF_SDF
5439 [(match_operand:VHSDF_SDF 1 "register_operand" "w")
5440 (match_operand:VHSDF_SDF 2 "register_operand" "w")]
5441 UNSPEC_FRECPS))]
5442 "TARGET_SIMD"
5443 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5444 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5445 )
5446
;; URECPE: unsigned integer reciprocal estimate, elementwise.
5447 (define_insn "aarch64_urecpe<mode>"
5448 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5449 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5450 UNSPEC_URECPE))]
5451 "TARGET_SIMD"
5452 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5453 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5454
5455 ;; Standard pattern name vec_extract<mode>.
5456
;; Extract element operand 2 of vector operand 1; delegates to the
;; aarch64_get_lane insn.
5457 (define_expand "vec_extract<mode>"
5458 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5459 (match_operand:VALL_F16 1 "register_operand" "")
5460 (match_operand:SI 2 "immediate_operand" "")]
5461 "TARGET_SIMD"
5462 {
5463 emit_insn
5464 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5465 DONE;
5466 })
5467
5468 ;; aes
5469
;; AESE/AESD round: operand 1 (the state) is tied to the output ("0");
;; operand 2 is the round key.  The encrypt/decrypt variant comes from
;; the CRYPTO_AES iterator's aes_op attribute.
5470 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5471 [(set (match_operand:V16QI 0 "register_operand" "=w")
5472 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5473 (match_operand:V16QI 2 "register_operand" "w")]
5474 CRYPTO_AES))]
5475 "TARGET_SIMD && TARGET_CRYPTO"
5476 "aes<aes_op>\\t%0.16b, %2.16b"
5477 [(set_attr "type" "crypto_aese")]
5478 )
5479
5480 ;; When AES/AESMC fusion is enabled we want the register allocation to
5481 ;; look like:
5482 ;; AESE Vn, _
5483 ;; AESMC Vn, Vn
5484 ;; So prefer to tie operand 1 to operand 0 when fusing.
5485
;; AESMC/AESIMC.  The first (tied, "0") alternative is enabled only
;; when AARCH64_FUSE_AES_AESMC fusion is active, steering the register
;; allocator toward the fusable AESE/AESMC register pairing; the
;; untied second alternative is always available.
5486 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5487 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5488 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5489 CRYPTO_AESMC))]
5490 "TARGET_SIMD && TARGET_CRYPTO"
5491 "aes<aesmc_op>\\t%0.16b, %1.16b"
5492 [(set_attr "type" "crypto_aesmc")
5493 (set_attr_alternative "enabled"
5494 [(if_then_else (match_test
5495 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5496 (const_string "yes" )
5497 (const_string "no"))
5498 (const_string "yes")])]
5499 )
5500
5501 ;; sha1
5502
;; SHA1H: SHA1 fixed-rotate of the low word.
5503 (define_insn "aarch64_crypto_sha1hsi"
5504 [(set (match_operand:SI 0 "register_operand" "=w")
5505 (unspec:SI [(match_operand:SI 1
5506 "register_operand" "w")]
5507 UNSPEC_SHA1H))]
5508 "TARGET_SIMD && TARGET_CRYPTO"
5509 "sha1h\\t%s0, %s1"
5510 [(set_attr "type" "crypto_sha1_fast")]
5511 )
5512
;; SHA1SU1: second half of the SHA1 schedule update; operand 1 is tied
;; to the output ("0").
5513 (define_insn "aarch64_crypto_sha1su1v4si"
5514 [(set (match_operand:V4SI 0 "register_operand" "=w")
5515 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5516 (match_operand:V4SI 2 "register_operand" "w")]
5517 UNSPEC_SHA1SU1))]
5518 "TARGET_SIMD && TARGET_CRYPTO"
5519 "sha1su1\\t%0.4s, %2.4s"
5520 [(set_attr "type" "crypto_sha1_fast")]
5521 )
5522
;; SHA1C/SHA1M/SHA1P hash update (variant from CRYPTO_SHA1's sha1_op);
;; operand 1 (hash state) tied to the output.
5523 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5524 [(set (match_operand:V4SI 0 "register_operand" "=w")
5525 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5526 (match_operand:SI 2 "register_operand" "w")
5527 (match_operand:V4SI 3 "register_operand" "w")]
5528 CRYPTO_SHA1))]
5529 "TARGET_SIMD && TARGET_CRYPTO"
5530 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5531 [(set_attr "type" "crypto_sha1_slow")]
5532 )
5533
;; SHA1SU0: first half of the SHA1 schedule update; operand 1 tied to
;; the output.
5534 (define_insn "aarch64_crypto_sha1su0v4si"
5535 [(set (match_operand:V4SI 0 "register_operand" "=w")
5536 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5537 (match_operand:V4SI 2 "register_operand" "w")
5538 (match_operand:V4SI 3 "register_operand" "w")]
5539 UNSPEC_SHA1SU0))]
5540 "TARGET_SIMD && TARGET_CRYPTO"
5541 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5542 [(set_attr "type" "crypto_sha1_xor")]
5543 )
5544
5545 ;; sha256
5546
;; SHA256H/SHA256H2 hash update (variant from CRYPTO_SHA256's
;; sha256_op); operand 1 (hash state) tied to the output.
5547 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5548 [(set (match_operand:V4SI 0 "register_operand" "=w")
5549 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5550 (match_operand:V4SI 2 "register_operand" "w")
5551 (match_operand:V4SI 3 "register_operand" "w")]
5552 CRYPTO_SHA256))]
5553 "TARGET_SIMD && TARGET_CRYPTO"
5554 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5555 [(set_attr "type" "crypto_sha256_slow")]
5556 )
5557
;; SHA256SU0: first half of the SHA256 message-schedule update;
;; operand 1 is tied to the output ("0").  The insn condition is
;; written "TARGET_SIMD && TARGET_CRYPTO" to match every other crypto
;; pattern in this file (it previously read "&&TARGET_CRYPTO" with no
;; space — semantically identical C, but inconsistent).
5558 (define_insn "aarch64_crypto_sha256su0v4si"
5559 [(set (match_operand:V4SI 0 "register_operand" "=w")
5560 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5561 (match_operand:V4SI 2 "register_operand" "w")]
5562 UNSPEC_SHA256SU0))]
5563 "TARGET_SIMD && TARGET_CRYPTO"
5564 "sha256su0\\t%0.4s, %2.4s"
5565 [(set_attr "type" "crypto_sha256_fast")]
5566 )
5567
;; SHA256SU1: second half of the SHA256 message-schedule update;
;; operand 1 is tied to the output ("0").  The insn condition is
;; written "TARGET_SIMD && TARGET_CRYPTO" to match every other crypto
;; pattern in this file (it previously read "&&TARGET_CRYPTO" with no
;; space — semantically identical C, but inconsistent).
5568 (define_insn "aarch64_crypto_sha256su1v4si"
5569 [(set (match_operand:V4SI 0 "register_operand" "=w")
5570 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5571 (match_operand:V4SI 2 "register_operand" "w")
5572 (match_operand:V4SI 3 "register_operand" "w")]
5573 UNSPEC_SHA256SU1))]
5574 "TARGET_SIMD && TARGET_CRYPTO"
5575 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5576 [(set_attr "type" "crypto_sha256_slow")]
)
5578
5579 ;; pmull
5580
;; PMULL: 64x64 -> 128-bit polynomial (carry-less) multiply of the
;; low D registers.
5581 (define_insn "aarch64_crypto_pmulldi"
5582 [(set (match_operand:TI 0 "register_operand" "=w")
5583 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5584 (match_operand:DI 2 "register_operand" "w")]
5585 UNSPEC_PMULL))]
5586 "TARGET_SIMD && TARGET_CRYPTO"
5587 "pmull\\t%0.1q, %1.1d, %2.1d"
5588 [(set_attr "type" "neon_mul_d_long")]
5589 )
5590
;; PMULL2: polynomial multiply of the high 64-bit halves of the two
;; V2DI operands.
5591 (define_insn "aarch64_crypto_pmullv2di"
5592 [(set (match_operand:TI 0 "register_operand" "=w")
5593 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5594 (match_operand:V2DI 2 "register_operand" "w")]
5595 UNSPEC_PMULL2))]
5596 "TARGET_SIMD && TARGET_CRYPTO"
5597 "pmull2\\t%0.1q, %1.2d, %2.2d"
5598 [(set_attr "type" "neon_mul_d_long")]
5599 )