]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
* config/aarch64/aarch64-simd.md (aarch64_crypto_pmulldi)
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; General vector move expander for all vector modes (including FP16).
;; A mem-to-mem move is not a valid insn, so when the destination is
;; memory force the source into a register first.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
30
;; Misaligned vector move.  AArch64 AdvSIMD loads/stores tolerate
;; misalignment, so this is just a normal move after legalizing operands.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
	(match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
43
;; Duplicate a scalar into every lane of an integer vector.  The scalar
;; may live in a general register (DUP from GP) or in lane 0 of a SIMD
;; register (DUP from element).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
)

;; Floating-point variant: FP scalars are already in SIMD registers, so
;; only the DUP-from-element form is needed.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
63
;; Broadcast one selected lane of a vector to all lanes of the result.
;; The lane index is remapped with ENDIAN_LANE_N so that GCC's
;; architecture-independent lane numbering maps onto the architectural
;; numbering on big-endian targets.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has a different width (the 64/128-bit
;; counterpart of the destination mode), so the lane index is remapped in
;; the source's mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
94
;; Move for 64-bit vector modes.  Alternatives cover: load, store,
;; SIMD reg-reg (ORR), SIMD->GP (UMOV), GP->SIMD (FMOV), GP-GP (MOV),
;; and immediate materialization.  At least one operand must already be
;; a register so that mem-to-mem is never matched.
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\\t%d0, %1";
     case 1: return "str\\t%d1, %0";
     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
     case 3: return "umov\t%0, %1.d[0]";
     case 4: return "fmov\t%d0, %1";
     case 5: return "mov\t%0, %1";
     case 6:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
122
;; Move for 128-bit vector modes.  Moves that cross the GP/SIMD register
;; file boundary (alternatives 3-5) cannot be done in one instruction;
;; they emit "#" and are split later (length 8 = two insns).
(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\\t%q0, %1";
    case 1:
	return "str\\t%q1, %0";
    case 2:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3:
    case 4:
    case 5:
	return "#";
    case 6:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple, multiple,\
		     neon_move<q>")
   (set_attr "length" "4,4,4,8,8,8,4")]
)
155
156 ;; When storing lane zero we can use the normal STR and its more permissive
157 ;; addressing modes.
158
;; Store the element in (architectural) lane zero with a scalar STR,
;; which supports more addressing modes than the lane-store forms.
;; The condition remaps the GCC lane number for endianness and only
;; matches when it is architectural lane 0.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
168
;; Load a pair of adjacent 64-bit vectors with a single LDP.  The
;; condition verifies that the second address is exactly the first plus
;; the mode size, i.e. the two memory slots are contiguous.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

;; Store a pair of adjacent 64-bit vectors with a single STP; same
;; contiguity requirement as load_pair above.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
196
;; After reload, split a 128-bit vector move between two general
;; registers into a pair of DImode reg-reg moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit vector move that crosses the GP/FP
;; register-file boundary (either direction) into component moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
220
;; Expand a 128-bit vector move that crosses the GP/SIMD boundary into
;; two 64-bit half moves.  GP->SIMD uses the move_lo/hi_quad patterns;
;; SIMD->GP extracts the low and high halves with vec_select.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
255
;; Move the low 64-bit half of a 128-bit vector into a general register
;; (UMOV from d[0]).  Only valid after reload, when register classes are
;; fixed.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; As above, for the high half (UMOV from d[1]).
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
277
;; OR with complemented first input.  Note the AArch64 ORN instruction
;; negates its *second* source, so operands 1 and 2 are swapped in the
;; assembly template.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; AND with complemented first input (bit clear); same operand swap as
;; ORN since BIC negates its second source.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
295
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiplication.  VDQ_BHSI only: there is no MUL for
;; 64-bit element vectors.
(define_insn "mul<mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
322
;; Byte-swap each element, implemented with the REV instruction whose
;; width suffix (<Vrevsuff>) matches the element size.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte (RBIT); byte-element vectors only.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
339
;; Count trailing zeros, synthesized as: byte-swap each element (REV),
;; bit-reverse each byte (RBIT), then count leading zeros (CLZ).  The
;; two reversals together reverse the full bit order of each element,
;; turning trailing zeros into leading zeros.  The subreg cast views the
;; SImode-element vector as a byte vector for RBIT.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
					      <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
353
;; copysign (x, y): take the magnitude of operand 1 and the sign of
;; operand 2.  Implemented as a bit-select (BSL) with a mask that has
;; only the sign bit set in each element.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  /* Mask of just the sign bit: all-ones shifted left by (unit size - 1).  */
  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
						     HOST_WIDE_INT_M1U << bits));
  /* Where the mask is set take bits from operand 2 (sign), elsewhere
     from operand 1 (magnitude).  */
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
371
;; Multiply a vector by one broadcast lane of another vector
;; (MUL/FMUL by element).  Lane index is endian-remapped.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, but the lane comes from the other-width counterpart vector;
;; remap the lane index in that vector's mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
404
;; Multiply a vector by a duplicated scalar, using the by-element form
;; with lane 0 (the scalar lives in lane 0 of its SIMD register).
;; Fix: drop the stray ';' that followed the output-template string --
;; in .md syntax ';' starts a line comment, so it was inert, but it was
;; a typo and misleading.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
415
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step (FRSQRTS), used to refine
;; the FRSQRTE estimate.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
432
;; Approximate reciprocal square root, expanded entirely by
;; aarch64_emit_approx_sqrt (estimate plus refinement steps).
;; Fix: constraint strings are meaningless in a define_expand (they are
;; only read from define_insn patterns), so the misleading "=w"/"w"
;; constraints have been removed.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
442
;; Multiply a DF scalar by one lane of a V2DF vector, producing the
;; scalar result via the by-element FMUL form.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
457
;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value (plain ABS rtl, combinable).
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)
473
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
;; Intrinsic form of integer ABS, kept as an unspec so that combine
;; cannot fold it into operations that already include an ABS step
;; (e.g. SABD) -- the intrinsic must map to a literal ABS instruction.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
486
;; Signed absolute difference: abs (a - b) matches SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Signed absolute difference and accumulate: acc + abs (a - b) matches
;; SABA.  Operand 3 is the accumulator and is tied to the output ("0").
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)
507
;; Floating-point absolute difference: abs (a - b) matches FABD,
;; vector and scalar FP modes.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
518
;; Vector bitwise AND.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise OR.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise exclusive OR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
545
;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
553
;; Insert a scalar into one lane of an integer vector.  Operand 2 is a
;; one-hot vec_merge mask; the C fragment converts it to a lane number
;; (exact_log2), remaps for endianness, and re-encodes it so the %p2
;; modifier can print the lane.  Alternatives: INS from GP reg, INS from
;; SIMD lane 0, LD1 of a single lane from memory.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)
579
;; Copy one lane of a vector into a lane of another vector of the same
;; mode (element INS).  Operand 2 is the one-hot destination-lane mask,
;; operand 4 the source lane; both are endian-remapped.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source vector is the other-width counterpart mode;
;; its lane index is remapped in that mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
			   INTVAL (operands[4])));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
622
;; Logical shift right by an immediate replicated across all lanes (USHR).
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic shift right by an immediate (SSHR).
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left by an immediate (SHL).
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
649
;; Shift left by per-lane register amounts (SSHL).  Plain ashift rtl:
;; left shifts are the same for signed and unsigned.
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Register shift where negative amounts shift right with zero fill
;; (USHL).  Kept as an unspec: the bidirectional semantics have no
;; direct rtl equivalent.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Register shift where negative amounts shift right with sign fill
;; (SSHL); unspec for the same reason as above.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
678
;; Vector shift-left expander.  A constant amount in [0, bit_width)
;; becomes an immediate SHL; otherwise the (possibly negative-checked)
;; amount is forced to a register, duplicated across a vector, and a
;; register SSHL is used.  FAIL if the amount is neither constant,
;; memory, nor register.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* Left shift by 0..bit_width-1 maps directly to immediate SHL.  */
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  /* Out-of-range constant: fall through to the register path.  */
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar amount and use the per-lane register
	 shift.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
725
;; Vector logical shift-right expander.  A constant amount in
;; [1, bit_width] maps to immediate USHR (USHR allows shifting by the
;; full element width, unlike left shift).  Otherwise the amount is
;; negated -- USHL shifts right for negative amounts -- broadcast, and
;; the unsigned register shift is used.
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the amount: USHL with a negative count shifts right.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
772
;; Vector arithmetic shift-right expander; mirrors lshr<mode>3 but uses
;; SSHR for immediates and the signed register shift (SSHL with negated
;; amount) for variable counts.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR accepts amounts 1..bit_width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the amount: SSHL with a negative count shifts right.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
819
;; Vector-by-vector shift left (per-lane amounts already in a vector):
;; maps directly onto the register SSHL pattern.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})
830
831 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
832 ;; Negating individual lanes most certainly offsets the
833 ;; gain from vectorization.
834 (define_expand "vashr<mode>3"
835 [(match_operand:VDQ_BHSI 0 "register_operand" "")
836 (match_operand:VDQ_BHSI 1 "register_operand" "")
837 (match_operand:VDQ_BHSI 2 "register_operand" "")]
838 "TARGET_SIMD"
839 {
840 rtx neg = gen_reg_rtx (<MODE>mode);
841 emit (gen_neg<mode>2 (neg, operands[2]));
842 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
843 neg));
844 DONE;
845 })
846
847 ;; DI vector shift
848 (define_expand "aarch64_ashr_simddi"
849 [(match_operand:DI 0 "register_operand" "=w")
850 (match_operand:DI 1 "register_operand" "w")
851 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
852 "TARGET_SIMD"
853 {
854 /* An arithmetic shift right by 64 fills the result with copies of the sign
855 bit, just like asr by 63 - however the standard pattern does not handle
856 a shift by 64. */
857 if (INTVAL (operands[2]) == 64)
858 operands[2] = GEN_INT (63);
859 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
860 DONE;
861 }
862 )
863
;; Vector-by-vector logical shift right: negate the per-lane amounts and
;; use USHL, which shifts right (zero-filling) for negative counts.
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

;; Logical shift right for the single-element DI intrinsic.  A shift by
;; the full 64 bits yields zero, which the standard pattern cannot
;; express, so emit a plain zero move in that case.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
890
;; Standard vec_set expander for integer vectors: convert the lane index
;; into the one-hot vec_merge mask expected by aarch64_simd_vec_set.
(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					    GEN_INT (elem), operands[0]));
    DONE;
  }
)
903
904 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
905 (define_insn "vec_shr_<mode>"
906 [(set (match_operand:VD 0 "register_operand" "=w")
907 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
908 (match_operand:SI 2 "immediate_operand" "i")]
909 UNSPEC_VEC_SHR))]
910 "TARGET_SIMD"
911 {
912 if (BYTES_BIG_ENDIAN)
913 return "shl %d0, %d1, %2";
914 else
915 return "ushr %d0, %d1, %2";
916 }
917 [(set_attr "type" "neon_shift_imm")]
918 )
919
;; Insert a DI scalar into one lane of a V2DI vector: INS from a general
;; register or from lane 0 of another SIMD register.  Operand 2 is the
;; one-hot lane mask, decoded and endian-remapped as in the VDQ_BHSI
;; variant above.
(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
	    (vec_duplicate:V2DI
		(match_operand:DI 1 "register_operand" "r,w"))
	    (match_operand:V2DI 3 "register_operand" "0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
	return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

;; vec_set expander for V2DI: build the one-hot mask from the lane index.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
					     GEN_INT (elem), operands[0]));
    DONE;
  }
)
956
;; Insert an FP scalar into one lane of an FP vector.  FP scalars live
;; in SIMD registers, so only the INS-from-element form is needed.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_merge:VDQF_F16
	    (vec_duplicate:VDQF_F16
		(match_operand:<VEL> 1 "register_operand" "w"))
	    (match_operand:VDQF_F16 3 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; vec_set expander for FP vectors: build the one-hot mask from the
;; lane index.  NOTE(review): constraints in a define_expand are
;; ignored; the "+w"/"w" strings here have no effect.
(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)
986
987
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; The accumulator is tied to the output ("0" constraint), matching MLA's
;; read-modify-write destination.
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLA with one multiplicand broadcast from a lane of operand 1.
;; Operand 2 is the lane number; it is remapped to the architectural
;; lane for big-endian before printing.
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane is taken from a vector of the opposite width
;; (64-bit vs 128-bit), so the lane count comes from <VSWAP_WIDTH>mode.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS with one multiplicand broadcast from a lane of operand 1.
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS by lane, with the lane vector of the opposite width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1082
;; Max/Min operations.

;; Element-wise signed/unsigned max and min for 8/16/32-bit lanes;
;; maps directly onto SMAX/SMIN/UMAX/UMIN.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Max/min for 64-bit lanes is lowered to a comparison followed by a
;; vector conditional select (vcond): select operand 1 where the
;; comparison holds, operand 2 otherwise.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Pick the comparison whose true lanes should take operand 1.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
	      operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1147
1148 ;; vec_concat gives a new vector with the low elements from operand 1, and
1149 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1150 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1151 ;; What that means, is that the RTL descriptions of the below patterns
1152 ;; need to change depending on endianness.
1153
1154 ;; Move to the low architectural bits of the register.
1155 ;; On little-endian this is { operand, zeroes }
1156 ;; On big-endian this is { zeroes, operand }
1157
;; Little-endian variant for modes with more than two elements: the
;; result is { operand, zeroes }, expressed with vec_duplicate of 0
;; for the zeroed half.  Alternatives: SIMD source, GP source via fmov,
;; GP source via dup.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Little-endian variant for two-element modes: the zeroed half is a
;; single scalar (const_int 0), not a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Big-endian variant (> 2 elements): the RTL halves are swapped —
;; { zeroes, operand } — but the emitted instructions are identical.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Big-endian variant for two-element modes.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Entry point: dispatch to the endian-appropriate internal pattern.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)
1234
1235 ;; Move operand1 to the high architectural bits of the register, keeping
1236 ;; the low architectural bits of operand2.
1237 ;; For little-endian this is { operand2, operand1 }
1238 ;; For big-endian this is { operand1, operand2 }
1239
;; Little-endian: keep the low half of operand 0 (selected via the
;; lo-half parallel in operand 2) and insert operand 1 into d[1].
;; Alternative 1 takes the new half from a general register.
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Big-endian: same instructions, but the vec_concat halves are swapped
;; in the RTL to match the reversed lane numbering.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Entry point: build the lo-half lane-selection parallel and dispatch
;; to the endian-appropriate insn.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
						    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
						 operands[1], p));
  DONE;
})
1282
1283 ;; Narrowing operations.
1284
1285 ;; For doubles.
;; Truncate each lane of a 128-bit vector to half width (XTN),
;; producing a 64-bit result.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Pack two 64-bit vectors: assemble them into one 128-bit temporary
;; (swapping which operand goes in which half on big-endian), then
;; narrow with a single XTN.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

;; Pack two 128-bit vectors into one via XTN followed by XTN2.  The
;; output is earlyclobbered ("=&w") because the first XTN writes it
;; while the other source operand is still needed; the operand order is
;; swapped on big-endian.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1327
1328 ;; Widening operations.
1329
;; Widen (sign- or zero-extend) the low half of a 128-bit vector using
;; a shift-left-long by zero.
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; As above, for the high half (SHLL2/USHLL2).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Standard expander: materialize the hi-half lane-selection parallel.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

;; Standard expander: materialize the lo-half lane-selection parallel.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1375
1376 ;; Widening arithmetic.
1377
;; Widening multiply-accumulate on the low halves of two 128-bit
;; vectors: op0 = op1 + extend(lo(op2)) * extend(lo(op4)).
;; Both selections share the same lo-half parallel (match_dup 3).
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; As above for the high halves (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract on the low halves:
;; op0 = op1 - extend(lo(op2)) * extend(lo(op4)).
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; As above for the high halves (SMLSL2/UMLSL2).
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of whole 64-bit vectors:
;; op0 = op3 + extend(op1) * extend(op2).
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of whole 64-bit vectors:
;; op0 = op1 - extend(op2) * extend(op3).
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
1469
;; Widening multiply of the low halves of two 128-bit vectors
;; (SMULL/UMULL); both halves selected by the same parallel.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard widening-multiply-low expander: supply the lo-half parallel.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

;; Widening multiply of the high halves (SMULL2/UMULL2).
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Standard widening-multiply-high expander: supply the hi-half parallel.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;

 }
)
1524
1525 ;; FP vector operations.
1526 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1527 ;; double-precision (64-bit) floating-point data types and arithmetic as
1528 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1529 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1530 ;;
1531 ;; Floating-point operations can raise an exception. Vectorizing such
1532 ;; operations are safe because of reasons explained below.
1533 ;;
1534 ;; ARMv8 permits an extension to enable trapped floating-point
1535 ;; exception handling, however this is an optional feature. In the
1536 ;; event of a floating-point exception being raised by vectorised
1537 ;; code then:
1538 ;; 1. If trapped floating-point exceptions are available, then a trap
1539 ;; will be taken when any lane raises an enabled exception. A trap
1540 ;; handler may determine which lane raised the exception.
1541 ;; 2. Alternatively a sticky exception flag is set in the
1542 ;; floating-point status register (FPSR). Software may explicitly
1543 ;; test the exception flags, in which case the tests will either
1544 ;; prevent vectorisation, allowing precise identification of the
1545 ;; failing operation, or if tested outside of vectorisable regions
1546 ;; then the specific operation and lane are not of interest.
1547
1548 ;; FP arithmetic operations.
1549
;; Vector FP addition (FADD), half/single/double lanes.
(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP subtraction (FSUB).
(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP multiplication (FMUL).
(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Vector FP division: first try an approximate reciprocal sequence
;; (aarch64_emit_approx_div); if that declines, fall through to the
;; *div<mode>3 insn below, forcing operand 1 into a register.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; True hardware FP division (FDIV).
(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

;; Vector FP negation (FNEG).
(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

;; Vector FP absolute value (FABS).
(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
1613
;; Fused multiply-add: op0 = op1 * op2 + op3 (FMLA).  The addend is
;; tied to the output, matching FMLA's accumulating destination.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLA with one multiplicand broadcast from a lane of operand 1;
;; the lane number is endian-remapped before printing.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA by lane, with the lane vector of the opposite width.
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with one multiplicand duplicated from a scalar register;
;; printed as an access to lane 0.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fused multiply-add where one multiplicand is a lane
;; extracted from a V2DF vector (FMLA by element).
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1686
;; Fused negate-multiply-add: op0 = op1 * -op2 + op3 (FMLS).
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (match_operand:VHSDF 1 "register_operand" "w")
          (neg:VHSDF
	    (match_operand:VHSDF 2 "register_operand" "w"))
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS with one multiplicand broadcast from a lane of operand 1 (the
;; negation is attached to the full-vector multiplicand in the RTL).
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
        (match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS by lane, with the lane vector of the opposite width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
        (match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
					  INTVAL (operands[2])));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with one multiplicand duplicated from a scalar register.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
        (match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF negate-multiply-add with the lane multiplicand extracted
;; from a V2DF vector (FMLS by element).
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
        (match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1765
1766 ;; Vector versions of the floating-point frint patterns.
1767 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format; the FRINT iterator picks the
;; rounding mode suffix (z, p, m, i, x, a, n).
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor

;; FP -> integer conversion with an explicit rounding mode: the inner
;; unspec is the rounding step, the outer fix/unsigned_fix the
;; conversion, matching a single FCVT<mode><su> instruction.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HF -> HI truncating conversion (FCVTZS/FCVTZU).
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Scalar HI -> HF conversion (SCVTF/UCVTF).
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine a multiply by a power of two with a truncating FP->int
;; conversion into one fixed-point FCVTZ with a #fbits operand.  The
;; condition checks that the multiplier really is 2^n with n in the
;; lane-width range.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
1834
;; Standard fix/unsigned-fix expander: FP -> int with truncation,
;; expressed via the UNSPEC_FRINTZ rounding step.  No C body needed.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

;; fix_trunc/fixuns_trunc expander: identical RTL to the above, under
;; the alternate standard pattern name.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

;; ftrunc (round toward zero, result still FP) via UNSPEC_FRINTZ.
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

;; Vector int -> FP conversion (SCVTF/UCVTF).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
1866
1867 ;; Conversions between vectors of floats and doubles.
1868 ;; Contains a mix of patterns to match standard pattern names
1869 ;; and those for intrinsics.
1870
1871 ;; Float widening operations.
1872
;; Widen the low-numbered half of a float vector (FCVTL reads the low
;; architectural half; the vec_select parallel must match that half).
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1883
1884 ;; Convert between fixed-point and floating-point (vector modes)
1885
;; Float -> fixed-point conversion with immediate fractional-bit count
;; (FCVTZS/FCVTZU with #fbits, operand 2).
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
1896
;; Fixed-point -> float conversion with immediate fractional-bit count
;; (SCVTF/UCVTF with #fbits, operand 2).
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
1907
1908 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1909 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1910 ;; the meaning of HI and LO changes depending on the target endianness.
1911 ;; While elsewhere we map the higher numbered elements of a vector to
1912 ;; the lower architectural lanes of the vector, for these patterns we want
1913 ;; to always treat "hi" as referring to the higher architectural lanes.
1914 ;; Consequently, while the patterns below look inconsistent with our
1915 ;; other big-endian patterns their behavior is as required.
1916
;; Standard vec_unpacks_lo pattern: build the lo-half lane-select parallel
;; and delegate to the FCVTL insn above.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* false => select the low architectural half.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
1928
;; Widen the high-numbered half of a float vector (FCVTL2 reads the high
;; architectural half; the vec_select parallel must match that half).
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1939
;; Standard vec_unpacks_hi pattern: build the hi-half lane-select parallel
;; and delegate to the FCVTL2 insn above.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* true => select the high architectural half.  The _hi_ generator
       must be used here: its insn matches the vect_par_cnst_hi_half
       parallel P and emits FCVTL2; the _lo_ variant would reject P and
       widen the wrong half of the input.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

;; Widen a 64-bit float vector to its 128-bit counterpart (FCVTL),
;; intrinsic form with no explicit half-select.
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
1959
1960 ;; Float narrowing operations.
1961
;; Narrow a 128-bit float vector into a 64-bit result (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1970
;; FCVTN2: narrow operand 2 into the high half of the destination while
;; keeping operand 1 (tied to the output) in the low half.
;; Little-endian form: GCC vector element order puts the kept half first
;; in the vec_concat.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1981
;; Big-endian counterpart of the pattern above: the vec_concat operands
;; are swapped because element numbering is reversed, but the emitted
;; FCVTN2 is identical.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
1992
;; Endian-dispatching expander for FCVTN2: pick the _le or _be insn form.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
2006
;; Pack two V2DF vectors into one V4SF: FCVTN narrows the "lo" source into
;; the low half, then FCVTN2 narrows the "hi" source into the high half.
;; On big-endian the lo/hi roles of the two sources are swapped to keep
;; GCC element numbering consistent (see the endianness note above).
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
2027
;; Pack two DF scalars into one V2SF: assemble them into a V2DF register,
;; then narrow with a single FCVTN.  On big-endian the lo/hi roles of the
;; two sources are swapped, as in vec_pack_trunc_v2df above.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    /* TMP must be V2DF: it is the destination of the V2DF lane-move
       patterns below and the source of the V2DF->V2SF truncation; a
       V2SF temporary would have the wrong mode for all three insns.  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2048
2049 ;; FP Max/Min
2050 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2051 ;; expression like:
2052 ;; a = (b < c) ? b : c;
2053 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2054 ;; either explicitly or indirectly via -ffast-math.
2055 ;;
2056 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2057 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2058 ;; operand will be returned when both operands are zero (i.e. they may not
2059 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2060 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2061 ;; NaNs.
2062
;; smax/smin standard patterns, mapped to FMAXNM/FMINNM (NaN-propagation
;; semantics are acceptable here; see the comment block above).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2071
2072 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2073 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2074 ;; which implement the IEEE fmax ()/fmin () functions.
;; Intrinsic/IEEE forms: FMAX, FMIN, FMAXNM, FMINNM, selected by the
;; FMAXMIN_UNS unspec iterator.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2084
2085 ;; 'across lanes' add.
2086
;; Integer add reduction to a scalar: reduce across lanes (ADDV), then
;; extract the result from lane 0 (endian-corrected).
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2100
;; Pairwise float add (FADDP), the building block for float add reduction.
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
2110
;; Across-lanes integer add reduction (ADDV / ADDP depending on mode);
;; result lands in element 0 of the vector register.
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
2119
;; V2SI has no ADDV form; a single pairwise ADDP of the vector with
;; itself performs the two-lane reduction.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
2128
;; Two-element float add reduction: a single scalar-output FADDP suffices.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
2137
;; Four-element float add reduction: two rounds of pairwise FADDP leave
;; the sum replicated; extract it from lane 0 (endian-corrected).
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2151
;; Count leading redundant sign bits (CLS) per element.
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2159
;; Count leading zeros (CLZ) per element.
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2167
;; Population count (CNT) — only byte-element modes exist in hardware.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2175
2176 ;; 'across lanes' max and min ops.
2177
2178 ;; Template for outputting a scalar, so we can create __builtins which can be
2179 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP min/max reduction to scalar: reduce across lanes then extract the
;; result from lane 0 (endian-corrected).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2194
2195 ;; Likewise for integer cases, signed and unsigned.
;; Integer (signed and unsigned) min/max reduction to scalar; same
;; reduce-then-extract shape as the FP expander above.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2210
;; Across-lanes integer min/max reduction ([SU]MAXV/[SU]MINV).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2219
;; V2SI has no across-lanes form; a single pairwise op of the vector with
;; itself performs the two-lane reduction.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
2228
;; Across-lanes FP min/max reduction (FMAX[NM]V/FMIN[NM]V, or the
;; pairwise form for two-element modes, chosen via <vp>).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2237
2238 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2239 ;; allocation.
2240 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2241 ;; to select.
2242 ;;
;; Thus our BSL is of the form:
;;   op0 = bsl (op1, op2, op3)   where op1 is the mask
;; We can use any of:
;;
;;   if (op0 = op1)  (the mask register doubles as the destination)
;;     bsl op0, op2, op3
;;   if (op0 = op3)  (1-bits in op1 choose bits from op2, else keep op0)
;;     bit op0, op2, op1
;;   if (op0 = op2)  (0-bits in op1 choose bits from op3, else keep op0)
;;     bif op0, op3, op1
2253 ;;
2254 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2255 ;; Some forms of straight-line code may generate the equivalent form
2256 ;; in *aarch64_simd_bsl<mode>_alt.
2257
;; Canonical RTL for bit-select: ((op2 ^ op3) & mask) ^ op3.  Three
;; alternatives tie the destination to the mask (BSL), to op3 (BIT) or
;; to op2 (BIF); register allocation picks whichever avoids a copy.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
	     (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:<V_cmp_result> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2275
2276 ;; We need this form in addition to the above pattern to match the case
2277 ;; when combine tries merging three insns such that the second operand of
2278 ;; the outer XOR matches the second operand of the inner XOR rather than
2279 ;; the first. The two are equivalent but since recog doesn't try all
2280 ;; permutations of commutative operations, we have to have a separate pattern.
2281
;; Commuted twin of bsl_internal: the final xor folds against operand 2
;; instead of operand 3, which recog treats as a distinct shape.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
	(xor:VSDQ_I_DI
	   (and:VSDQ_I_DI
	     (xor:VSDQ_I_DI
	       (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
	       (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
	      (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
	  (match_dup:VSDQ_I_DI 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2298
;; Public bit-select expander.  The internal pattern is integer-only, so
;; float modes are punned to the integer compare-result mode first and
;; the result moved back at the end.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_cmp_result> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
      tmp = gen_reg_rtx (<V_cmp_result>mode);
    }
  operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
							  operands[1],
							  operands[2],
							  operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2324
;; Standard vcond_mask pattern: select per lane between operands 1 and 2
;; under the all-ones/all-zeros mask in operand 3.  The all-ones/zero and
;; zero/all-ones constant cases collapse to a move or a NOT of the mask.
(define_expand "vcond_mask_<mode><v_cmp_result>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_cmp_result> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
  else
    {
      /* General case: force both values to registers and bit-select.  */
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2353
2354 ;; Patterns comparing two vectors to produce a mask.
2355
;; Integer vector comparison producing a per-lane all-ones/all-zeros mask.
;; Ordered comparisons map directly onto CM<cc>; unsigned LT/LE are
;; emitted as the swapped GT/GE forms, and NE as an inverted EQ.
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* Signed comparisons against zero have dedicated CM<cc> #0 forms, so
     keep a literal zero for those; any other operand goes in a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      /* a LTU b  <=>  b GTU a.  */
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      /* a LEU b  <=>  b GEU a.  */
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2435
;; Float vector comparison producing an integer per-lane mask.  Ordered
;; comparisons use FCMEQ/FCMGE/FCMGT (with operand swaps for LT/LE and
;; the unordered-complement codes); NE and the UN* codes are built by
;; inverting the complementary ordered comparison, and UNEQ/ORDERED/
;; UNORDERED from combinations of two comparisons.
(define_expand "vec_cmp<mode><v_cmp_result>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* FCM<cc> against #0.0 exists for the ordered codes; otherwise force
     the second operand into a register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Pick the base FCM generator, swapping operands where the target
     only provides the mirrored form.  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNGE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNLE:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNGT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNLT:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
      break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
    case NE:
      /* FCM returns false for lanes which are unordered, so if we use
	 the inverse of the comparison we actually want to emit, then
	 invert the result, we will end up with the correct result.
	 Note that a NE NaN and NaN NE b are true for all a, b.

	 Our transformations are:
	 a UNGE b -> !(b GT a)
	 a UNGT b -> !(b GE a)
	 a UNLE b -> !(a GT b)
	 a UNLT b -> !(a GE b)
	 a   NE b -> !(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      break;

    case UNEQ:
      /* We first check (a > b ||  b > a) which is !UNEQ, inverting
	 this result will then give us (a == b || a UNORDERED b).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
      emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
      break;

    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a), so we can compute
	 UNORDERED as !ORDERED.  */
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
      emit_insn (gen_aarch64_cmge<mode> (operands[0],
					 operands[3], operands[2]));
      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
      emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
      break;

    case ORDERED:
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
      emit_insn (gen_aarch64_cmge<mode> (operands[0],
					 operands[3], operands[2]));
      emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2583
;; Unsigned vector compare: the signed expander already dispatches on the
;; rtx code (LTU/GTU/...), so simply forward to it.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
2595
;; Standard vcond pattern: compare operands 4/5, then use the mask to
;; select between operands 1 and 2.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
					      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));

  DONE;
})
2625
;; vcond with a float comparison selecting between integer vectors of the
;; same width (the "mixed" mode pairing); same NE-inversion trick as above.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
					      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
						operands[0], operands[1],
						operands[2], mask));

  DONE;
})
2656
;; Unsigned vcond: like vcond<mode><mode> but comparing with the unsigned
;; integer compare expander.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));
  DONE;
})
2685
;; Unsigned integer comparison selecting between float vectors of the
;; same width (the other "mixed" pairing).
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
     it as well as switch operands 1/2 in order to avoid the additional
     NOT instruction.  */
  if (code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  mask, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
						  operands[2], mask));
  DONE;
})
2715
2716 ;; Patterns for AArch64 SIMD Intrinsics.
2717
2718 ;; Lane extraction with sign extension to general purpose register.
;; Lane extraction with sign extension to general purpose register (SMOV).
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* RTL lane numbers follow GCC element order; flip for big-endian
       assembly output.  */
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
2732
;; Lane extraction with zero extension to a 32-bit GP register (UMOV).
(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian assembly output.  */
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
2746
2747 ;; Lane extraction of a value, neither sign nor zero extension
2748 ;; is guaranteed so upper bits should be considered undefined.
2749 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Generic lane extraction: to a GP register (UMOV), to a SIMD scalar
;; register (DUP), or straight to memory (ST1 one-lane).
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian assembly output.  */
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
2772
2773 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2774 ;; dest vector.
2775
;; Concatenate a 64-bit value with zero into a 128-bit register.  Writing
;; a D register already zeros the upper half, so this is just a move,
;; FMOV from GP, or 64-bit load.  Little-endian: zero is the high part.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
2790
;; Big-endian twin of the pattern above: the zero immediate comes first
;; in the vec_concat, but the emitted code is identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
	   (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
2805
2806 (define_expand "aarch64_combine<mode>"
2807 [(match_operand:<VDBL> 0 "register_operand")
2808 (match_operand:VDC 1 "register_operand")
2809 (match_operand:VDC 2 "register_operand")]
2810 "TARGET_SIMD"
2811 {
2812 rtx op1, op2;
2813 if (BYTES_BIG_ENDIAN)
2814 {
2815 op1 = operands[2];
2816 op2 = operands[1];
2817 }
2818 else
2819 {
2820 op1 = operands[1];
2821 op2 = operands[2];
2822 }
2823 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
2824 DONE;
2825 }
2826 )
2827
2828 (define_insn_and_split "aarch64_combine_internal<mode>"
2829 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2830 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2831 (match_operand:VDC 2 "register_operand" "w")))]
2832 "TARGET_SIMD"
2833 "#"
2834 "&& reload_completed"
2835 [(const_int 0)]
2836 {
2837 if (BYTES_BIG_ENDIAN)
2838 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2839 else
2840 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2841 DONE;
2842 }
2843 [(set_attr "type" "multiple")]
2844 )
2845
2846 (define_expand "aarch64_simd_combine<mode>"
2847 [(match_operand:<VDBL> 0 "register_operand")
2848 (match_operand:VDC 1 "register_operand")
2849 (match_operand:VDC 2 "register_operand")]
2850 "TARGET_SIMD"
2851 {
2852 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2853 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2854 DONE;
2855 }
2856 [(set_attr "type" "multiple")]
2857 )
2858
2859 ;; <su><addsub>l<q>.
2860
;; Widening add/sub of the HIGH halves of two Q registers ([us]addl2/[us]subl2).
2861 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2863 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2864 (match_operand:VQW 1 "register_operand" "w")
2865 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2866 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2867 (match_operand:VQW 2 "register_operand" "w")
2868 (match_dup 3)))))]
2869 "TARGET_SIMD"
2870 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2871 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2872 )
2873
;; Same operation on the LOW halves ([us]addl/[us]subl).
2874 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2876 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2877 (match_operand:VQW 1 "register_operand" "w")
2878 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2879 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2880 (match_operand:VQW 2 "register_operand" "w")
2881 (match_dup 3)))))]
2882 "TARGET_SIMD"
2883 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2884 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2885 )
2886
2887
;; Builtin expanders: each supplies the hi-half selection parallel and
;; forwards to the matching *_hi_internal pattern above.
2888 (define_expand "aarch64_saddl2<mode>"
2889 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2890 (match_operand:VQW 1 "register_operand" "w")
2891 (match_operand:VQW 2 "register_operand" "w")]
2892 "TARGET_SIMD"
2893 {
2894 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2895 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2896 operands[2], p));
2897 DONE;
2898 })
2899
2900 (define_expand "aarch64_uaddl2<mode>"
2901 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2902 (match_operand:VQW 1 "register_operand" "w")
2903 (match_operand:VQW 2 "register_operand" "w")]
2904 "TARGET_SIMD"
2905 {
2906 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2907 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2908 operands[2], p));
2909 DONE;
2910 })
2911
2912 (define_expand "aarch64_ssubl2<mode>"
2913 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2914 (match_operand:VQW 1 "register_operand" "w")
2915 (match_operand:VQW 2 "register_operand" "w")]
2916 "TARGET_SIMD"
2917 {
2918 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2919 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2920 operands[2], p));
2921 DONE;
2922 })
2923
2924 (define_expand "aarch64_usubl2<mode>"
2925 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2926 (match_operand:VQW 1 "register_operand" "w")
2927 (match_operand:VQW 2 "register_operand" "w")]
2928 "TARGET_SIMD"
2929 {
2930 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2931 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2932 operands[2], p));
2933 DONE;
2934 })
2935
;; Widening add/sub of two full D registers (no half selection needed).
2936 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2937 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2938 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2939 (match_operand:VD_BHSI 1 "register_operand" "w"))
2940 (ANY_EXTEND:<VWIDE>
2941 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2942 "TARGET_SIMD"
2943 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2944 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2945 )
2946
2947 ;; <su><addsub>w<q>.
2948
;; Widening sum of a full Q vector into a half-width-count accumulator:
;; saddw the low half, then saddw2 the high half.
2949 (define_expand "widen_ssum<mode>3"
2950 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2951 (plus:<VDBLW> (sign_extend:<VDBLW>
2952 (match_operand:VQW 1 "register_operand" ""))
2953 (match_operand:<VDBLW> 2 "register_operand" "")))]
2954 "TARGET_SIMD"
2955 {
2956 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2957 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2958
2959 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2960 operands[1], p));
2961 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
2962 DONE;
2963 }
2964 )
2965
;; D-register variant: a single saddw suffices.
2966 (define_expand "widen_ssum<mode>3"
2967 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2968 (plus:<VWIDE> (sign_extend:<VWIDE>
2969 (match_operand:VD_BHSI 1 "register_operand" ""))
2970 (match_operand:<VWIDE> 2 "register_operand" "")))]
2971 "TARGET_SIMD"
2972 {
2973 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
2974 DONE;
2975 })
2976
;; Unsigned counterparts of the two expanders above.
2977 (define_expand "widen_usum<mode>3"
2978 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2979 (plus:<VDBLW> (zero_extend:<VDBLW>
2980 (match_operand:VQW 1 "register_operand" ""))
2981 (match_operand:<VDBLW> 2 "register_operand" "")))]
2982 "TARGET_SIMD"
2983 {
2984 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2985 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2986
2987 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2988 operands[1], p));
2989 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2990 DONE;
2991 }
2992 )
2993
2994 (define_expand "widen_usum<mode>3"
2995 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2996 (plus:<VWIDE> (zero_extend:<VWIDE>
2997 (match_operand:VD_BHSI 1 "register_operand" ""))
2998 (match_operand:<VWIDE> 2 "register_operand" "")))]
2999 "TARGET_SIMD"
3000 {
3001 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3002 DONE;
3003 })
3004
;; Wide accumulator +/- extended D-register narrow operand ([us]addw/[us]subw).
3005 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3006 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3007 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3008 (ANY_EXTEND:<VWIDE>
3009 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3010 "TARGET_SIMD"
3011 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3012 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3013 )
3014
;; As above but the narrow operand is the LOW half of a Q register.
3015 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3016 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3017 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3018 (ANY_EXTEND:<VWIDE>
3019 (vec_select:<VHALF>
3020 (match_operand:VQW 2 "register_operand" "w")
3021 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3022 "TARGET_SIMD"
3023 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3024 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3025 )
3026
;; HIGH-half counterpart ([us]addw2/[us]subw2).
3027 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3028 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3029 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3030 (ANY_EXTEND:<VWIDE>
3031 (vec_select:<VHALF>
3032 (match_operand:VQW 2 "register_operand" "w")
3033 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3034 "TARGET_SIMD"
3035 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3036 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3037 )
3038
;; Builtin expanders supplying the hi-half parallel for the *w2 patterns.
3039 (define_expand "aarch64_saddw2<mode>"
3040 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3041 (match_operand:<VWIDE> 1 "register_operand" "w")
3042 (match_operand:VQW 2 "register_operand" "w")]
3043 "TARGET_SIMD"
3044 {
3045 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3046 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3047 operands[2], p));
3048 DONE;
3049 })
3050
3051 (define_expand "aarch64_uaddw2<mode>"
3052 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3053 (match_operand:<VWIDE> 1 "register_operand" "w")
3054 (match_operand:VQW 2 "register_operand" "w")]
3055 "TARGET_SIMD"
3056 {
3057 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3058 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3059 operands[2], p));
3060 DONE;
3061 })
3062
3063
3064 (define_expand "aarch64_ssubw2<mode>"
3065 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3066 (match_operand:<VWIDE> 1 "register_operand" "w")
3067 (match_operand:VQW 2 "register_operand" "w")]
3068 "TARGET_SIMD"
3069 {
3070 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3071 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3072 operands[2], p));
3073 DONE;
3074 })
3075
3076 (define_expand "aarch64_usubw2<mode>"
3077 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3078 (match_operand:<VWIDE> 1 "register_operand" "w")
3079 (match_operand:VQW 2 "register_operand" "w")]
3080 "TARGET_SIMD"
3081 {
3082 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3083 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3084 operands[2], p));
3085 DONE;
3086 })
3087
3088 ;; <su><r>h<addsub>.
;; Halving add/sub, optionally rounding (shadd/srhadd/uhadd/urhadd/...),
;; expressed as an unspec since the halving semantics have no RTL equivalent.
3089
3090 (define_insn "aarch64_<sur>h<addsub><mode>"
3091 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3092 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3093 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3094 HADDSUB))]
3095 "TARGET_SIMD"
3096 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3097 [(set_attr "type" "neon_<addsub>_halve<q>")]
3098 )
3099
3100 ;; <r><addsub>hn<q>.
;; Add/sub returning the high narrow half of each element (addhn/raddhn/...).
3101
3102 (define_insn "aarch64_<sur><addsub>hn<mode>"
3103 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3104 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3105 (match_operand:VQN 2 "register_operand" "w")]
3106 ADDSUBHN))]
3107 "TARGET_SIMD"
3108 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3109 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3110 )
3111
;; Second-part variant writing the upper half of the destination; operand 1
;; is tied to the output ("0") so the lower half is preserved.
3112 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3113 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3114 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3115 (match_operand:VQN 2 "register_operand" "w")
3116 (match_operand:VQN 3 "register_operand" "w")]
3117 ADDSUBHN2))]
3118 "TARGET_SIMD"
3119 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3120 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3121 )
3122
3123 ;; pmul.
;; Polynomial (carry-less) multiply on byte vectors.
3124
3125 (define_insn "aarch64_pmul<mode>"
3126 [(set (match_operand:VB 0 "register_operand" "=w")
3127 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3128 (match_operand:VB 2 "register_operand" "w")]
3129 UNSPEC_PMUL))]
3130 "TARGET_SIMD"
3131 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3132 [(set_attr "type" "neon_mul_<Vetype><q>")]
3133 )
3134
3135 ;; fmulx.
;; Extended FP multiply (special handling of 0 * inf), vector and scalar.
3136
3137 (define_insn "aarch64_fmulx<mode>"
3138 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3139 (unspec:VHSDF_HSDF
3140 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3141 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3142 UNSPEC_FMULX))]
3143 "TARGET_SIMD"
3144 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3145 [(set_attr "type" "neon_fp_mul_<stype>")]
3146 )
3147
3148 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane of a vector of the opposite width class.
3149
3150 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3151 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3152 (unspec:VDQSF
3153 [(match_operand:VDQSF 1 "register_operand" "w")
3154 (vec_duplicate:VDQSF
3155 (vec_select:<VEL>
3156 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3157 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3158 UNSPEC_FMULX))]
3159 "TARGET_SIMD"
3160 {
3161 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3162 INTVAL (operands[3])));
3163 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3164 }
3165 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3166 )
3167
3168 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-width vector.
3169
3170 (define_insn "*aarch64_mulx_elt<mode>"
3171 [(set (match_operand:VDQF 0 "register_operand" "=w")
3172 (unspec:VDQF
3173 [(match_operand:VDQF 1 "register_operand" "w")
3174 (vec_duplicate:VDQF
3175 (vec_select:<VEL>
3176 (match_operand:VDQF 2 "register_operand" "w")
3177 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3178 UNSPEC_FMULX))]
3179 "TARGET_SIMD"
3180 {
3181 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3182 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3183 }
3184 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3185 )
3186
3187 ;; vmulxq_lane
3188
;; FMULX by a scalar register duplicated across all lanes (lane index 0).
;; Fix: drop the stray ';' that followed the output-template string — in md
;; syntax a bare ';' opens a comment, so it was only harmless by accident.
3189 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3190 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3191 (unspec:VHSDF
3192 [(match_operand:VHSDF 1 "register_operand" "w")
3193 (vec_duplicate:VHSDF
3194 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3195 UNSPEC_FMULX))]
3196 "TARGET_SIMD"
3197 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
3198 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3199 )
3200
3201 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3202 ;; vmulxd_lane_f64 == vmulx_lane_f64
3203 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3204
;; Scalar FMULX where the second operand is extracted from a vector lane.
3205 (define_insn "*aarch64_vgetfmulx<mode>"
3206 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3207 (unspec:<VEL>
3208 [(match_operand:<VEL> 1 "register_operand" "w")
3209 (vec_select:<VEL>
3210 (match_operand:VDQF 2 "register_operand" "w")
3211 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3212 UNSPEC_FMULX))]
3213 "TARGET_SIMD"
3214 {
3215 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3216 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3217 }
3218 [(set_attr "type" "fmul<Vetype>")]
3219 )
3220 ;; <su>q<addsub>
;; Saturating add/sub, directly representable in RTL (ss_plus etc.).
3221
3222 (define_insn "aarch64_<su_optab><optab><mode>"
3223 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3224 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3225 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3226 "TARGET_SIMD"
3227 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3228 [(set_attr "type" "neon_<optab><q>")]
3229 )
3230
3231 ;; suqadd and usqadd
;; Mixed-signedness saturating accumulate; destination is read-modify-write
;; (operand 1 tied to 0), so only two operands appear in the assembly.
3232
3233 (define_insn "aarch64_<sur>qadd<mode>"
3234 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3235 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3236 (match_operand:VSDQ_I 2 "register_operand" "w")]
3237 USSUQADD))]
3238 "TARGET_SIMD"
3239 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3240 [(set_attr "type" "neon_qadd<q>")]
3241 )
3242
3243 ;; sqmovun
;; Signed-to-unsigned saturating extract narrow.
3244
3245 (define_insn "aarch64_sqmovun<mode>"
3246 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3247 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3248 UNSPEC_SQXTUN))]
3249 "TARGET_SIMD"
3250 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3251 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3252 )
3253
3254 ;; sqmovn and uqmovn
;; Saturating extract narrow, same-signedness.
3255
3256 (define_insn "aarch64_<sur>qmovn<mode>"
3257 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3258 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3259 SUQMOVN))]
3260 "TARGET_SIMD"
3261 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3262 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3263 )
3264
3265 ;; <su>q<absneg>
;; Saturating absolute value / negate (sqabs, sqneg).
3266
3267 (define_insn "aarch64_s<optab><mode>"
3268 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3269 (UNQOPS:VSDQ_I
3270 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3271 "TARGET_SIMD"
3272 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3273 [(set_attr "type" "neon_<optab><q>")]
3274 )
3275
3276 ;; sq<r>dmulh.
;; Saturating (rounding) doubling multiply returning high half.
3277
3278 (define_insn "aarch64_sq<r>dmulh<mode>"
3279 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3280 (unspec:VSDQ_HSI
3281 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3282 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3283 VQDMULH))]
3284 "TARGET_SIMD"
3285 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3286 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3287 )
3288
3289 ;; sq<r>dmulh_lane
;; By-lane forms; the lane number is flipped for big-endian at output time.
3290
3291 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3292 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3293 (unspec:VDQHS
3294 [(match_operand:VDQHS 1 "register_operand" "w")
3295 (vec_select:<VEL>
3296 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3297 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3298 VQDMULH))]
3299 "TARGET_SIMD"
3300 "*
3301 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3302 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3303 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3304 )
3305
3306 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3307 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3308 (unspec:VDQHS
3309 [(match_operand:VDQHS 1 "register_operand" "w")
3310 (vec_select:<VEL>
3311 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3312 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3313 VQDMULH))]
3314 "TARGET_SIMD"
3315 "*
3316 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3317 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3318 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3319 )
3320
;; Scalar by-lane sq(r)dmulh.  Fix: print the lane element suffix with
;; <Vetype>, matching every sibling lane pattern in this file, instead of the
;; register-prefix attribute <v> (same text for HI/SI, but inconsistent).
3321 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3322 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3323 (unspec:SD_HSI
3324 [(match_operand:SD_HSI 1 "register_operand" "w")
3325 (vec_select:<VEL>
3326 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3327 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3328 VQDMULH))]
3329 "TARGET_SIMD"
3330 "*
3331 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3332 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3333 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3334 )
3335
;; Scalar by-lane (laneq, 128-bit index vector) sq(r)dmulh.  Fix: use
;; <Vetype> for the lane element suffix, consistent with the vector laneq
;; pattern above (output text is unchanged for HI/SI modes).
3336 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3337 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3338 (unspec:SD_HSI
3339 [(match_operand:SD_HSI 1 "register_operand" "w")
3340 (vec_select:<VEL>
3341 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3342 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3343 VQDMULH))]
3344 "TARGET_SIMD"
3345 "*
3346 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3347 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3348 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3349 )
3350
3351 ;; sqrdml[as]h.
;; ARMv8.1-A saturating rounding doubling multiply-accumulate/subtract,
;; gated on TARGET_SIMD_RDMA.  Accumulator is tied to the destination.
3352
3353 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3354 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3355 (unspec:VSDQ_HSI
3356 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3357 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3358 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3359 SQRDMLH_AS))]
3360 "TARGET_SIMD_RDMA"
3361 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3362 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3363 )
3364
3365 ;; sqrdml[as]h_lane.
3366
;; Vector variant, multiplier taken from a lane of a 64-bit index vector.
3367 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3368 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3369 (unspec:VDQHS
3370 [(match_operand:VDQHS 1 "register_operand" "0")
3371 (match_operand:VDQHS 2 "register_operand" "w")
3372 (vec_select:<VEL>
3373 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3374 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3375 SQRDMLH_AS))]
3376 "TARGET_SIMD_RDMA"
3377 {
3378 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3379 return
3380 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3381 }
3382 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3383 )
3384
;; Scalar variant of the by-lane form.
3385 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3386 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3387 (unspec:SD_HSI
3388 [(match_operand:SD_HSI 1 "register_operand" "0")
3389 (match_operand:SD_HSI 2 "register_operand" "w")
3390 (vec_select:<VEL>
3391 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3392 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3393 SQRDMLH_AS))]
3394 "TARGET_SIMD_RDMA"
3395 {
3396 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3397 return
3398 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3399 }
3400 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3401 )
3402
3403 ;; sqrdml[as]h_laneq.
3404
;; As _lane but indexing a 128-bit vector (<VCONQ>).
3405 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3406 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3407 (unspec:VDQHS
3408 [(match_operand:VDQHS 1 "register_operand" "0")
3409 (match_operand:VDQHS 2 "register_operand" "w")
3410 (vec_select:<VEL>
3411 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3412 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3413 SQRDMLH_AS))]
3414 "TARGET_SIMD_RDMA"
3415 {
3416 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3417 return
3418 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3419 }
3420 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3421 )
3422
;; Scalar laneq variant of sqrdml[as]h.  Fix: print the lane element suffix
;; with <Vetype> — the matching _lane pattern above already uses
;; %3.<Vetype>[%4]; this one used %3.<v>[%4] (same text for HI/SI but
;; inconsistent with the rest of the file).
3423 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3424 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3425 (unspec:SD_HSI
3426 [(match_operand:SD_HSI 1 "register_operand" "0")
3427 (match_operand:SD_HSI 2 "register_operand" "w")
3428 (vec_select:<VEL>
3429 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3430 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3431 SQRDMLH_AS))]
3432 "TARGET_SIMD_RDMA"
3433 {
3434 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3435 return
3436 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3437 }
3438 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3439 )
3440
3441 ;; vqdml[sa]l
;; Saturating doubling multiply-add/sub long: widen both multiplicands,
;; multiply, double via ss_ashift by 1, then saturating add/sub into the
;; wide accumulator (operand 1, tied to the destination).
3442
3443 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3444 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3445 (SBINQOPS:<VWIDE>
3446 (match_operand:<VWIDE> 1 "register_operand" "0")
3447 (ss_ashift:<VWIDE>
3448 (mult:<VWIDE>
3449 (sign_extend:<VWIDE>
3450 (match_operand:VSD_HSI 2 "register_operand" "w"))
3451 (sign_extend:<VWIDE>
3452 (match_operand:VSD_HSI 3 "register_operand" "w")))
3453 (const_int 1))))]
3454 "TARGET_SIMD"
3455 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3456 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3457 )
3458
3459 ;; vqdml[sa]l_lane
;; Second multiplicand broadcast from a lane of a 64-bit index vector.
3460
3461 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3462 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3463 (SBINQOPS:<VWIDE>
3464 (match_operand:<VWIDE> 1 "register_operand" "0")
3465 (ss_ashift:<VWIDE>
3466 (mult:<VWIDE>
3467 (sign_extend:<VWIDE>
3468 (match_operand:VD_HSI 2 "register_operand" "w"))
3469 (sign_extend:<VWIDE>
3470 (vec_duplicate:VD_HSI
3471 (vec_select:<VEL>
3472 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3473 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3474 ))
3475 (const_int 1))))]
3476 "TARGET_SIMD"
3477 {
3478 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3479 return
3480 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3481 }
3482 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3483 )
3484
;; As above but indexing a 128-bit vector (<VCONQ>).
3485 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3486 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3487 (SBINQOPS:<VWIDE>
3488 (match_operand:<VWIDE> 1 "register_operand" "0")
3489 (ss_ashift:<VWIDE>
3490 (mult:<VWIDE>
3491 (sign_extend:<VWIDE>
3492 (match_operand:VD_HSI 2 "register_operand" "w"))
3493 (sign_extend:<VWIDE>
3494 (vec_duplicate:VD_HSI
3495 (vec_select:<VEL>
3496 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3497 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3498 ))
3499 (const_int 1))))]
3500 "TARGET_SIMD"
3501 {
3502 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3503 return
3504 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3505 }
3506 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3507 )
3508
;; Scalar (SD_HSI) by-lane variants: no vec_duplicate, operand 2 is scalar.
3509 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3510 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3511 (SBINQOPS:<VWIDE>
3512 (match_operand:<VWIDE> 1 "register_operand" "0")
3513 (ss_ashift:<VWIDE>
3514 (mult:<VWIDE>
3515 (sign_extend:<VWIDE>
3516 (match_operand:SD_HSI 2 "register_operand" "w"))
3517 (sign_extend:<VWIDE>
3518 (vec_select:<VEL>
3519 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3520 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3521 )
3522 (const_int 1))))]
3523 "TARGET_SIMD"
3524 {
3525 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3526 return
3527 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3528 }
3529 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3530 )
3531
3532 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3533 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3534 (SBINQOPS:<VWIDE>
3535 (match_operand:<VWIDE> 1 "register_operand" "0")
3536 (ss_ashift:<VWIDE>
3537 (mult:<VWIDE>
3538 (sign_extend:<VWIDE>
3539 (match_operand:SD_HSI 2 "register_operand" "w"))
3540 (sign_extend:<VWIDE>
3541 (vec_select:<VEL>
3542 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3543 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3544 )
3545 (const_int 1))))]
3546 "TARGET_SIMD"
3547 {
3548 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3549 return
3550 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3551 }
3552 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3553 )
3554
3555 ;; vqdml[sa]l_n
;; Second multiplicand is a scalar register broadcast to all lanes ([0]).
3556
3557 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3558 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3559 (SBINQOPS:<VWIDE>
3560 (match_operand:<VWIDE> 1 "register_operand" "0")
3561 (ss_ashift:<VWIDE>
3562 (mult:<VWIDE>
3563 (sign_extend:<VWIDE>
3564 (match_operand:VD_HSI 2 "register_operand" "w"))
3565 (sign_extend:<VWIDE>
3566 (vec_duplicate:VD_HSI
3567 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3568 (const_int 1))))]
3569 "TARGET_SIMD"
3570 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3571 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3572 )
3573
3574 ;; sqdml[as]l2
;; High-half variants: both multiplicands are the hi halves of Q registers.
3575
3576 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3577 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3578 (SBINQOPS:<VWIDE>
3579 (match_operand:<VWIDE> 1 "register_operand" "0")
3580 (ss_ashift:<VWIDE>
3581 (mult:<VWIDE>
3582 (sign_extend:<VWIDE>
3583 (vec_select:<VHALF>
3584 (match_operand:VQ_HSI 2 "register_operand" "w")
3585 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3586 (sign_extend:<VWIDE>
3587 (vec_select:<VHALF>
3588 (match_operand:VQ_HSI 3 "register_operand" "w")
3589 (match_dup 4))))
3590 (const_int 1))))]
3591 "TARGET_SIMD"
3592 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3593 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3594 )
3595
;; Builtin expanders supplying the hi-half parallel for the *l2 internals.
3596 (define_expand "aarch64_sqdmlal2<mode>"
3597 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3598 (match_operand:<VWIDE> 1 "register_operand" "w")
3599 (match_operand:VQ_HSI 2 "register_operand" "w")
3600 (match_operand:VQ_HSI 3 "register_operand" "w")]
3601 "TARGET_SIMD"
3602 {
3603 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3604 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3605 operands[2], operands[3], p));
3606 DONE;
3607 })
3608
3609 (define_expand "aarch64_sqdmlsl2<mode>"
3610 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3611 (match_operand:<VWIDE> 1 "register_operand" "w")
3612 (match_operand:VQ_HSI 2 "register_operand" "w")
3613 (match_operand:VQ_HSI 3 "register_operand" "w")]
3614 "TARGET_SIMD"
3615 {
3616 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3617 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3618 operands[2], operands[3], p));
3619 DONE;
3620 })
3621
3622 ;; vqdml[sa]l2_lane
;; High-half by-lane forms: first multiplicand is the hi half of a Q
;; register, second is a lane broadcast; lane flipped for big-endian.
3623
3624 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3626 (SBINQOPS:<VWIDE>
3627 (match_operand:<VWIDE> 1 "register_operand" "0")
3628 (ss_ashift:<VWIDE>
3629 (mult:<VWIDE>
3630 (sign_extend:<VWIDE>
3631 (vec_select:<VHALF>
3632 (match_operand:VQ_HSI 2 "register_operand" "w")
3633 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3634 (sign_extend:<VWIDE>
3635 (vec_duplicate:<VHALF>
3636 (vec_select:<VEL>
3637 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3638 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3639 ))))
3640 (const_int 1))))]
3641 "TARGET_SIMD"
3642 {
3643 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3644 return
3645 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3646 }
3647 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3648 )
3649
;; As above but indexing a 128-bit vector (<VCONQ>).
3650 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3651 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3652 (SBINQOPS:<VWIDE>
3653 (match_operand:<VWIDE> 1 "register_operand" "0")
3654 (ss_ashift:<VWIDE>
3655 (mult:<VWIDE>
3656 (sign_extend:<VWIDE>
3657 (vec_select:<VHALF>
3658 (match_operand:VQ_HSI 2 "register_operand" "w")
3659 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3660 (sign_extend:<VWIDE>
3661 (vec_duplicate:<VHALF>
3662 (vec_select:<VEL>
3663 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3664 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3665 ))))
3666 (const_int 1))))]
3667 "TARGET_SIMD"
3668 {
3669 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3670 return
3671 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3672 }
3673 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 )
3675
;; Builtin expanders supplying the hi-half parallel for the patterns above.
3676 (define_expand "aarch64_sqdmlal2_lane<mode>"
3677 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3678 (match_operand:<VWIDE> 1 "register_operand" "w")
3679 (match_operand:VQ_HSI 2 "register_operand" "w")
3680 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3681 (match_operand:SI 4 "immediate_operand" "i")]
3682 "TARGET_SIMD"
3683 {
3684 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3685 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3686 operands[2], operands[3],
3687 operands[4], p));
3688 DONE;
3689 })
3690
3691 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3692 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3693 (match_operand:<VWIDE> 1 "register_operand" "w")
3694 (match_operand:VQ_HSI 2 "register_operand" "w")
3695 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3696 (match_operand:SI 4 "immediate_operand" "i")]
3697 "TARGET_SIMD"
3698 {
3699 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3700 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3701 operands[2], operands[3],
3702 operands[4], p));
3703 DONE;
3704 })
3705
3706 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3707 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3708 (match_operand:<VWIDE> 1 "register_operand" "w")
3709 (match_operand:VQ_HSI 2 "register_operand" "w")
3710 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3711 (match_operand:SI 4 "immediate_operand" "i")]
3712 "TARGET_SIMD"
3713 {
3714 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3715 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3716 operands[2], operands[3],
3717 operands[4], p));
3718 DONE;
3719 })
3720
3721 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3722 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3723 (match_operand:<VWIDE> 1 "register_operand" "w")
3724 (match_operand:VQ_HSI 2 "register_operand" "w")
3725 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3726 (match_operand:SI 4 "immediate_operand" "i")]
3727 "TARGET_SIMD"
3728 {
3729 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3730 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3731 operands[2], operands[3],
3732 operands[4], p));
3733 DONE;
3734 })
3735
;; sqdmlal2/sqdmlsl2 with a scalar multiplier: the scalar (operand 3) is
;; duplicated across the half-vector and multiplied by the high half of
;; operand 2 (selected by parallel operand 4).  Lane 0 of the scalar
;; register is used, so no endian lane remap is needed.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(vec_select:<VHALF>
		  (match_operand:VQ_HSI 2 "register_operand" "w")
		  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	      (sign_extend:<VWIDE>
		(vec_duplicate:<VHALF>
		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	    (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3754
;; Expanders for the sqdmlal2_n/sqdmlsl2_n intrinsics; build the hi-half
;; selection parallel and forward to the *_n_internal insn above.
(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
3782
3783 ;; vqdmull
3784
;; Signed saturating doubling multiply-long: widen both operands, multiply,
;; and saturating-shift left by one (the doubling).  Covers both vector
;; (V4HI/V2SI) and scalar (HI/SI) modes via VSD_HSI.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
3798
3799 ;; vqdmull_lane
3800
;; sqdmull by one lane of a vector, vector forms (VD_HSI first operand).
;; The selected lane of operand 2 is broadcast and multiplied elementwise.
;; _lane indexes a 64-bit vector (<VCOND>), _laneq a 128-bit one (<VCONQ>).
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3842
;; sqdmull by one lane, scalar forms (SD_HSI first operand), so the lane
;; value is used directly with no vec_duplicate.  These share the
;; "aarch64_sqdmull_lane<mode>" naming with the vector forms above; the
;; disjoint mode iterators keep the generated pattern names distinct.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3882
3883 ;; vqdmull_n
3884
;; sqdmull with a scalar multiplier broadcast across the vector; lane 0 of
;; the scalar register is referenced, so no endian remap is needed.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3900
3901 ;; vqdmull2
3902
3903
3904
;; sqdmull2: saturating doubling multiply-long on the high halves of two
;; 128-bit vectors.  Operand 3 is the shared hi-half selection parallel.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander for the vqdmull_high intrinsic: supply the hi-half parallel.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})
3935
3936 ;; vqdmull2_lane
3937
;; sqdmull2 by one lane: high half of operand 1 times a broadcast lane of
;; operand 2.  _lane indexes a 64-bit vector, _laneq a 128-bit vector.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    /* GCC lane numbering -> architectural lane numbering.  */
    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
3983
;; Expanders for the sqdmull2 by-lane intrinsics; build the hi-half
;; parallel and forward to the *_internal insns above.
(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})
4011
4012 ;; vqdmull2_n
4013
;; sqdmull2 with a scalar multiplier broadcast across the half-vector;
;; lane 0 of the scalar register is used, so no endian remap is needed.
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: supply the hi-half parallel for the _n_internal insn.
(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
4043
4044 ;; vshl
4045
;; Vector shift by (per-element) register: the VSHL unspec iterator selects
;; sshl/ushl/srshl/urshl via <sur>.  Covers vector modes plus scalar DI.
;; Note: the stray ';' that previously followed the output template (a
;; comment starter in .md syntax) has been dropped.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
4056
4057
4058 ;; vqshl
4059
;; Saturating (optionally rounding) vector shift by register: the VQSHL
;; unspec iterator selects sqshl/uqshl/sqrshl/uqrshl via <sur>/<r>.
;; Note: the stray ';' that previously followed the output template (a
;; comment starter in .md syntax) has been dropped.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4070
4071 ;; vshll_n
4072
;; Widening shift-left by immediate (vshll_n).  When the shift count equals
;; the element width only the plain SHLL form exists, so emit that;
;; otherwise emit the signed/unsigned variant chosen by <sur>.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4088
4089 ;; vshll_high_n
4090
;; Widening shift-left by immediate on the high half (vshll_high_n).
;; Same SHLL2-vs-<sur>SHLL2 selection as the non-high pattern above.
;; NOTE(review): operand 2 uses the looser "immediate_operand" predicate
;; here, unlike the _bitsize_ predicate above — presumably range-checked
;; elsewhere; confirm before tightening.
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4105
4106 ;; vrshr_n
4107
;; (Rounding) shift right by immediate, chosen by the VRSHR_N iterator.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4118
4119 ;; v(r)sra_n
4120
;; Shift right by immediate and accumulate (v(r)sra_n).  Operand 1 is tied
;; to the destination ("0" constraint): it supplies the accumulator.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)
4132
4133 ;; vs<lr>i_n
4134
;; Shift and insert (vsli_n/vsri_n).  Operand 1 (tied to the destination)
;; provides the bits that are preserved by the insert.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4146
4147 ;; vqshl(u)
4148
;; Saturating shift left by immediate (vqshl_n / vqshlu_n), chosen by the
;; VQSHL_N iterator via <sur>/<u>.
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)
4159
4160
4161 ;; vq(r)shr(u)n_n
4162
;; Saturating (rounding) shift right narrow by immediate
;; (vq(r)shr(u)n_n), chosen by the VQSHRN_N iterator.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4173
4174
4175 ;; cm(eq|ge|gt|lt|le)
4176 ;; Note, we have constraints for Dz and Z as different expanders
4177 ;; have different ideas of what should be passed to this pattern.
4178
;; Integer vector compare producing an all-ones/all-zeros mask: the
;; comparison result is negated because COMPARISONS yields 0/1 per lane.
;; Second alternative compares against immediate zero (ZDz constraint).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
4192
;; DImode signed compare that may live in either register file.  Kept as a
;; single insn until after reload so the register allocator can choose;
;; then split either to a flag-setting compare + conditional store (GP
;; registers) or to the non-clobbering SIMD pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)
4227
;; Post-reload SIMD-register form of the DImode signed compare; the split
;; above falls through to this when the operands stay in FP/SIMD registers.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4241
4242 ;; cm(hs|hi)
4243
;; Unsigned integer vector compare (cmhs/cmhi) producing a lane mask; no
;; compare-against-zero alternative exists for the unsigned forms.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (UCOMPARISONS:<V_cmp_result>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)
4255
;; DImode unsigned compare, split after reload like the signed version:
;; GP registers get a flag-setting compare + conditional store, SIMD
;; registers fall through to the non-clobbering pattern below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)
4290
;; Post-reload SIMD-register form of the DImode unsigned compare.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)
4302
4303 ;; cmtst
4304
4305 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4306 ;; we don't have any insns using ne, and aarch64_vcond outputs
4307 ;; not (neg (eq (and x y) 0))
4308 ;; which is rewritten by simplify_rtx as
4309 ;; plus (eq (and x y) 0) -1.
4310
;; cmtst: test-bits compare, matched in the "plus (eq (and x y) 0) -1"
;; shape simplify_rtx produces (see the section comment above).
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(plus:<V_cmp_result>
	  (eq:<V_cmp_result>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)
4325
;; DImode cmtst, split after reload: GP registers get an AND + flag
;; compare + conditional store; SIMD registers fall through to the
;; non-clobbering pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
4363
;; SIMD-register form of the DImode cmtst.
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4376
4377 ;; fcm(eq|ge|gt|le|lt)
4378
;; Floating-point vector compare producing a lane mask; second alternative
;; compares against floating-point zero (YDz constraint).
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
	(neg:<V_cmp_result>
	  (COMPARISONS:<V_cmp_result>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4392
4393 ;; fac(ge|gt)
4394 ;; Note we can also handle what would be fac(le|lt) by
4395 ;; generating fac(ge|gt).
4396
;; Absolute compare (facge/facgt): compare the absolute values of both
;; operands; the le/lt cases are handled by swapping operands (<cmp_1>/
;; <cmp_2>) as the section comment above describes.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
	(neg:<V_cmp_result>
	  (FAC_COMPARISONS:<V_cmp_result>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4410
4411 ;; addp
4412
;; Pairwise add of two 64-bit vectors (addp).
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Pairwise add reducing a V2DI vector to a scalar DI (addp Dd, Vn.2d).
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
4433
4434 ;; sqrt
4435
;; Vector square root.  The expander first tries the approximate-sqrt
;; estimation sequence (tuning-dependent); if that declines, the plain
;; FSQRT insn below is matched.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
4452
4453 ;; Patterns for vector struct loads and stores.
4454
;; Two-register structure load (LD2) into an OImode register pair.  The
;; inner VQ unspec only carries the element mode; it is a dummy.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; LD2R: load one element pair and replicate it to all lanes.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; LD2 to a single lane; operand 2 supplies the untouched lanes, operand 3
;; is the GCC lane number, remapped to the architectural one for assembly.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)
4489
;; Standard-name expander for two-vector interleaved loads.  On big-endian
;; the loaded register list is reversed (via aarch64_reverse_mask) so the
;; RTL-level lane numbering stays consistent.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
4508
;; Two-register structure store (ST2) from an OImode register pair.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-name expander for two-vector interleaved stores; reverses the
;; register list on big-endian before storing (mirror of vec_load_lanesoi).
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4552
;; Three-register structure loads (LD3/LD3R/LD3-lane) into a CImode
;; register triple; structure mirrors the LD2 patterns above.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; LD3R: load one element triple and replicate it to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; LD3 to a single lane; GCC lane number remapped for assembly.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
    operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-name expander; reverses the register list on big-endian.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})
4606
;; Three-register structure stores (ST3/ST3-lane) from a CImode triple;
;; structure mirrors the ST2 patterns above.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander; reverses the register list on big-endian.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
4650
;; Four-register interleaved load into an XI tuple: ld4 {Va - Vd}, [addr].
4651 (define_insn "aarch64_simd_ld4<mode>"
4652 [(set (match_operand:XI 0 "register_operand" "=w")
4653 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4654 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4655 UNSPEC_LD4))]
4656 "TARGET_SIMD"
4657 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4658 [(set_attr "type" "neon_load4_4reg<q>")]
4659 )
4660
;; ld4r: load one structure and replicate it to all lanes of four registers.
;; The memory operand is BLK because the access size is set by the expander.
4661 (define_insn "aarch64_simd_ld4r<mode>"
4662 [(set (match_operand:XI 0 "register_operand" "=w")
4663 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4664 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4665 UNSPEC_LD4_DUP))]
4666 "TARGET_SIMD"
4667 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4668 [(set_attr "type" "neon_load4_all_lanes<q>")]
4669 )
4670
;; Single-lane ld4: loads one structure into lane %3 of four registers;
;; operand 2 ties the remaining lanes to the input tuple ("0" constraint).
;; Lane index is flipped for big-endian with ENDIAN_LANE_N at output time.
4671 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4672 [(set (match_operand:XI 0 "register_operand" "=w")
4673 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4674 (match_operand:XI 2 "register_operand" "0")
4675 (match_operand:SI 3 "immediate_operand" "i")
4676 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4677 UNSPEC_LD4_LANE))]
4678 "TARGET_SIMD"
4679 {
4680 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4681 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4682 }
4683 [(set_attr "type" "neon_load4_one_lane")]
4684 )
4685
;; vec_load_lanes optab for XI tuples; big-endian reverses the reglist
;; lane order after the ld4, same scheme as vec_load_lanesci above.
4686 (define_expand "vec_load_lanesxi<mode>"
4687 [(set (match_operand:XI 0 "register_operand" "=w")
4688 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4689 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4690 UNSPEC_LD4))]
4691 "TARGET_SIMD"
4692 {
4693 if (BYTES_BIG_ENDIAN)
4694 {
4695 rtx tmp = gen_reg_rtx (XImode);
4696 rtx mask = aarch64_reverse_mask (<MODE>mode);
4697 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4698 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4699 }
4700 else
4701 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4702 DONE;
4703 })
4704
;; Four-register interleaved store of an XI tuple: st4 {Va - Vd}, [addr].
4705 (define_insn "aarch64_simd_st4<mode>"
4706 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4707 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4708 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4709 UNSPEC_ST4))]
4710 "TARGET_SIMD"
4711 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4712 [(set_attr "type" "neon_store4_4reg<q>")]
4713 )
4714
4715 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Single-lane st4; lane remapped with ENDIAN_LANE_N at output time.
4716 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4717 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4718 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4719 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4720 (match_operand:SI 2 "immediate_operand" "i")]
4721 UNSPEC_ST4_LANE))]
4722 "TARGET_SIMD"
4723 {
4724 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4725 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4726 }
4727 [(set_attr "type" "neon_store4_one_lane<q>")]
4728 )
4729
;; vec_store_lanes optab for XI tuples; big-endian reverses the reglist
;; lane order before the st4, mirroring vec_load_lanesxi.
4730 (define_expand "vec_store_lanesxi<mode>"
4731 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4732 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4733 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4734 UNSPEC_ST4))]
4735 "TARGET_SIMD"
4736 {
4737 if (BYTES_BIG_ENDIAN)
4738 {
4739 rtx tmp = gen_reg_rtx (XImode);
4740 rtx mask = aarch64_reverse_mask (<MODE>mode);
4741 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4742 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4743 }
4744 else
4745 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4746 DONE;
4747 })
4748
;; Reverse the lane order of every Q register in a VSTRUCT tuple by applying
;; one tbl (with the supplied V16QI byte-permute mask, operand 2) per
;; component register.  Emitted as "#" and split after reload into
;; <insn_count>/4 tbl instructions; the earlyclobber "=&w" keeps the
;; destination tuple from overlapping the mask/source while splitting
;; register-by-register.
4749 (define_insn_and_split "aarch64_rev_reglist<mode>"
4750 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4751 (unspec:VSTRUCT
4752 [(match_operand:VSTRUCT 1 "register_operand" "w")
4753 (match_operand:V16QI 2 "register_operand" "w")]
4754 UNSPEC_REV_REGLIST))]
4755 "TARGET_SIMD"
4756 "#"
4757 "&& reload_completed"
4758 [(const_int 0)]
4759 {
4760 int i;
4761 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4762 for (i = 0; i < nregs; i++)
4763 {
4764 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4765 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4766 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4767 }
4768 DONE;
4769 }
4770 [(set_attr "type" "neon_tbl1_q")
4771 (set_attr "length" "<insn_count>")]
)
4773
4774 ;; Reload patterns for AdvSIMD register list operands.
4775
;; Move expander for VSTRUCT (OI/CI/XI) tuples.  Before reload, force the
;; source into a register when the destination is not one, so the insn
;; below only ever sees at least one register operand.
4776 (define_expand "mov<mode>"
4777 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4778 (match_operand:VSTRUCT 1 "general_operand" ""))]
4779 "TARGET_SIMD"
4780 {
4781 if (can_create_pseudo_p ())
4782 {
4783 if (GET_CODE (operands[0]) != REG)
4784 operands[1] = force_reg (<MODE>mode, operands[1]);
4785 }
4786 })
4787
;; Little-endian VSTRUCT moves: reg-reg is split later ("#"); memory
;; transfers use multi-register st1/ld1.  Big-endian has its own
;; patterns below (*aarch64_be_mov*).
4788 (define_insn "*aarch64_mov<mode>"
4789 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4790 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4791 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4792 && (register_operand (operands[0], <MODE>mode)
4793 || register_operand (operands[1], <MODE>mode))"
4794 "@
4795 #
4796 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4797 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4798 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4799 neon_load<nregs>_<nregs>reg_q")
4800 (set_attr "length" "<insn_count>,4,4")]
4801 )
4802
;; Element-order-preserving single-register load, used on big-endian where
;; a plain ldr would give the wrong lane layout.  Kept as an unspec so it
;; is not combined/simplified into an ordinary move.
4803 (define_insn "aarch64_be_ld1<mode>"
4804 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4805 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4806 "aarch64_simd_struct_operand" "Utv")]
4807 UNSPEC_LD1))]
4808 "TARGET_SIMD"
4809 "ld1\\t{%0<Vmtype>}, %1"
4810 [(set_attr "type" "neon_load1_1reg<q>")]
4811 )
4812
;; Big-endian counterpart store: st1 keeps element order in memory.
4813 (define_insn "aarch64_be_st1<mode>"
4814 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4815 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4816 UNSPEC_ST1))]
4817 "TARGET_SIMD"
4818 "st1\\t{%1<Vmtype>}, %0"
4819 [(set_attr "type" "neon_store1_1reg<q>")]
4820 )
4821
;; Big-endian OI (2x128-bit) move: reg-reg is split ("#", length 8 = two
;; insns); memory transfers use a Q-register pair with stp/ldp.
4822 (define_insn "*aarch64_be_movoi"
4823 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4824 (match_operand:OI 1 "general_operand" " w,w,m"))]
4825 "TARGET_SIMD && BYTES_BIG_ENDIAN
4826 && (register_operand (operands[0], OImode)
4827 || register_operand (operands[1], OImode))"
4828 "@
4829 #
4830 stp\\t%q1, %R1, %0
4831 ldp\\t%q0, %R0, %1"
4832 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4833 (set_attr "length" "8,4,4")]
4834 )
4835
;; Big-endian CI (3x128-bit) move: always "#", resolved by the CI
;; define_split below (reg-reg: 3 moves; reg-mem: OI move + TI move).
;; "o" (offsettable) is required so the split can address the pieces.
4836 (define_insn "*aarch64_be_movci"
4837 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4838 (match_operand:CI 1 "general_operand" " w,w,o"))]
4839 "TARGET_SIMD && BYTES_BIG_ENDIAN
4840 && (register_operand (operands[0], CImode)
4841 || register_operand (operands[1], CImode))"
4842 "#"
4843 [(set_attr "type" "multiple")
4844 (set_attr "length" "12,4,4")]
4845 )
4846
;; Big-endian XI (4x128-bit) move: always "#", resolved by the XI split
;; below into two OI-sized moves.
4847 (define_insn "*aarch64_be_movxi"
4848 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4849 (match_operand:XI 1 "general_operand" " w,w,o"))]
4850 "TARGET_SIMD && BYTES_BIG_ENDIAN
4851 && (register_operand (operands[0], XImode)
4852 || register_operand (operands[1], XImode))"
4853 "#"
4854 [(set_attr "type" "multiple")
4855 (set_attr "length" "16,4,4")]
4856 )
4857
;; Post-reload split of an OI register-register move into two TImode
;; register moves (aarch64_simd_emit_reg_reg_move orders them to cope
;; with overlap).
4858 (define_split
4859 [(set (match_operand:OI 0 "register_operand")
4860 (match_operand:OI 1 "register_operand"))]
4861 "TARGET_SIMD && reload_completed"
4862 [(const_int 0)]
4863 {
4864 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4865 DONE;
4866 })
4867
;; Post-reload split of a CI move.  Reg-reg becomes three TImode moves.
;; The big-endian reg<->mem case splits into an OImode move for the first
;; 32 bytes plus a TImode move (via a V16QI lowpart so the be move
;; patterns apply) for the final 16 bytes at offset 32.  Little-endian
;; reg<->mem FAILs here and is handled by *aarch64_mov<mode>.
4868 (define_split
4869 [(set (match_operand:CI 0 "nonimmediate_operand")
4870 (match_operand:CI 1 "general_operand"))]
4871 "TARGET_SIMD && reload_completed"
4872 [(const_int 0)]
4873 {
4874 if (register_operand (operands[0], CImode)
4875 && register_operand (operands[1], CImode))
4876 {
4877 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4878 DONE;
4879 }
4880 else if (BYTES_BIG_ENDIAN)
4881 {
4882 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4883 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4884 emit_move_insn (gen_lowpart (V16QImode,
4885 simplify_gen_subreg (TImode, operands[0],
4886 CImode, 32)),
4887 gen_lowpart (V16QImode,
4888 simplify_gen_subreg (TImode, operands[1],
4889 CImode, 32)));
4890 DONE;
4891 }
4892 else
4893 FAIL;
4894 })
4895
;; Post-reload split of an XI move.  Reg-reg becomes four TImode moves;
;; big-endian reg<->mem becomes two OImode moves (offsets 0 and 32).
4896 (define_split
4897 [(set (match_operand:XI 0 "nonimmediate_operand")
4898 (match_operand:XI 1 "general_operand"))]
4899 "TARGET_SIMD && reload_completed"
4900 [(const_int 0)]
4901 {
4902 if (register_operand (operands[0], XImode)
4903 && register_operand (operands[1], XImode))
4904 {
4905 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4906 DONE;
4907 }
4908 else if (BYTES_BIG_ENDIAN)
4909 {
4910 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4911 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4912 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4913 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4914 DONE;
4915 }
4916 else
4917 FAIL;
4918 })
4919
;; Builtin expander for vld<n>_dup: wrap the pointer in a BLK MEM sized to
;; nregs structure elements and emit the matching ld<n>r insn.
;; NOTE(review): the "w" constraint on the DI address operand looks like it
;; should be "r" (cf. aarch64_ld<nregs><VDC:mode> below), but constraints
;; are ignored in define_expand, so this is cosmetic only.
4920 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4921 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4922 (match_operand:DI 1 "register_operand" "w")
4923 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4924 "TARGET_SIMD"
4925 {
4926 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4927 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4928 * <VSTRUCT:nregs>);
4929
4930 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4931 mem));
4932 DONE;
4933 })
4934
;; ld2 into D registers, little-endian: each 64-bit result is modelled as
;; the low half of a 128-bit value (vec_concat with a zero vector), and the
;; pair is wrapped in a subreg:OI so it occupies a Q-register tuple.
4935 (define_insn "aarch64_ld2<mode>_dreg_le"
4936 [(set (match_operand:OI 0 "register_operand" "=w")
4937 (subreg:OI
4938 (vec_concat:<VRL2>
4939 (vec_concat:<VDBL>
4940 (unspec:VD
4941 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4942 UNSPEC_LD2)
4943 (vec_duplicate:VD (const_int 0)))
4944 (vec_concat:<VDBL>
4945 (unspec:VD [(match_dup 1)]
4946 UNSPEC_LD2)
4947 (vec_duplicate:VD (const_int 0)))) 0))]
4948 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4949 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4950 [(set_attr "type" "neon_load2_2reg<q>")]
4951 )
4952
;; Big-endian variant: the zero half comes first in each vec_concat,
;; reflecting the reversed subreg layout; the assembly is identical.
4953 (define_insn "aarch64_ld2<mode>_dreg_be"
4954 [(set (match_operand:OI 0 "register_operand" "=w")
4955 (subreg:OI
4956 (vec_concat:<VRL2>
4957 (vec_concat:<VDBL>
4958 (vec_duplicate:VD (const_int 0))
4959 (unspec:VD
4960 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4961 UNSPEC_LD2))
4962 (vec_concat:<VDBL>
4963 (vec_duplicate:VD (const_int 0))
4964 (unspec:VD [(match_dup 1)]
4965 UNSPEC_LD2))) 0))]
4966 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4967 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4968 [(set_attr "type" "neon_load2_2reg<q>")]
4969 )
4970
;; DX (single 64-bit element) little-endian variant: emitted as ld1 on two
;; .1d registers; scalar zero padding instead of vec_duplicate.
4971 (define_insn "aarch64_ld2<mode>_dreg_le"
4972 [(set (match_operand:OI 0 "register_operand" "=w")
4973 (subreg:OI
4974 (vec_concat:<VRL2>
4975 (vec_concat:<VDBL>
4976 (unspec:DX
4977 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4978 UNSPEC_LD2)
4979 (const_int 0))
4980 (vec_concat:<VDBL>
4981 (unspec:DX [(match_dup 1)]
4982 UNSPEC_LD2)
4983 (const_int 0))) 0))]
4984 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4985 "ld1\\t{%S0.1d - %T0.1d}, %1"
4986 [(set_attr "type" "neon_load1_2reg<q>")]
4987 )
4988
;; DX big-endian variant: zero halves first, same ld1 output.
4989 (define_insn "aarch64_ld2<mode>_dreg_be"
4990 [(set (match_operand:OI 0 "register_operand" "=w")
4991 (subreg:OI
4992 (vec_concat:<VRL2>
4993 (vec_concat:<VDBL>
4994 (const_int 0)
4995 (unspec:DX
4996 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4997 UNSPEC_LD2))
4998 (vec_concat:<VDBL>
4999 (const_int 0)
5000 (unspec:DX [(match_dup 1)]
5001 UNSPEC_LD2))) 0))]
5002 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5003 "ld1\\t{%S0.1d - %T0.1d}, %1"
5004 [(set_attr "type" "neon_load1_2reg<q>")]
5005 )
5006
;; ld3 into D registers, little-endian; same zero-extension scheme as the
;; ld2 dreg patterns above, extended to a three-register (CI) tuple.
5007 (define_insn "aarch64_ld3<mode>_dreg_le"
5008 [(set (match_operand:CI 0 "register_operand" "=w")
5009 (subreg:CI
5010 (vec_concat:<VRL3>
5011 (vec_concat:<VRL2>
5012 (vec_concat:<VDBL>
5013 (unspec:VD
5014 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5015 UNSPEC_LD3)
5016 (vec_duplicate:VD (const_int 0)))
5017 (vec_concat:<VDBL>
5018 (unspec:VD [(match_dup 1)]
5019 UNSPEC_LD3)
5020 (vec_duplicate:VD (const_int 0))))
5021 (vec_concat:<VDBL>
5022 (unspec:VD [(match_dup 1)]
5023 UNSPEC_LD3)
5024 (vec_duplicate:VD (const_int 0)))) 0))]
5025 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5026 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5027 [(set_attr "type" "neon_load3_3reg<q>")]
5028 )
5029
;; Big-endian variant: zero halves first in each vec_concat.
5030 (define_insn "aarch64_ld3<mode>_dreg_be"
5031 [(set (match_operand:CI 0 "register_operand" "=w")
5032 (subreg:CI
5033 (vec_concat:<VRL3>
5034 (vec_concat:<VRL2>
5035 (vec_concat:<VDBL>
5036 (vec_duplicate:VD (const_int 0))
5037 (unspec:VD
5038 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5039 UNSPEC_LD3))
5040 (vec_concat:<VDBL>
5041 (vec_duplicate:VD (const_int 0))
5042 (unspec:VD [(match_dup 1)]
5043 UNSPEC_LD3)))
5044 (vec_concat:<VDBL>
5045 (vec_duplicate:VD (const_int 0))
5046 (unspec:VD [(match_dup 1)]
5047 UNSPEC_LD3))) 0))]
5048 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5049 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5050 [(set_attr "type" "neon_load3_3reg<q>")]
5051 )
5052
;; DX little-endian variant: emitted as ld1 on three .1d registers.
5053 (define_insn "aarch64_ld3<mode>_dreg_le"
5054 [(set (match_operand:CI 0 "register_operand" "=w")
5055 (subreg:CI
5056 (vec_concat:<VRL3>
5057 (vec_concat:<VRL2>
5058 (vec_concat:<VDBL>
5059 (unspec:DX
5060 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5061 UNSPEC_LD3)
5062 (const_int 0))
5063 (vec_concat:<VDBL>
5064 (unspec:DX [(match_dup 1)]
5065 UNSPEC_LD3)
5066 (const_int 0)))
5067 (vec_concat:<VDBL>
5068 (unspec:DX [(match_dup 1)]
5069 UNSPEC_LD3)
5070 (const_int 0))) 0))]
5071 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5072 "ld1\\t{%S0.1d - %U0.1d}, %1"
5073 [(set_attr "type" "neon_load1_3reg<q>")]
5074 )
5075
;; DX big-endian variant: zero halves first, same ld1 output.
5076 (define_insn "aarch64_ld3<mode>_dreg_be"
5077 [(set (match_operand:CI 0 "register_operand" "=w")
5078 (subreg:CI
5079 (vec_concat:<VRL3>
5080 (vec_concat:<VRL2>
5081 (vec_concat:<VDBL>
5082 (const_int 0)
5083 (unspec:DX
5084 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5085 UNSPEC_LD3))
5086 (vec_concat:<VDBL>
5087 (const_int 0)
5088 (unspec:DX [(match_dup 1)]
5089 UNSPEC_LD3)))
5090 (vec_concat:<VDBL>
5091 (const_int 0)
5092 (unspec:DX [(match_dup 1)]
5093 UNSPEC_LD3))) 0))]
5094 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5095 "ld1\\t{%S0.1d - %U0.1d}, %1"
5096 [(set_attr "type" "neon_load1_3reg<q>")]
5097 )
5098
;; ld4 into D registers, little-endian; same zero-extension scheme as the
;; ld2/ld3 dreg patterns, extended to a four-register (XI) tuple.
5099 (define_insn "aarch64_ld4<mode>_dreg_le"
5100 [(set (match_operand:XI 0 "register_operand" "=w")
5101 (subreg:XI
5102 (vec_concat:<VRL4>
5103 (vec_concat:<VRL2>
5104 (vec_concat:<VDBL>
5105 (unspec:VD
5106 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5107 UNSPEC_LD4)
5108 (vec_duplicate:VD (const_int 0)))
5109 (vec_concat:<VDBL>
5110 (unspec:VD [(match_dup 1)]
5111 UNSPEC_LD4)
5112 (vec_duplicate:VD (const_int 0))))
5113 (vec_concat:<VRL2>
5114 (vec_concat:<VDBL>
5115 (unspec:VD [(match_dup 1)]
5116 UNSPEC_LD4)
5117 (vec_duplicate:VD (const_int 0)))
5118 (vec_concat:<VDBL>
5119 (unspec:VD [(match_dup 1)]
5120 UNSPEC_LD4)
5121 (vec_duplicate:VD (const_int 0))))) 0))]
5122 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5123 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5124 [(set_attr "type" "neon_load4_4reg<q>")]
5125 )
5126
;; Big-endian variant: zero halves first in each vec_concat.
5127 (define_insn "aarch64_ld4<mode>_dreg_be"
5128 [(set (match_operand:XI 0 "register_operand" "=w")
5129 (subreg:XI
5130 (vec_concat:<VRL4>
5131 (vec_concat:<VRL2>
5132 (vec_concat:<VDBL>
5133 (vec_duplicate:VD (const_int 0))
5134 (unspec:VD
5135 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5136 UNSPEC_LD4))
5137 (vec_concat:<VDBL>
5138 (vec_duplicate:VD (const_int 0))
5139 (unspec:VD [(match_dup 1)]
5140 UNSPEC_LD4)))
5141 (vec_concat:<VRL2>
5142 (vec_concat:<VDBL>
5143 (vec_duplicate:VD (const_int 0))
5144 (unspec:VD [(match_dup 1)]
5145 UNSPEC_LD4))
5146 (vec_concat:<VDBL>
5147 (vec_duplicate:VD (const_int 0))
5148 (unspec:VD [(match_dup 1)]
5149 UNSPEC_LD4)))) 0))]
5150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5151 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5152 [(set_attr "type" "neon_load4_4reg<q>")]
5153 )
5154
;; DX little-endian variant: emitted as ld1 on four .1d registers.
5155 (define_insn "aarch64_ld4<mode>_dreg_le"
5156 [(set (match_operand:XI 0 "register_operand" "=w")
5157 (subreg:XI
5158 (vec_concat:<VRL4>
5159 (vec_concat:<VRL2>
5160 (vec_concat:<VDBL>
5161 (unspec:DX
5162 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5163 UNSPEC_LD4)
5164 (const_int 0))
5165 (vec_concat:<VDBL>
5166 (unspec:DX [(match_dup 1)]
5167 UNSPEC_LD4)
5168 (const_int 0)))
5169 (vec_concat:<VRL2>
5170 (vec_concat:<VDBL>
5171 (unspec:DX [(match_dup 1)]
5172 UNSPEC_LD4)
5173 (const_int 0))
5174 (vec_concat:<VDBL>
5175 (unspec:DX [(match_dup 1)]
5176 UNSPEC_LD4)
5177 (const_int 0)))) 0))]
5178 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5179 "ld1\\t{%S0.1d - %V0.1d}, %1"
5180 [(set_attr "type" "neon_load1_4reg<q>")]
5181 )
5182
;; DX big-endian variant: zero halves first, same ld1 output.
5183 (define_insn "aarch64_ld4<mode>_dreg_be"
5184 [(set (match_operand:XI 0 "register_operand" "=w")
5185 (subreg:XI
5186 (vec_concat:<VRL4>
5187 (vec_concat:<VRL2>
5188 (vec_concat:<VDBL>
5189 (const_int 0)
5190 (unspec:DX
5191 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5192 UNSPEC_LD4))
5193 (vec_concat:<VDBL>
5194 (const_int 0)
5195 (unspec:DX [(match_dup 1)]
5196 UNSPEC_LD4)))
5197 (vec_concat:<VRL2>
5198 (vec_concat:<VDBL>
5199 (const_int 0)
5200 (unspec:DX [(match_dup 1)]
5201 UNSPEC_LD4))
5202 (vec_concat:<VDBL>
5203 (const_int 0)
5204 (unspec:DX [(match_dup 1)]
5205 UNSPEC_LD4)))) 0))]
5206 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5207 "ld1\\t{%S0.1d - %V0.1d}, %1"
5208 [(set_attr "type" "neon_load1_4reg<q>")]
5209 )
5210
;; Builtin expander for vld<n> on 64-bit vector modes: wrap the pointer in
;; a BLK MEM of nregs * 8 bytes and pick the _dreg_be/_dreg_le insn.
5211 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5212 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5213 (match_operand:DI 1 "register_operand" "r")
5214 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5215 "TARGET_SIMD"
5216 {
5217 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5218 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5219
5220 if (BYTES_BIG_ENDIAN)
5221 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
5222 mem));
5223 else
5224 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
5225 mem));
5226 DONE;
5227 })
5228
;; Builtin expander for vld1: big-endian needs the lane-order-preserving
;; aarch64_be_ld1; little-endian is a plain move from memory.
5229 (define_expand "aarch64_ld1<VALL_F16:mode>"
5230 [(match_operand:VALL_F16 0 "register_operand")
5231 (match_operand:DI 1 "register_operand")]
5232 "TARGET_SIMD"
5233 {
5234 machine_mode mode = <VALL_F16:MODE>mode;
5235 rtx mem = gen_rtx_MEM (mode, operands[1]);
5236
5237 if (BYTES_BIG_ENDIAN)
5238 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5239 else
5240 emit_move_insn (operands[0], mem);
5241 DONE;
5242 })
5243
;; Builtin expander for vld<n>q (128-bit vector modes): forward straight
;; to the aarch64_simd_ld<n> insn with a tuple-mode MEM.
5244 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5245 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5246 (match_operand:DI 1 "register_operand" "r")
5247 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5248 "TARGET_SIMD"
5249 {
5250 machine_mode mode = <VSTRUCT:MODE>mode;
5251 rtx mem = gen_rtx_MEM (mode, operands[1]);
5252
5253 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5254 DONE;
5255 })
5256
;; Builtin expander for vld<n>_lane: size the BLK MEM, range-check the
;; lane index against the element count, then emit the lane-load insn.
;; (Constraints here are ignored, as in all define_expands.)
5257 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5258 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5259 (match_operand:DI 1 "register_operand" "w")
5260 (match_operand:VSTRUCT 2 "register_operand" "0")
5261 (match_operand:SI 3 "immediate_operand" "i")
5262 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 "TARGET_SIMD"
5264 {
5265 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5266 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5267 * <VSTRUCT:nregs>);
5268
5269 aarch64_simd_lane_bounds (operands[3], 0,
5270 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5271 NULL);
5272 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5273 operands[0], mem, operands[2], operands[3]));
5274 DONE;
5275 })
5276
5277 ;; Expanders for builtins to extract vector registers from large
5278 ;; opaque integer modes.
5279
5280 ;; D-register list.
5281
;; Extract D-register `part' of a tuple: take the 128-bit (VDBL) subreg at
;; byte offset part*16, then its 64-bit lowpart.  The VDBL temporary keeps
;; the subreg offset aligned to a full Q register.
5282 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5283 [(match_operand:VDC 0 "register_operand" "=w")
5284 (match_operand:VSTRUCT 1 "register_operand" "w")
5285 (match_operand:SI 2 "immediate_operand" "i")]
5286 "TARGET_SIMD"
5287 {
5288 int part = INTVAL (operands[2]);
5289 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5290 int offset = part * 16;
5291
5292 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5293 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5294 DONE;
5295 })
5296
5297 ;; Q-register list.
5298
;; Extract Q-register `part' of a tuple: a direct subreg at offset part*16.
5299 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5300 [(match_operand:VQ 0 "register_operand" "=w")
5301 (match_operand:VSTRUCT 1 "register_operand" "w")
5302 (match_operand:SI 2 "immediate_operand" "i")]
5303 "TARGET_SIMD"
5304 {
5305 int part = INTVAL (operands[2]);
5306 int offset = part * 16;
5307
5308 emit_move_insn (operands[0],
5309 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5310 DONE;
5311 })
5312
5313 ;; Permuted-store expanders for neon intrinsics.
5314
5315 ;; Permute instructions
5316
5317 ;; vec_perm support
5318
;; vec_perm_const optab: constant-selector permute.  Delegates to
;; aarch64_expand_vec_perm_const, which picks a specialized sequence
;; (zip/uzp/trn/rev/ext/tbl); FAIL if no expansion was found.
5319 (define_expand "vec_perm_const<mode>"
5320 [(match_operand:VALL_F16 0 "register_operand")
5321 (match_operand:VALL_F16 1 "register_operand")
5322 (match_operand:VALL_F16 2 "register_operand")
5323 (match_operand:<V_cmp_result> 3)]
5324 "TARGET_SIMD"
5325 {
5326 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5327 operands[2], operands[3]))
5328 DONE;
5329 else
5330 FAIL;
5331 })
5332
;; vec_perm optab with a variable selector; byte-vector (VB) modes only.
5333 (define_expand "vec_perm<mode>"
5334 [(match_operand:VB 0 "register_operand")
5335 (match_operand:VB 1 "register_operand")
5336 (match_operand:VB 2 "register_operand")
5337 (match_operand:VB 3 "register_operand")]
5338 "TARGET_SIMD"
5339 {
5340 aarch64_expand_vec_perm (operands[0], operands[1],
5341 operands[2], operands[3]);
5342 DONE;
5343 })
5344
;; tbl with a single 128-bit table register; out-of-range indices yield 0.
5345 (define_insn "aarch64_tbl1<mode>"
5346 [(set (match_operand:VB 0 "register_operand" "=w")
5347 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5348 (match_operand:VB 2 "register_operand" "w")]
5349 UNSPEC_TBL))]
5350 "TARGET_SIMD"
5351 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5352 [(set_attr "type" "neon_tbl1<q>")]
5353 )
5354
5355 ;; Two source registers.
5356
;; tbl with a two-register table held in an OI tuple, V16QI result.
5357 (define_insn "aarch64_tbl2v16qi"
5358 [(set (match_operand:V16QI 0 "register_operand" "=w")
5359 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5360 (match_operand:V16QI 2 "register_operand" "w")]
5361 UNSPEC_TBL))]
5362 "TARGET_SIMD"
5363 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5364 [(set_attr "type" "neon_tbl2_q")]
5365 )
5366
;; tbl, two-register table, any byte-vector (VB) result width.
5367 (define_insn "aarch64_tbl3<mode>"
5368 [(set (match_operand:VB 0 "register_operand" "=w")
5369 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5370 (match_operand:VB 2 "register_operand" "w")]
5371 UNSPEC_TBL))]
5372 "TARGET_SIMD"
5373 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5374 [(set_attr "type" "neon_tbl3")]
5375 )
5376
;; tbx (table lookup with insert): lanes with out-of-range indices keep
;; the value from operand 1, which is tied to the destination ("0").
5377 (define_insn "aarch64_tbx4<mode>"
5378 [(set (match_operand:VB 0 "register_operand" "=w")
5379 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5380 (match_operand:OI 2 "register_operand" "w")
5381 (match_operand:VB 3 "register_operand" "w")]
5382 UNSPEC_TBX))]
5383 "TARGET_SIMD"
5384 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5385 [(set_attr "type" "neon_tbl4")]
5386 )
5387
5388 ;; Three source registers.
5389
;; tbl with a three-register table held in a CI tuple.
5390 (define_insn "aarch64_qtbl3<mode>"
5391 [(set (match_operand:VB 0 "register_operand" "=w")
5392 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5393 (match_operand:VB 2 "register_operand" "w")]
5394 UNSPEC_TBL))]
5395 "TARGET_SIMD"
5396 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5397 [(set_attr "type" "neon_tbl3")]
5398 )
5399
;; tbx with a three-register table; operand 1 provides the fallback lanes
;; and is tied to the destination.
5400 (define_insn "aarch64_qtbx3<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5403 (match_operand:CI 2 "register_operand" "w")
5404 (match_operand:VB 3 "register_operand" "w")]
5405 UNSPEC_TBX))]
5406 "TARGET_SIMD"
5407 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5408 [(set_attr "type" "neon_tbl3")]
5409 )
5410
5411 ;; Four source registers.
5412
;; tbl with a four-register table held in an XI tuple.
5413 (define_insn "aarch64_qtbl4<mode>"
5414 [(set (match_operand:VB 0 "register_operand" "=w")
5415 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5416 (match_operand:VB 2 "register_operand" "w")]
5417 UNSPEC_TBL))]
5418 "TARGET_SIMD"
5419 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5420 [(set_attr "type" "neon_tbl4")]
5421 )
5422
;; tbx with a four-register table; fallback operand tied to destination.
5423 (define_insn "aarch64_qtbx4<mode>"
5424 [(set (match_operand:VB 0 "register_operand" "=w")
5425 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5426 (match_operand:XI 2 "register_operand" "w")
5427 (match_operand:VB 3 "register_operand" "w")]
5428 UNSPEC_TBX))]
5429 "TARGET_SIMD"
5430 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5431 [(set_attr "type" "neon_tbl4")]
5432 )
5433
;; Combine two V16QI registers into an OI tuple (used to feed two-register
;; tbl).  Emitted as "#" and split after reload by
;; aarch64_split_combinev16qi into the needed register moves.
5434 (define_insn_and_split "aarch64_combinev16qi"
5435 [(set (match_operand:OI 0 "register_operand" "=w")
5436 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5437 (match_operand:V16QI 2 "register_operand" "w")]
5438 UNSPEC_CONCAT))]
5439 "TARGET_SIMD"
5440 "#"
5441 "&& reload_completed"
5442 [(const_int 0)]
5443 {
5444 aarch64_split_combinev16qi (operands);
5445 DONE;
5446 }
5447 [(set_attr "type" "multiple")]
5448 )
5449
;; Permute instructions zip1/zip2, uzp1/uzp2, trn1/trn2, parameterized by
;; the PERMUTE iterator (perm_insn selects the mnemonic, perm_hilo 1/2).
5450 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5451 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5452 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5453 (match_operand:VALL_F16 2 "register_operand" "w")]
5454 PERMUTE))]
5455 "TARGET_SIMD"
5456 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5457 [(set_attr "type" "neon_permute<q>")]
5458 )
5459
5460 ;; Note immediate (third) operand is lane index not byte index.
;; ext: extract a vector from a register pair; the lane index is scaled to
;; a byte offset (index * element size) at output time.
5461 (define_insn "aarch64_ext<mode>"
5462 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5463 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5464 (match_operand:VALL_F16 2 "register_operand" "w")
5465 (match_operand:SI 3 "immediate_operand" "i")]
5466 UNSPEC_EXT))]
5467 "TARGET_SIMD"
5468 {
5469 operands[3] = GEN_INT (INTVAL (operands[3])
5470 * GET_MODE_UNIT_SIZE (<MODE>mode));
5471 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5472 }
5473 [(set_attr "type" "neon_ext<q>")]
5474 )
5475
;; rev16/rev32/rev64: reverse elements within each rev_op-bit container.
5476 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5477 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5478 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5479 REVERSE))]
5480 "TARGET_SIMD"
5481 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5482 [(set_attr "type" "neon_rev<q>")]
5483 )
5484
;; st2 of two D registers (64-bit vector element modes, VD).
5485 (define_insn "aarch64_st2<mode>_dreg"
5486 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5487 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5488 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5489 UNSPEC_ST2))]
5490 "TARGET_SIMD"
5491 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5492 [(set_attr "type" "neon_store2_2reg")]
5493 )
5494
;; st2 for single-element DX modes: emitted as st1 on two .1d registers.
5495 (define_insn "aarch64_st2<mode>_dreg"
5496 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5497 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5498 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5499 UNSPEC_ST2))]
5500 "TARGET_SIMD"
5501 "st1\\t{%S1.1d - %T1.1d}, %0"
5502 [(set_attr "type" "neon_store1_2reg")]
5503 )
5504
;; st3 of three D registers (VD modes).
5505 (define_insn "aarch64_st3<mode>_dreg"
5506 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5507 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5508 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5509 UNSPEC_ST3))]
5510 "TARGET_SIMD"
5511 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5512 [(set_attr "type" "neon_store3_3reg")]
5513 )
5514
;; st3 for DX modes via st1 on three .1d registers.
5515 (define_insn "aarch64_st3<mode>_dreg"
5516 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5517 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5518 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5519 UNSPEC_ST3))]
5520 "TARGET_SIMD"
5521 "st1\\t{%S1.1d - %U1.1d}, %0"
5522 [(set_attr "type" "neon_store1_3reg")]
5523 )
5524
;; st4 of four D registers (VD modes).
5525 (define_insn "aarch64_st4<mode>_dreg"
5526 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5527 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5528 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5529 UNSPEC_ST4))]
5530 "TARGET_SIMD"
5531 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5532 [(set_attr "type" "neon_store4_4reg")]
5533 )
5534
;; st4 for DX modes via st1 on four .1d registers.
5535 (define_insn "aarch64_st4<mode>_dreg"
5536 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5537 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5538 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5539 UNSPEC_ST4))]
5540 "TARGET_SIMD"
5541 "st1\\t{%S1.1d - %V1.1d}, %0"
5542 [(set_attr "type" "neon_store1_4reg")]
5543 )
5544
;; Builtin expander for vst<n> on 64-bit vector modes: BLK MEM of
;; nregs * 8 bytes, forwarded to the matching _dreg insn.
5545 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5546 [(match_operand:DI 0 "register_operand" "r")
5547 (match_operand:VSTRUCT 1 "register_operand" "w")
5548 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5549 "TARGET_SIMD"
5550 {
5551 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5552 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5553
5554 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5555 DONE;
5556 })
5557
;; Builtin expander for vst<n>q (128-bit vector modes).
5558 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5559 [(match_operand:DI 0 "register_operand" "r")
5560 (match_operand:VSTRUCT 1 "register_operand" "w")
5561 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5562 "TARGET_SIMD"
5563 {
5564 machine_mode mode = <VSTRUCT:MODE>mode;
5565 rtx mem = gen_rtx_MEM (mode, operands[0]);
5566
5567 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5568 DONE;
5569 })
5570
;; Builtin expander for vst<n>_lane: size the BLK MEM and emit the
;; lane-store insn (which flips the lane index for big-endian itself).
5571 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5572 [(match_operand:DI 0 "register_operand" "r")
5573 (match_operand:VSTRUCT 1 "register_operand" "w")
5574 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5575 (match_operand:SI 2 "immediate_operand")]
5576 "TARGET_SIMD"
5577 {
5578 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5579 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5580 * <VSTRUCT:nregs>);
5581
5582 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5583 mem, operands[1], operands[2]));
5584 DONE;
5585 })
5586
;; Builtin expander for vst1: big-endian uses the order-preserving
;; aarch64_be_st1; little-endian is a plain move to memory.
5587 (define_expand "aarch64_st1<VALL_F16:mode>"
5588 [(match_operand:DI 0 "register_operand")
5589 (match_operand:VALL_F16 1 "register_operand")]
5590 "TARGET_SIMD"
5591 {
5592 machine_mode mode = <VALL_F16:MODE>mode;
5593 rtx mem = gen_rtx_MEM (mode, operands[0]);
5594
5595 if (BYTES_BIG_ENDIAN)
5596 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5597 else
5598 emit_move_insn (mem, operands[1]);
5599 DONE;
5600 })
5601
5602 ;; Expander for builtins to insert vector registers into large
5603 ;; opaque integer modes.
5604
5605 ;; Q-register list. We don't need a D-reg inserter as we zero
5606 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5607
;; Insert Q-register operand 2 into tuple operand 1 at index operand 3,
;; producing the result in operand 0.  Implemented as a full-tuple copy
;; followed by a subreg store of the inserted register; the byte offset is
;; the index scaled by 16, the size in bytes of one Q register (<VQ:mode>).
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})
5623
5624 ;; Standard pattern name vec_init<mode>.
5625
;; Standard pattern name vec_init<mode>: build a vector from the parallel
;; of element rtxes in operand 1.  All the work is done by the target hook
;; aarch64_expand_vector_init in aarch64.c.
(define_expand "vec_init<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
5634
;; LD1R: load one scalar element from memory and replicate it into every
;; lane of the destination vector (matches vec_duplicate of a MEM).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
5643
;; FRECPE: vector floating-point reciprocal estimate.
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)
5652
;; Scalar FRECP family; the concrete mnemonic suffix (and matching unspec)
;; comes from the FRECP iterator via <frecp_suffix>.
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)
5661
;; FRECPS: floating-point reciprocal step (vector and scalar forms via
;; the VHSDF_HSDF iterator).
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)
5672
;; URECPE: unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5680
5681 ;; Standard pattern name vec_extract<mode>.
5682
;; Standard pattern name vec_extract<mode>: extract the element at
;; immediate index operand 2 from vector operand 1.  Delegates to the
;; aarch64_get_lane pattern.
(define_expand "vec_extract<mode>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})
5693
5694 ;; aes
5695
;; AESE/AESD round instruction (which of the two comes from the CRYPTO_AES
;; iterator).  Operand 1 is tied to the destination: the instruction reads
;; and writes the same register for the data argument.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
		       (match_operand:V16QI 2 "register_operand" "w")]
         CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
5705
5706 ;; When AES/AESMC fusion is enabled we want the register allocation to
5707 ;; look like:
5708 ;; AESE Vn, _
5709 ;; AESMC Vn, Vn
5710 ;; So prefer to tie operand 1 to operand 0 when fusing.
5711
;; AESMC/AESIMC.  Two alternatives: the first ties the source to the
;; destination and is enabled only when AESE/AESMC fusion is on (see the
;; comment above), steering the allocator towards the fusible register
;; pattern; the second is the unconstrained form.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)
5726
5727 ;; sha1
5728
;; SHA1H fixed-rotate, operating directly on an SI value in a vector
;; register (S-register view).
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1
                       "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5738
;; SHA1H applied to lane 0 of a V4SI source — valid only on little-endian,
;; where lane 0 is the low 32 bits of the S-register view; the big-endian
;; counterpart below selects lane 3 instead.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5748
;; Big-endian variant of the SHA1H-on-V4SI pattern: lane 3 is the one that
;; maps to the low 32 bits of the register in big-endian lane numbering.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
5758
;; SHA1SU1 schedule update, part 2.  Operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
5768
;; SHA1C/SHA1M/SHA1P hash update (chosen by the CRYPTO_SHA1 iterator).
;; Operand 1 (hash state) is tied to the destination; operand 2 is the
;; 32-bit e value, operand 3 the message schedule.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
5779
;; SHA1SU0 schedule update, part 1.  Operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
5790
5791 ;; sha256
5792
;; SHA256H/SHA256H2 hash update (chosen by the CRYPTO_SHA256 iterator).
;; Operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5803
;; SHA256SU0 schedule update, part 1.  Operand 1 is tied to the destination.
;; Condition string normalised to "TARGET_SIMD && TARGET_CRYPTO" (space
;; after &&) to match every other crypto pattern in this file.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
5813
;; SHA256SU1 schedule update, part 2.  Operand 1 is tied to the destination.
;; Condition string normalised to "TARGET_SIMD && TARGET_CRYPTO" (space
;; after &&) to match every other crypto pattern in this file.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
5824
5825 ;; pmull
5826
;; PMULL: 64x64 -> 128-bit polynomial (carry-less) multiply of the low
;; D-register halves, producing a TI result in a Q register.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
		    UNSPEC_PMULL))]
 "TARGET_SIMD && TARGET_CRYPTO"
 "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
5836
;; PMULL2: polynomial multiply of the high 64-bit lanes of two V2DI
;; operands, producing a 128-bit TI result.
(define_insn "aarch64_crypto_pmullv2di"
 [(set (match_operand:TI 0 "register_operand" "=w")
       (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		   (match_operand:V2DI 2 "register_operand" "w")]
		  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)