]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
[AArch64][PATCH 1/2] Fix addressing printing of LDP/STP
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Standard "mov<mode>" expander for all AdvSIMD vector modes (including
;; FP16 vectors).  The C fragment legitimizes the source operand: a store
;; of an all-zero vector immediate may keep the immediate (it is emitted
;; as a str/stp of xzr); any other non-register source is forced into a
;; register.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
;; Misaligned vector moves.  This expander must never FAIL, so when
;; both operands are non-registers (e.g. memory := constant created by
;; the auto-vectorizer) the source is forced into a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
;; Duplicate a scalar into every lane of an integer vector.  The source
;; may be either a SIMD register (DUP from element 0) or, at a cost
;; ("?r"), a general-purpose register (DUP from GP).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
;; Floating-point variant of the duplicate: only a SIMD source register
;; is supported.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
;; Duplicate one selected lane of a vector into all lanes of the result.
;; The lane number is remapped for big-endian lane ordering before the
;; template is printed.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
;; As above, but the source vector has the swapped (other) width from
;; the destination, so the lane index is remapped in the source's mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
;; 64-bit vector moves.  The alternatives cover: load, store of zero
;; (STR of xzr), store, SIMD reg-reg copy, SIMD->GP (UMOV), GP->SIMD
;; (FMOV), GP reg-reg copy, and vector-immediate materialization.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
;; 128-bit vector moves.  Storing zero emits an STP of two xzr
;; registers (hence the "Umn" pair-address constraint); moves that
;; involve general-purpose registers return "#" and are split later
;; into two DImode moves (length 8).
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
;; LDP of two 64-bit vector registers.  The insn condition requires the
;; second memory address to be exactly the first plus the size of the
;; first mode, so the pair is printed from operand 1 alone.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
;; STP of two 64-bit vector registers; same adjacency requirement as
;; the load-pair pattern above.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %0"
205 [(set_attr "type" "neon_stp")]
206 )
207
;; LDP of two 128-bit (Q) vector registers.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
;; STP of two 128-bit (Q) vector registers.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %0"
232 [(set_attr "type" "neon_stp_q")]
233 )
234
235
;; After reload, a 128-bit move between two general-purpose registers
;; is split into a pair of DImode register moves.
236 (define_split
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
242 [(const_int 0)]
243 {
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245 DONE;
246 })
247
;; After reload, a 128-bit move that crosses between the FP/SIMD and
;; general-purpose register files is split via aarch64_split_simd_move.
248 (define_split
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254 [(const_int 0)]
255 {
256 aarch64_split_simd_move (operands[0], operands[1]);
257 DONE;
258 })
259
;; Split a 128-bit vector move into two half-width operations.
;; GP -> SIMD goes via move_lo_quad/move_hi_quad of the low and high
;; halves; SIMD -> GP extracts the low and high halves with the
;; mov_from_<mode>low/high patterns below.
260 (define_expand "aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
263 "TARGET_SIMD"
264 {
265 rtx dst = operands[0];
266 rtx src = operands[1];
267
268 if (GP_REGNUM_P (REGNO (src)))
269 {
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273 emit_insn
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
275 emit_insn
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
277 }
278
279 else
280 {
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285
286 emit_insn
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288 emit_insn
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290 }
291 DONE;
292 }
293 )
294
;; Move the low 64 bits of a 128-bit vector into a general-purpose
;; register (UMOV from element d[0]).  Only valid after reload.
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297 (vec_select:<VHALF>
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
301 "umov\t%0, %1.d[0]"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
304 ])
305
;; As above, but for the high 64 bits (UMOV from element d[1]).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308 (vec_select:<VHALF>
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
312 "umov\t%0, %1.d[1]"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
315 ])
316
;; ORN: the RTL complements operand 1, but the ORN instruction
;; complements its *last* source, so the template prints %2 before %1.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
324 )
325
;; BIC: same operand-order remark as ORN above.
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
333 )
334
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
339 "TARGET_SIMD"
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
342 )
343
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
348 "TARGET_SIMD"
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
351 )
352
;; Vector integer multiplication (no 64-bit element variant exists,
;; hence the VDQ_BHSI iterator).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357 "TARGET_SIMD"
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
360 )
361
;; Byte-swap each element using the appropriate REV instruction.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365 "TARGET_SIMD"
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
368 )
369
;; Bit-reverse each byte of a byte vector.
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373 UNSPEC_RBIT))]
374 "TARGET_SIMD"
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
377 )
378
;; Count trailing zeros: byte-swap plus per-byte RBIT reverses the
;; full bit order of each element, after which CLZ of the reversed
;; value gives CTZ of the original.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
382 "TARGET_SIMD"
383 {
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386 <MODE>mode, 0);
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389 DONE;
390 }
391 )
392
;; xorsign(x, y) = x with its sign bit replaced by sign(y) XOR sign(x),
;; i.e. x * copysign(1.0, y) computed with integer bit operations:
;; mask the sign bit out of operand 2, then XOR it into operand 1.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
397 "TARGET_SIMD"
398 {
399
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
404
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
407
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
409
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
413
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
418 DONE;
419 }
420 )
421
422 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator (tied to the output register, "0").
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
428 DOTPROD)))]
429 "TARGET_DOTPROD"
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
432 )
433
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
438 ;;
439 ;; for (i=0; i<len; i++) {
440 ;; c = a[i] * b[i];
441 ;; r += c;
442 ;; }
443 ;; return result;
444 ;;
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
447 ;;
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
451 ;; ...
452 ;;
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
458 DOTPROD)
459 (match_operand:VS 3 "register_operand")))]
460 "TARGET_DOTPROD"
461 {
462 emit_insn (
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
464 operands[2]));
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
466 DONE;
467 })
468
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; The lane index (operand 4) is endian-adjusted before printing.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
477 DOTPROD)))]
478 "TARGET_DOTPROD"
479 {
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
482 }
483 [(set_attr "type" "neon_dot")]
484 )
485
;; As above, but indexing into a 128-bit (V16QI) lane vector.
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
492 DOTPROD)))]
493 "TARGET_DOTPROD"
494 {
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
497 }
498 [(set_attr "type" "neon_dot")]
499 )
500
;; copysign(x, y): select the sign bit from operand 2 and the rest
;; from operand 1 with a single BSL through a sign-bit mask.
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
506 {
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
509
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
515 DONE;
516 }
517 )
518
;; Multiply a vector by one broadcast lane of another vector
;; ((F)MUL by element).  The lane index is endian-adjusted.
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
521 (mult:VMUL
522 (vec_duplicate:VMUL
523 (vec_select:<VEL>
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
527 "TARGET_SIMD"
528 {
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
531 }
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
533 )
534
;; As above, with the lane taken from a vector of the swapped width.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
539 (vec_select:<VEL>
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
543 "TARGET_SIMD"
544 {
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
547 }
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
549 )
550
;; Multiply by a scalar broadcast from element 0 of a register.
;; (NOTE(review): the stray ';' after the output template is present
;; upstream and kept byte-identical here.)
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
553 (mult:VMUL
554 (vec_duplicate:VMUL
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
557 "TARGET_SIMD"
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
560 )
561
;; Reciprocal square-root estimate (FRSQRTE).
562 (define_insn "aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
565 UNSPEC_RSQRTE))]
566 "TARGET_SIMD"
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
569
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
570 (define_insn "aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
574 UNSPEC_RSQRTS))]
575 "TARGET_SIMD"
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
578
;; Expand 1/sqrt(x) via the iterative approximation sequence.
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
582 UNSPEC_RSQRT))]
583 "TARGET_SIMD"
584 {
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
586 DONE;
587 })
588
;; Scalar DF multiply by a selected lane of a V2DF register.
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
591 (mult:DF
592 (vec_select:DF
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
596 "TARGET_SIMD"
597 {
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
600 }
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
602 )
603
;; Vector integer negation.
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
607 "TARGET_SIMD"
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
610 )
611
;; Vector integer absolute value.
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
615 "TARGET_SIMD"
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
618 )
619
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
622 ;; as SABD.
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
625 (unspec:VSDQ_I_DI
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
627 UNSPEC_ABS))]
628 "TARGET_SIMD"
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
631 )
632
;; Signed absolute difference: |op1 - op2| per element (SABD).
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
638 "TARGET_SIMD"
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
641 )
642
;; Widening absolute difference of the high halves ([SU]ABDL2).
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
647 ABDL2))]
648 "TARGET_SIMD"
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
651 )
652
;; Widening absolute difference and accumulate ([SU]ABAL); operand 3
;; is the accumulator, tied to the output.
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
658 ABAL))]
659 "TARGET_SIMD"
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
662 )
663
;; Pairwise add-accumulate long ([SU]ADALP); operand 2 is the
;; accumulator, tied to the output.
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
668 ADALP))]
669 "TARGET_SIMD"
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
672 )
673
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
684
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
690 "TARGET_SIMD"
691 {
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
694 operands[2]));
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
698 operands[3]));
699 emit_move_insn (operands[0], operands[3]);
700 DONE;
701 }
702 )
703
;; Signed absolute difference and accumulate (SABA); operand 3 is the
;; accumulator, tied to the output.
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
710 "TARGET_SIMD"
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
713 )
714
;; Floating-point absolute difference (FABD).
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
717 (abs:VHSDF_HSDF
718 (minus:VHSDF_HSDF
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
721 "TARGET_SIMD"
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
724 )
725
726 ;; For AND (vector, register) and BIC (vector, immediate)
;; The second alternative ties operand 1 to the output and emits BIC
;; with a validated immediate.
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
731 "TARGET_SIMD"
732 {
733 switch (which_alternative)
734 {
735 case 0:
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
737 case 1:
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
739 AARCH64_CHECK_BIC);
740 default:
741 gcc_unreachable ();
742 }
743 }
744 [(set_attr "type" "neon_logic<q>")]
745 )
746
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
752 "TARGET_SIMD"
753 {
754 switch (which_alternative)
755 {
756 case 0:
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
758 case 1:
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
760 AARCH64_CHECK_ORR);
761 default:
762 gcc_unreachable ();
763 }
764 }
765 [(set_attr "type" "neon_logic<q>")]
766 )
767
;; Vector exclusive-or (EOR).
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
772 "TARGET_SIMD"
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
775 )
776
;; Vector bitwise NOT.
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
780 "TARGET_SIMD"
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
783 )
784
;; Insert a scalar into one lane of a vector.  Operand 2 arrives as a
;; one-hot vec_merge mask; the C fragment converts it to a lane number
;; (endian-adjusted) for printing via %p2.  Alternatives: INS from a
;; SIMD element, INS from a GP register, or LD1 of a single lane.
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
787 (vec_merge:VALL_F16
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
792 "TARGET_SIMD"
793 {
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
797 {
798 case 0:
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
800 case 1:
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
802 case 2:
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
804 default:
805 gcc_unreachable ();
806 }
807 }
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
809 )
810
;; Copy lane 4 of operand 3 into the lane of operand 0 selected by the
;; one-hot mask in operand 2 (INS element-to-element).
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
813 (vec_merge:VALL_F16
814 (vec_duplicate:VALL_F16
815 (vec_select:<VEL>
816 (match_operand:VALL_F16 3 "register_operand" "w")
817 (parallel
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
821 "TARGET_SIMD"
822 {
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
826
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
828 }
829 [(set_attr "type" "neon_ins<q>")]
830 )
831
;; As above, but the source lane comes from a vector of the swapped
;; width, so its index is endian-adjusted in that mode.
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
836 (vec_select:<VEL>
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
838 (parallel
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
842 "TARGET_SIMD"
843 {
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
848
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
850 }
851 [(set_attr "type" "neon_ins<q>")]
852 )
853
;; Logical shift right by an immediate (USHR).
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
858 "TARGET_SIMD"
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
861 )
862
;; Arithmetic shift right by an immediate (SSHR).
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
867 "TARGET_SIMD"
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
870 )
871
;; Shift left by an immediate (SHL).
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
876 "TARGET_SIMD"
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
879 )
880
;; Shift left by a per-element register amount (SSHL).
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
885 "TARGET_SIMD"
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
888 )
889
;; Variable shift with unsigned (USHL) semantics: negative shift
;; amounts shift right.  Kept as an unspec so it is not simplified.
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
895 "TARGET_SIMD"
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
898 )
899
;; Variable shift with signed (SSHL) semantics; see the unsigned
;; variant above.
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
905 "TARGET_SIMD"
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
908 )
909
;; Expand a vector shift-left.  An in-range constant amount becomes an
;; immediate SHL; anything else (out-of-range constant, memory, or
;; register amount) is broadcast into a vector register and uses the
;; register-shift SSHL form.
910 (define_expand "ashl<mode>3"
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
913 (match_operand:SI 2 "general_operand" "")]
914 "TARGET_SIMD"
915 {
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
917 int shift_amount;
918
919 if (CONST_INT_P (operands[2]))
920 {
921 shift_amount = INTVAL (operands[2]);
922 if (shift_amount >= 0 && shift_amount < bit_width)
923 {
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
925 shift_amount);
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
927 operands[1],
928 tmp));
929 DONE;
930 }
931 else
932 {
933 operands[2] = force_reg (SImode, operands[2]);
934 }
935 }
936 else if (MEM_P (operands[2]))
937 {
938 operands[2] = force_reg (SImode, operands[2]);
939 }
940
941 if (REG_P (operands[2]))
942 {
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
946 operands[2],
947 0)));
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
949 tmp));
950 DONE;
951 }
952 else
953 FAIL;
954 }
955 )
956
957 (define_expand "lshr<mode>3"
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
960 (match_operand:SI 2 "general_operand" "")]
961 "TARGET_SIMD"
962 {
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
964 int shift_amount;
965
966 if (CONST_INT_P (operands[2]))
967 {
968 shift_amount = INTVAL (operands[2]);
969 if (shift_amount > 0 && shift_amount <= bit_width)
970 {
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
972 shift_amount);
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
974 operands[1],
975 tmp));
976 DONE;
977 }
978 else
979 operands[2] = force_reg (SImode, operands[2]);
980 }
981 else if (MEM_P (operands[2]))
982 {
983 operands[2] = force_reg (SImode, operands[2]);
984 }
985
986 if (REG_P (operands[2]))
987 {
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]));
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
993 tmp, 0)));
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
995 operands[1],
996 tmp1));
997 DONE;
998 }
999 else
1000 FAIL;
1001 }
1002 )
1003
1004 (define_expand "ashr<mode>3"
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
1007 (match_operand:SI 2 "general_operand" "")]
1008 "TARGET_SIMD"
1009 {
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011 int shift_amount;
1012
1013 if (CONST_INT_P (operands[2]))
1014 {
1015 shift_amount = INTVAL (operands[2]);
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1017 {
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019 shift_amount);
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021 operands[1],
1022 tmp));
1023 DONE;
1024 }
1025 else
1026 operands[2] = force_reg (SImode, operands[2]);
1027 }
1028 else if (MEM_P (operands[2]))
1029 {
1030 operands[2] = force_reg (SImode, operands[2]);
1031 }
1032
1033 if (REG_P (operands[2]))
1034 {
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1040 tmp, 0)));
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042 operands[1],
1043 tmp1));
1044 DONE;
1045 }
1046 else
1047 FAIL;
1048 }
1049 )
1050
;; Vector-by-vector shift left (standard name vashl): maps directly to
;; SSHL, which always shifts left for non-negative lane counts.
1051 (define_expand "vashl<mode>3"
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
1055 "TARGET_SIMD"
1056 {
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058 operands[2]));
1059 DONE;
1060 })
1061
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the counts, then use
;; SSHL (negative lane count == shift right).
1065 (define_expand "vashr<mode>3"
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069 "TARGET_SIMD"
1070 {
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074 neg));
1075 DONE;
1076 })
1077
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083 "TARGET_SIMD"
1084 {
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1087 a shift by 64. */
1088 if (INTVAL (operands[2]) == 64)
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091 DONE;
1092 }
1093 )
1094
;; Vector-by-vector logical shift right: negated counts fed to USHL.
1095 (define_expand "vlshr<mode>3"
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099 "TARGET_SIMD"
1100 {
1101 rtx neg = gen_reg_rtx (<MODE>mode);
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104 neg));
1105 DONE;
1106 })
1107
;; DI logical shift right: a shift by 64 yields zero (the standard
;; pattern only handles 0..63), otherwise defer to lshrdi3.
1108 (define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112 "TARGET_SIMD"
1113 {
1114 if (INTVAL (operands[2]) == 64)
1115 emit_move_insn (operands[0], const0_rtx)
1116 else
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118 DONE;
1119 }
1120 )
1121
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right by %2 bits (element extraction helper).  On
;; big-endian the lane order in the register is reversed, so a left
;; shift (SHL) of the 64-bit register implements the same lane motion.
1123 (define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1127 UNSPEC_VEC_SHR))]
1128 "TARGET_SIMD"
1129 {
1130 if (BYTES_BIG_ENDIAN)
1131 return "shl %d0, %d1, %2";
1132 else
1133 return "ushr %d0, %d1, %2";
1134 }
1135 [(set_attr "type" "neon_shift_imm")]
1136 )
1137
;; Standard vec_set: insert scalar operand 1 into lane INTVAL(op2) of
;; vector operand 0.  The lane index is converted to the one-hot bitmask
;; form that aarch64_simd_vec_set expects.
1138 (define_expand "vec_set<mode>"
1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1142 "TARGET_SIMD"
1143 {
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]));
1147 DONE;
1148 }
1149 )
1150
1151
;; Integer multiply-accumulate: op0 = op1 + op2 * op3.  Operand 1 is
;; tied to the destination ("0") since MLA accumulates in place.
1152 (define_insn "aarch64_mla<mode>"
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158 "TARGET_SIMD"
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
1161 )
1162
;; MLA by a selected lane of another vector (same width).  The lane
;; number is remapped for endianness before printing.
1163 (define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165 (plus:VDQHS
1166 (mult:VDQHS
1167 (vec_duplicate:VDQHS
1168 (vec_select:<VEL>
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1173 "TARGET_SIMD"
1174 {
1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177 }
1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1179 )
1180
;; As above but the lane comes from the opposite-width vector mode
;; (e.g. a V4HI lane used with a V8HI multiply).
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183 (plus:VDQHS
1184 (mult:VDQHS
1185 (vec_duplicate:VDQHS
1186 (vec_select:<VEL>
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1191 "TARGET_SIMD"
1192 {
1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195 }
1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1197 )
1198
;; MLA with a duplicated scalar register operand: lane 0 addressing
;; needs no endian correction.
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201 (plus:VDQHS
1202 (mult:VDQHS (vec_duplicate:VDQHS
1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1206 "TARGET_SIMD"
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
;; Integer multiply-subtract: op0 = op1 - op2 * op3 (MLS), accumulator
;; tied to the destination.
1211 (define_insn "aarch64_mls<mode>"
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216 "TARGET_SIMD"
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
1219 )
1220
;; MLS by a selected lane; mirrors *aarch64_mla_elt<mode>.
1221 (define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223 (minus:VDQHS
1224 (match_operand:VDQHS 4 "register_operand" "0")
1225 (mult:VDQHS
1226 (vec_duplicate:VDQHS
1227 (vec_select:<VEL>
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1231 "TARGET_SIMD"
1232 {
1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235 }
1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1237 )
1238
;; MLS by a lane of the opposite-width vector mode.
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241 (minus:VDQHS
1242 (match_operand:VDQHS 4 "register_operand" "0")
1243 (mult:VDQHS
1244 (vec_duplicate:VDQHS
1245 (vec_select:<VEL>
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1249 "TARGET_SIMD"
1250 {
1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253 }
1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1255 )
1256
;; MLS with a duplicated scalar register operand (lane 0 form).
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259 (minus:VDQHS
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264 "TARGET_SIMD"
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267 )
1268
1269 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min for 8/16/32-bit lanes; maps to
;; SMAX/SMIN/UMAX/UMIN.
1270 (define_insn "<su><maxmin><mode>3"
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274 "TARGET_SIMD"
1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276 [(set_attr "type" "neon_minmax<q>")]
1277 )
1278
;; No V2DI max/min instruction exists, so synthesise it from a compare
;; plus vcond (bitwise select) on the compared operands.
1279 (define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1283 "TARGET_SIMD"
1284 {
1285 enum rtx_code cmp_operator;
1286 rtx cmp_fmt;
1287
1288 switch (<CODE>)
1289 {
1290 case UMIN:
1291 cmp_operator = LTU;
1292 break;
1293 case SMIN:
1294 cmp_operator = LT;
1295 break;
1296 case UMAX:
1297 cmp_operator = GTU;
1298 break;
1299 case SMAX:
1300 cmp_operator = GT;
1301 break;
1302 default:
1303 gcc_unreachable ();
1304 }
1305
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308 operands[2], cmp_fmt, operands[1], operands[2]));
1309 DONE;
1310 })
1311
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317 MAXMINV))]
1318 "TARGET_SIMD"
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1321 )
1322
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1328 FMAXMINV))]
1329 "TARGET_SIMD"
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
1332 )
1333
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1339
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1343
;; Little-endian variant for modes whose half is NOT a two-element
;; vector; alternatives cover SIMD-reg source, GP-reg via FMOV, and dup.
1344 (define_insn "move_lo_quad_internal_<mode>"
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346 (vec_concat:VQ_NO2E
1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348 (vec_duplicate:<VHALF> (const_int 0))))]
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350 "@
1351 dup\\t%d0, %1.d[0]
1352 fmov\\t%d0, %1
1353 dup\\t%d0, %1"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355 (set_attr "simd" "yes,*,yes")
1356 (set_attr "fp" "*,yes,*")
1357 (set_attr "length" "4")]
1358 )
1359
;; Little-endian variant for two-element modes, where the zero half is
;; a plain (const_int 0) rather than a vec_duplicate.
1360 (define_insn "move_lo_quad_internal_<mode>"
1361 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1362 (vec_concat:VQ_2E
1363 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1364 (const_int 0)))]
1365 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1366 "@
1367 dup\\t%d0, %1.d[0]
1368 fmov\\t%d0, %1
1369 dup\\t%d0, %1"
1370 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1371 (set_attr "simd" "yes,*,yes")
1372 (set_attr "fp" "*,yes,*")
1373 (set_attr "length" "4")]
1374 )
1375
;; Big-endian variants: the zero half comes first in the vec_concat,
;; but the emitted instructions are identical.
1376 (define_insn "move_lo_quad_internal_be_<mode>"
1377 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1378 (vec_concat:VQ_NO2E
1379 (vec_duplicate:<VHALF> (const_int 0))
1380 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1381 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1382 "@
1383 dup\\t%d0, %1.d[0]
1384 fmov\\t%d0, %1
1385 dup\\t%d0, %1"
1386 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1387 (set_attr "simd" "yes,*,yes")
1388 (set_attr "fp" "*,yes,*")
1389 (set_attr "length" "4")]
1390 )
1391
1392 (define_insn "move_lo_quad_internal_be_<mode>"
1393 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1394 (vec_concat:VQ_2E
1395 (const_int 0)
1396 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1397 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1398 "@
1399 dup\\t%d0, %1.d[0]
1400 fmov\\t%d0, %1
1401 dup\\t%d0, %1"
1402 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1403 (set_attr "simd" "yes,*,yes")
1404 (set_attr "fp" "*,yes,*")
1405 (set_attr "length" "4")]
1406 )
1407
;; Driver expander: picks the endian-appropriate internal pattern.
1408 (define_expand "move_lo_quad_<mode>"
1409 [(match_operand:VQ 0 "register_operand")
1410 (match_operand:VQ 1 "register_operand")]
1411 "TARGET_SIMD"
1412 {
1413 if (BYTES_BIG_ENDIAN)
1414 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1415 else
1416 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1417 DONE;
1418 }
1419 )
1420
1421 ;; Move operand1 to the high architectural bits of the register, keeping
1422 ;; the low architectural bits of operand2.
1423 ;; For little-endian this is { operand2, operand1 }
1424 ;; For big-endian this is { operand1, operand2 }
1425
;; Little-endian INS into d[1]; operand 0 is read-modify-write ("+w"),
;; with the low half reselected from the destination itself.
1426 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1427 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1428 (vec_concat:VQ
1429 (vec_select:<VHALF>
1430 (match_dup 0)
1431 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1432 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1433 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1434 "@
1435 ins\\t%0.d[1], %1.d[0]
1436 ins\\t%0.d[1], %1"
1437 [(set_attr "type" "neon_ins")]
1438 )
1439
;; Big-endian variant: new data occupies the first vec_concat slot.
1440 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1441 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1442 (vec_concat:VQ
1443 (match_operand:<VHALF> 1 "register_operand" "w,r")
1444 (vec_select:<VHALF>
1445 (match_dup 0)
1446 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1447 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1448 "@
1449 ins\\t%0.d[1], %1.d[0]
1450 ins\\t%0.d[1], %1"
1451 [(set_attr "type" "neon_ins")]
1452 )
1453
;; Driver expander: builds the lo-half parallel and dispatches on
;; endianness to the matching insn above.
1454 (define_expand "move_hi_quad_<mode>"
1455 [(match_operand:VQ 0 "register_operand" "")
1456 (match_operand:<VHALF> 1 "register_operand" "")]
1457 "TARGET_SIMD"
1458 {
1459 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1460 if (BYTES_BIG_ENDIAN)
1461 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1462 operands[1], p));
1463 else
1464 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1465 operands[1], p));
1466 DONE;
1467 })
1468
1469 ;; Narrowing operations.
1470
1471 ;; For doubles.
;; XTN: truncate each lane of a 128-bit vector to half width, producing
;; a 64-bit result.
1472 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1473 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1474 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1475 "TARGET_SIMD"
1476 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1477 [(set_attr "type" "neon_shift_imm_narrow_q")]
1478 )
1479
;; Pack two 64-bit vectors into one double-width register, then narrow
;; with a single XTN.  Operand order within the temporary depends on
;; endianness so that the standard pattern's lane ordering is honoured.
1480 (define_expand "vec_pack_trunc_<mode>"
1481 [(match_operand:<VNARROWD> 0 "register_operand" "")
1482 (match_operand:VDN 1 "register_operand" "")
1483 (match_operand:VDN 2 "register_operand" "")]
1484 "TARGET_SIMD"
1485 {
1486 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1487 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1488 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1489
1490 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1491 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1492 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1493 DONE;
1494 })
1495
1496 ;; For quads.
1497
;; Narrow two 128-bit vectors into one via XTN + XTN2.  The destination
;; is earlyclobber ("=&w") because it is written before both inputs are
;; fully consumed; emission order swaps on big-endian.
1498 (define_insn "vec_pack_trunc_<mode>"
1499 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1500 (vec_concat:<VNARROWQ2>
1501 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1502 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1503 "TARGET_SIMD"
1504 {
1505 if (BYTES_BIG_ENDIAN)
1506 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1507 else
1508 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1509 }
1510 [(set_attr "type" "multiple")
1511 (set_attr "length" "8")]
1512 )
1513
1514 ;; Widening operations.
1515
;; Sign/zero-extend the low half of a 128-bit vector to double-width
;; lanes (SXTL/UXTL).
1516 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1519 (match_operand:VQW 1 "register_operand" "w")
1520 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1521 )))]
1522 "TARGET_SIMD"
1523 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1524 [(set_attr "type" "neon_shift_imm_long")]
1525 )
1526
;; Same for the high half (SXTL2/UXTL2).
1527 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1528 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1529 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1530 (match_operand:VQW 1 "register_operand" "w")
1531 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1532 )))]
1533 "TARGET_SIMD"
1534 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1535 [(set_attr "type" "neon_shift_imm_long")]
1536 )
1537
;; Standard-name expander: materialise the hi-half lane parallel and
;; emit the insn above.
1538 (define_expand "vec_unpack<su>_hi_<mode>"
1539 [(match_operand:<VWIDE> 0 "register_operand" "")
1540 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1541 "TARGET_SIMD"
1542 {
1543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1544 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1545 operands[1], p));
1546 DONE;
1547 }
1548 )
1549
;; Lo-half counterpart of the expander above.
1550 (define_expand "vec_unpack<su>_lo_<mode>"
1551 [(match_operand:<VWIDE> 0 "register_operand" "")
1552 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1553 "TARGET_SIMD"
1554 {
1555 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1556 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1557 operands[1], p));
1558 DONE;
1559 }
1560 )
1561
1562 ;; Widening arithmetic.
1563
;; Widening multiply-accumulate on the low halves (SMLAL/UMLAL):
;; acc += extend(lo(op2)) * extend(lo(op4)).
1564 (define_insn "*aarch64_<su>mlal_lo<mode>"
1565 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1566 (plus:<VWIDE>
1567 (mult:<VWIDE>
1568 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569 (match_operand:VQW 2 "register_operand" "w")
1570 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1571 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1572 (match_operand:VQW 4 "register_operand" "w")
1573 (match_dup 3))))
1574 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1575 "TARGET_SIMD"
1576 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1577 [(set_attr "type" "neon_mla_<Vetype>_long")]
1578 )
1579
;; High-half variant (SMLAL2/UMLAL2).
1580 (define_insn "*aarch64_<su>mlal_hi<mode>"
1581 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1582 (plus:<VWIDE>
1583 (mult:<VWIDE>
1584 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1585 (match_operand:VQW 2 "register_operand" "w")
1586 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1587 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1588 (match_operand:VQW 4 "register_operand" "w")
1589 (match_dup 3))))
1590 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1591 "TARGET_SIMD"
1592 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1593 [(set_attr "type" "neon_mla_<Vetype>_long")]
1594 )
1595
;; Widening multiply-subtract on the low halves (SMLSL/UMLSL).
1596 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598 (minus:<VWIDE>
1599 (match_operand:<VWIDE> 1 "register_operand" "0")
1600 (mult:<VWIDE>
1601 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602 (match_operand:VQW 2 "register_operand" "w")
1603 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1604 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1605 (match_operand:VQW 4 "register_operand" "w")
1606 (match_dup 3))))))]
1607 "TARGET_SIMD"
1608 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1609 [(set_attr "type" "neon_mla_<Vetype>_long")]
1610 )
1611
;; High-half variant (SMLSL2/UMLSL2).
1612 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1613 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1614 (minus:<VWIDE>
1615 (match_operand:<VWIDE> 1 "register_operand" "0")
1616 (mult:<VWIDE>
1617 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1618 (match_operand:VQW 2 "register_operand" "w")
1619 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1620 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1621 (match_operand:VQW 4 "register_operand" "w")
1622 (match_dup 3))))))]
1623 "TARGET_SIMD"
1624 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
;; Whole-64-bit-vector widening multiply-accumulate (no half select).
1628 (define_insn "*aarch64_<su>mlal<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630 (plus:<VWIDE>
1631 (mult:<VWIDE>
1632 (ANY_EXTEND:<VWIDE>
1633 (match_operand:VD_BHSI 1 "register_operand" "w"))
1634 (ANY_EXTEND:<VWIDE>
1635 (match_operand:VD_BHSI 2 "register_operand" "w")))
1636 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1637 "TARGET_SIMD"
1638 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1639 [(set_attr "type" "neon_mla_<Vetype>_long")]
1640 )
1641
;; Whole-64-bit-vector widening multiply-subtract.
1642 (define_insn "*aarch64_<su>mlsl<mode>"
1643 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1644 (minus:<VWIDE>
1645 (match_operand:<VWIDE> 1 "register_operand" "0")
1646 (mult:<VWIDE>
1647 (ANY_EXTEND:<VWIDE>
1648 (match_operand:VD_BHSI 2 "register_operand" "w"))
1649 (ANY_EXTEND:<VWIDE>
1650 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1651 "TARGET_SIMD"
1652 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1653 [(set_attr "type" "neon_mla_<Vetype>_long")]
1654 )
1655
;; Widening multiply of the low halves (SMULL/UMULL): each narrow lane
;; pair is extended then multiplied into a double-width lane.
1656 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1657 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1658 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1659 (match_operand:VQW 1 "register_operand" "w")
1660 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1661 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1662 (match_operand:VQW 2 "register_operand" "w")
1663 (match_dup 3)))))]
1664 "TARGET_SIMD"
1665 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1666 [(set_attr "type" "neon_mul_<Vetype>_long")]
1667 )
1668
;; Standard-name expander wrapping the lo-half insn above.
1669 (define_expand "vec_widen_<su>mult_lo_<mode>"
1670 [(match_operand:<VWIDE> 0 "register_operand" "")
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1672 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1673 "TARGET_SIMD"
1674 {
1675 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1676 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1677 operands[1],
1678 operands[2], p));
1679 DONE;
1680 }
1681 )
1682
;; High-half widening multiply (SMULL2/UMULL2).
1683 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1684 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1685 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686 (match_operand:VQW 1 "register_operand" "w")
1687 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1688 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689 (match_operand:VQW 2 "register_operand" "w")
1690 (match_dup 3)))))]
1691 "TARGET_SIMD"
1692 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1693 [(set_attr "type" "neon_mul_<Vetype>_long")]
1694 )
1695
;; Standard-name expander wrapping the hi-half insn above.
1696 (define_expand "vec_widen_<su>mult_hi_<mode>"
1697 [(match_operand:<VWIDE> 0 "register_operand" "")
1698 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1699 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1700 "TARGET_SIMD"
1701 {
1702 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1703 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1704 operands[1],
1705 operands[2], p));
1706 DONE;
1707
1708 }
1709 )
1710
1711 ;; FP vector operations.
1712 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1713 ;; double-precision (64-bit) floating-point data types and arithmetic as
1714 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1715 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1716 ;;
1717 ;; Floating-point operations can raise an exception. Vectorizing such
1718 ;; operations are safe because of reasons explained below.
1719 ;;
1720 ;; ARMv8 permits an extension to enable trapped floating-point
1721 ;; exception handling, however this is an optional feature. In the
1722 ;; event of a floating-point exception being raised by vectorised
1723 ;; code then:
1724 ;; 1. If trapped floating-point exceptions are available, then a trap
1725 ;; will be taken when any lane raises an enabled exception. A trap
1726 ;; handler may determine which lane raised the exception.
1727 ;; 2. Alternatively a sticky exception flag is set in the
1728 ;; floating-point status register (FPSR). Software may explicitly
1729 ;; test the exception flags, in which case the tests will either
1730 ;; prevent vectorisation, allowing precise identification of the
1731 ;; failing operation, or if tested outside of vectorisable regions
1732 ;; then the specific operation and lane are not of interest.
1733
1734 ;; FP arithmetic operations.
1735
;; Element-wise FP add (FADD).
1736 (define_insn "add<mode>3"
1737 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1738 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1739 (match_operand:VHSDF 2 "register_operand" "w")))]
1740 "TARGET_SIMD"
1741 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1743 )
1744
;; Element-wise FP subtract (FSUB).
1745 (define_insn "sub<mode>3"
1746 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1747 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1748 (match_operand:VHSDF 2 "register_operand" "w")))]
1749 "TARGET_SIMD"
1750 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1751 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1752 )
1753
;; Element-wise FP multiply (FMUL).
1754 (define_insn "mul<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1758 "TARGET_SIMD"
1759 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1761 )
1762
;; FP divide expander: try the reciprocal-approximation sequence first
;; (aarch64_emit_approx_div); otherwise fall through to the FDIV insn.
1763 (define_expand "div<mode>3"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1766 (match_operand:VHSDF 2 "register_operand" "w")))]
1767 "TARGET_SIMD"
1768 {
1769 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1770 DONE;
1771
1772 operands[1] = force_reg (<MODE>mode, operands[1]);
1773 })
1774
;; Element-wise FP divide (FDIV).
1775 (define_insn "*div<mode>3"
1776 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1777 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1778 (match_operand:VHSDF 2 "register_operand" "w")))]
1779 "TARGET_SIMD"
1780 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1781 [(set_attr "type" "neon_fp_div_<stype><q>")]
1782 )
1783
;; Element-wise FP negate (FNEG).
1784 (define_insn "neg<mode>2"
1785 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1786 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1787 "TARGET_SIMD"
1788 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1789 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1790 )
1791
;; Element-wise FP absolute value (FABS).
1792 (define_insn "abs<mode>2"
1793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1794 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1795 "TARGET_SIMD"
1796 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1797 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1798 )
1799
1800 (define_insn "fma<mode>4"
1801 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1802 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1803 (match_operand:VHSDF 2 "register_operand" "w")
1804 (match_operand:VHSDF 3 "register_operand" "0")))]
1805 "TARGET_SIMD"
1806 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1807 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1808 )
1809
;; FMLA by-element: multiply every lane of operand 3 by lane %2 of
;; operand 1 and accumulate into operand 4/0.  The lane number is
;; remapped for big-endian by aarch64_endian_lane_rtx.
(define_insn "*aarch64_fma4_elt<mode>"
 [(set (match_operand:VDQF 0 "register_operand" "=w")
   (fma:VDQF
     (vec_duplicate:VDQF
       (vec_select:<VEL>
	 (match_operand:VDQF 1 "register_operand" "<h_con>")
	 (parallel [(match_operand:SI 2 "immediate_operand")])))
     (match_operand:VDQF 3 "register_operand" "w")
     (match_operand:VDQF 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the element is taken from a vector of the other width
;; (e.g. an SF lane of a V2SF used with a V4SF multiply).
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLA with the multiplier broadcast from a scalar register; always
;; uses lane 0, so no endian lane adjustment is needed.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fma where the multiplier is one lane of a V2DF register,
;; emitted as the by-element form of FMLA.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1871
;; Fused negate-multiply-add, standard pattern: operand 0 = -(1 * 2) + 3,
;; mapped onto FMLS.  The addend (operand 3) is tied to the destination.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS by-element; mirrors *aarch64_fma4_elt<mode> with the vector
;; multiplicand negated.  Lane is endian-adjusted at output time.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
	(match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the selected element comes from the other vector width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
	(match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with the multiplier broadcast from a scalar register (lane 0,
;; so no endian lane remapping required).
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
	(match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF fnma taking one lane of a V2DF as the multiplicand, with
;; the negation on the scalar multiplier operand instead.
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
	(match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1948
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One insn per FRINT rounding variant; <frint_suffix> selects the
;; rounding mode (z, p, m, i, x, a, n) from the FRINT iterator.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-to-integral followed by signed/unsigned conversion, mapped to
;; the single FCVT<rounding><s|u> instruction.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
;; These require the ARMv8.2-A FP16 scalar instructions (TARGET_SIMD_F16INST).
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; Truncating HF -> HI conversion (round towards zero).
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

;; HI -> HF conversion, signed or unsigned per FLOATUORS.
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
1997
;; Combine a multiply by a power of two with a truncating float-to-int
;; conversion into a single FCVTZ with an #fbits operand.  The condition
;; checks that the constant really is 2^n with n in the representable
;; range for the element size.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    /* The #fbits immediate must be printed at output time, so build
       the template with snprintf and emit it directly.  */
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

;; Standard pattern entry for fix/fixuns: truncate-towards-zero then
;; convert; matched by the l<fcvt_pattern>... insn above.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

;; fix_trunc/fixuns_trunc standard pattern: same RTL shape as above.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

;; ftrunc standard pattern (round towards zero, FP result).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

;; Integer-to-float conversion, signed (SCVTF) or unsigned (UCVTF).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
2049
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2053
2054 ;; Float widening operations.
2055
;; Float-extend the low half of a 128-bit vector (FCVTL).  Operand 2 is
;; a PARALLEL selecting the low-half lanes, checked by the predicate.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

;; Float -> fixed-point: operand 2 is the number of fraction bits.
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	   FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

;; Fixed-point -> float, again with #fbits in operand 2.
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	   FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
2090
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
2099
;; Standard-pattern expander: float-extend the lower architectural lanes
;; of operand 1.  Builds a low-half lane selector (last argument false)
;; and hands it to the matching "_lo_" insn.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
2111
;; Float-extend the high half of a 128-bit vector (FCVTL2).  Operand 2
;; must be a PARALLEL selecting the high-half lanes.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2122
;; Standard-pattern expander: float-extend the HIGHER architectural
;; lanes of operand 1 (see the endianness note above).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* Build a PARALLEL selecting the high-half lanes (last arg true).  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    /* This must emit the "_hi_" insn: the "_lo_" pattern's operand 2
       predicate is vect_par_cnst_lo_half, so an insn built from the
       high-half selector above would fail to match (recog failure).  */
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Float-extend a 64-bit vector to a 128-bit vector of wider elements
;; (FCVTL on the whole D register).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2142
2143 ;; Float narrowing operations.
2144
;; Narrow a 128-bit vector of wide elements into a 64-bit vector (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
      (float_truncate:VDF
	(match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; FCVTN2: narrow operand 2 into the upper half of the destination while
;; keeping operand 1 (tied to the destination) in the lower half.
;; Little-endian lane ordering: (lo, truncated-hi).
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (match_operand:VDF 1 "register_operand" "0")
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Big-endian variant: the vec_concat halves are swapped so that the
;; RTL lane numbering matches the architectural lanes FCVTN2 writes.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
    (vec_concat:<VDBL>
      (float_truncate:VDF
	(match_operand:<VWIDE> 2 "register_operand" "w"))
      (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2175
;; Dispatch to the _le or _be insn above depending on target endianness.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
}
)
2189
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half, then
;; FCVTN2 into the high half.  On big-endian the operands swap so the
;; result's lane numbering is correct.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
      (vec_concat:V4SF
	(float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	(float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)

;; Pack two scalar DFs into a V2SF: first assemble a V2DF from the two
;; scalars (lo/hi quad moves), then narrow the whole vector with FCVTN.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
      (vec_concat:V2SF
	(float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	(float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	  ))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2231
2232 ;; FP Max/Min
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2234 ;; expression like:
2235 ;; a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2238 ;; -ffast-math.
2239 ;;
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2245 ;; NaNs.
2246
;; smax/smin standard patterns, valid only under the fast-math
;; conditions described above; mapped to FMAXNM/FMINNM.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
;; These use an unspec so their NaN/signed-zero behavior is exactly
;; that of the underlying instruction, not RTL smax/smin semantics.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2268
2269 ;; 'across lanes' add.
2270
;; Integer add-reduction to a scalar: do the across-lanes add into a
;; vector scratch, then extract architectural lane 0 (endian-adjusted).
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Pairwise FP add of two vectors (FADDP).
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

;; Across-lanes integer add (ADDV / ADDP depending on mode).
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; V2SI has no ADDV form; a single pairwise ADDP of the vector with
;; itself puts the sum in every lane.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; Two-element FP vectors reduce with the scalar-result form of FADDP.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
2321
;; V4SF add-reduction: two rounds of pairwise FADDP leave the total in
;; every lane; then extract architectural lane 0 (endian-adjusted).
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
2335
;; Count leading redundant sign bits per lane (CLS).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Count leading zeros per lane (CLZ).
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Population count; CNT only exists for byte lanes, hence the VB modes.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2359
2360 ;; 'across lanes' max and min ops.
2361
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP min/max reduction to scalar: across-lanes op into a vector
;; scratch, then extract architectural lane 0 (endian-adjusted).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Across-lanes integer min/max (SMAXV/SMINV/UMAXV/UMINV).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; V2SI has no across-lanes form; use the pairwise op on the vector
;; with itself, leaving the result in both lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

;; Across-lanes FP min/max (FMAXV/FMINV/FMAXNMV/FMINNMV).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2421
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2423 ;; allocation.
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2425 ;; to select.
2426 ;;
2427 ;; Thus our BSL is of the form:
2428 ;; op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2430 ;;
2431 ;; if (op0 = mask)
2432 ;; bsl mask, op1, op2
2433 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;; bit op0, op2, mask
2435 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;; bif op0, op1, mask
2437 ;;
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
2441
;; Bitwise select expressed via its XOR/AND identity:
;;   op0 = ((op2 ^ op3) & op1) ^ op3
;; The three alternatives tie the destination to the mask, op2 or op3,
;; selecting BSL, BIT or BIF respectively (see comment above).
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2459
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first. The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2465
;; Commuted form of the pattern above:
;;   op0 = ((op3 ^ op2) & op1) ^ op2
;; needed because recog does not try operand permutations of
;; commutative XOR (see comment above).
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2482
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers. If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again. However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2489 ;;
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
2492
;; DImode BSL.  The fourth alternative keeps all operands in general
;; registers (early-clobber "&r" destination) and is split back into
;; EOR/AND/EOR so the computation stays on the integer side (see the
;; rationale in the comment above).
(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  ;; The split pattern below is a placeholder; the C block emits the
  ;; real replacement insns and ends with DONE.
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
  {
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
  }
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2534
;; Commuted DImode BSL (XOR result folded with operand 2 instead of 3);
;; the GP-register alternative is likewise split into EOR/AND/EOR.
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  ;; Placeholder split pattern; the C block emits the real insns.
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
  {
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
  }
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2576
;; Public bitwise-select expander.  FP modes are punned to the integer
;; equivalent mode (the internal pattern only exists for integer
;; vectors) and the result punned back afterwards.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2602
;; Select between operands 1 and 2 under the (integer) mask in operand 3.
;; The all-ones/all-zeros special cases avoid emitting a BSL entirely.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      /* General case: force any constants into registers and emit a
	 bitwise select.  */
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2631
2632 ;; Patterns comparing two vectors to produce a mask.
2633
;; Integer vector comparison producing an all-ones/all-zeros mask.
;; Signed comparisons against zero use the immediate-zero CM* forms,
;; so operand 3 is only forced into a register otherwise.  Unsigned
;; LT/LE are synthesized by swapping operands of CMHI/CMHS, and NE is
;; implemented as the complement of EQ.
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      /* These can take a zero operand directly (CMxx Vn, #0).  */
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      /* a <u b  ==>  b >u a.  */
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      /* a <=u b  ==>  b >=u a.  */
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2713
;; Expand a floating-point vector comparison into AdvSIMD FCM* sequences.
;; Operand 0 receives an all-ones/all-zeros per-lane mask in the
;; equivalent integer mode, operand 1 is the comparison operator and
;; operands 2/3 are the FP inputs.  Unordered comparisons are built from
;; the ordered FCM* forms after squashing NaN lanes to zero, so that no
;; spurious FP exception is raised.
2714 (define_expand "vec_cmp<mode><v_int_equiv>"
2715 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2716 (match_operator 1 "comparison_operator"
2717 [(match_operand:VDQF 2 "register_operand")
2718 (match_operand:VDQF 3 "nonmemory_operand")]))]
2719 "TARGET_SIMD"
2720 {
2721 int use_zero_form = 0;
2722 enum rtx_code code = GET_CODE (operands[1]);
2723 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2724
2725 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2726
2727 switch (code)
2728 {
2729 case LE:
2730 case LT:
2731 case GE:
2732 case GT:
2733 case EQ:
2734 if (operands[3] == CONST0_RTX (<MODE>mode))
2735 {
2736 use_zero_form = 1;
2737 break;
2738 }
2739 /* Fall through. */
2740 default:
2741 if (!REG_P (operands[3]))
2742 operands[3] = force_reg (<MODE>mode, operands[3]);
2743
2744 break;
2745 }
2746
2747 switch (code)
2748 {
2749 case LT:
2750 if (use_zero_form)
2751 {
2752 comparison = gen_aarch64_cmlt<mode>;
2753 break;
2754 }
2755 /* Fall through. */
2756 case UNLT:
2757 std::swap (operands[2], operands[3]);
2758 /* Fall through. */
2759 case UNGT:
2760 case GT:
2761 comparison = gen_aarch64_cmgt<mode>;
2762 break;
2763 case LE:
2764 if (use_zero_form)
2765 {
2766 comparison = gen_aarch64_cmle<mode>;
2767 break;
2768 }
2769 /* Fall through. */
2770 case UNLE:
2771 std::swap (operands[2], operands[3]);
2772 /* Fall through. */
2773 case UNGE:
2774 case GE:
2775 comparison = gen_aarch64_cmge<mode>;
2776 break;
2777 case NE:
2778 case EQ:
2779 comparison = gen_aarch64_cmeq<mode>;
2780 break;
2781 case UNEQ:
2782 case ORDERED:
2783 case UNORDERED:
2784 case LTGT:
2785 break;
2786 default:
2787 gcc_unreachable ();
2788 }
2789
2790 switch (code)
2791 {
2792 case UNGE:
2793 case UNGT:
2794 case UNLE:
2795 case UNLT:
2796 {
2797 /* All of the above must not raise any FP exceptions. Thus we first
2798 check each operand for NaNs and force any elements containing NaN to
2799 zero before using them in the compare.
2800 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2801 (cm<cc> (isnan (a) ? 0.0 : a,
2802 isnan (b) ? 0.0 : b))
2803 We use the following transformations for doing the comparisons:
2804 a UNGE b -> a GE b
2805 a UNGT b -> a GT b
2806 a UNLE b -> b GE a
2807 a UNLT b -> b GT a. */
2808
2809 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2810 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2811 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2812 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2813 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2814 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2817 operands[2],
2818 <MODE>mode)));
2819 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2820 lowpart_subreg (<V_INT_EQUIV>mode,
2821 operands[3],
2822 <MODE>mode)));
2823 gcc_assert (comparison != NULL);
2824 emit_insn (comparison (operands[0],
2825 lowpart_subreg (<MODE>mode,
2826 tmp0, <V_INT_EQUIV>mode),
2827 lowpart_subreg (<MODE>mode,
2828 tmp1, <V_INT_EQUIV>mode)));
2829 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2830 }
2831 break;
2832
2833 case LT:
2834 case LE:
2835 case GT:
2836 case GE:
2837 case EQ:
2838 case NE:
2839 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2840 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2841 a GE b -> a GE b
2842 a GT b -> a GT b
2843 a LE b -> b GE a
2844 a LT b -> b GT a
2845 a EQ b -> a EQ b
2846 a NE b -> ~(a EQ b) */
2847 gcc_assert (comparison != NULL);
2848 emit_insn (comparison (operands[0], operands[2], operands[3]));
2849 if (code == NE)
2850 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2851 break;
2852
2853 case LTGT:
2854 /* LTGT is not guaranteed to not generate a FP exception. So let's
2855 go the faster way : ((a > b) || (b > a)). */
2856 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2857 operands[2], operands[3]));
2858 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2859 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2860 break;
2861
2862 case ORDERED:
2863 case UNORDERED:
2864 case UNEQ:
2865 /* cmeq (a, a) & cmeq (b, b). */
2866 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2867 operands[2], operands[2]));
2868 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2869 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2870
2871 if (code == UNORDERED)
2872 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2873 else if (code == UNEQ)
2874 {
2875 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2876 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2877 }
2878 break;
2879
2880 default:
2881 gcc_unreachable ();
2882 }
2883
2884 DONE;
2885 })
2886
;; Unsigned integer vector comparison.  Simply delegates to the signed
;; vec_cmp expander, which already dispatches on the unsigned rtx codes
;; (GTU/GEU/LTU/LEU) to the CMHI/CMHS instructions.
2887 (define_expand "vec_cmpu<mode><mode>"
2888 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2889 (match_operator 1 "comparison_operator"
2890 [(match_operand:VSDQ_I_DI 2 "register_operand")
2891 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2892 "TARGET_SIMD"
2893 {
2894 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2895 operands[2], operands[3]));
2896 DONE;
2897 })
2898
;; vcond/vcondu expanders: emit a vec_cmp to compute the per-lane
;; selection mask, then a vcond_mask bit-select between operands 1 and 2.
;; Since vec_cmp implements NE as EQ followed by a NOT, NE is rewritten
;; here as EQ with operands 1/2 swapped to save that extra NOT.
2899 (define_expand "vcond<mode><mode>"
2900 [(set (match_operand:VALLDI 0 "register_operand")
2901 (if_then_else:VALLDI
2902 (match_operator 3 "comparison_operator"
2903 [(match_operand:VALLDI 4 "register_operand")
2904 (match_operand:VALLDI 5 "nonmemory_operand")])
2905 (match_operand:VALLDI 1 "nonmemory_operand")
2906 (match_operand:VALLDI 2 "nonmemory_operand")))]
2907 "TARGET_SIMD"
2908 {
2909 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2910 enum rtx_code code = GET_CODE (operands[3]);
2911
2912 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2913 it as well as switch operands 1/2 in order to avoid the additional
2914 NOT instruction. */
2915 if (code == NE)
2916 {
2917 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2918 operands[4], operands[5]);
2919 std::swap (operands[1], operands[2]);
2920 }
2921 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2922 operands[4], operands[5]));
2923 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2924 operands[2], mask));
2925
2926 DONE;
2927 })
2928
;; Mixed-mode vcond: FP comparison selecting between integer vectors of
;; the same width (<V_cmp_mixed>).
2929 (define_expand "vcond<v_cmp_mixed><mode>"
2930 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2931 (if_then_else:<V_cmp_mixed>
2932 (match_operator 3 "comparison_operator"
2933 [(match_operand:VDQF_COND 4 "register_operand")
2934 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2935 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2936 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2937 "TARGET_SIMD"
2938 {
2939 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2940 enum rtx_code code = GET_CODE (operands[3]);
2941
2942 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2943 it as well as switch operands 1/2 in order to avoid the additional
2944 NOT instruction. */
2945 if (code == NE)
2946 {
2947 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2948 operands[4], operands[5]);
2949 std::swap (operands[1], operands[2]);
2950 }
2951 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2952 operands[4], operands[5]));
2953 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2954 operands[0], operands[1],
2955 operands[2], mask));
2956
2957 DONE;
2958 })
2959
;; Unsigned-comparison vcond on integer vectors.
2960 (define_expand "vcondu<mode><mode>"
2961 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2962 (if_then_else:VSDQ_I_DI
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:VSDQ_I_DI 4 "register_operand")
2965 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2966 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2967 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2968 "TARGET_SIMD"
2969 {
2970 rtx mask = gen_reg_rtx (<MODE>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2972
2973 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974 it as well as switch operands 1/2 in order to avoid the additional
2975 NOT instruction. */
2976 if (code == NE)
2977 {
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2981 }
2982 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2983 operands[4], operands[5]));
2984 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2985 operands[2], mask));
2986 DONE;
2987 })
2988
;; Unsigned integer comparison selecting between FP vectors of the same
;; width.
2989 (define_expand "vcondu<mode><v_cmp_mixed>"
2990 [(set (match_operand:VDQF 0 "register_operand")
2991 (if_then_else:VDQF
2992 (match_operator 3 "comparison_operator"
2993 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2994 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2995 (match_operand:VDQF 1 "nonmemory_operand")
2996 (match_operand:VDQF 2 "nonmemory_operand")))]
2997 "TARGET_SIMD"
2998 {
2999 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3000 enum rtx_code code = GET_CODE (operands[3]);
3001
3002 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3003 it as well as switch operands 1/2 in order to avoid the additional
3004 NOT instruction. */
3005 if (code == NE)
3006 {
3007 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3008 operands[4], operands[5]);
3009 std::swap (operands[1], operands[2]);
3010 }
3011 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3012 mask, operands[3],
3013 operands[4], operands[5]));
3014 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3015 operands[2], mask));
3016 DONE;
3017 })
3018
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3020
3021 ;; Lane extraction with sign extension to general purpose register.
;; RTL lane numbers follow GCC vector-extension (array) order;
;; aarch64_endian_lane_rtx converts them to the architectural lane
;; number used in the assembly syntax (they differ on big-endian).
3022 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3023 [(set (match_operand:GPI 0 "register_operand" "=r")
3024 (sign_extend:GPI
3025 (vec_select:<VEL>
3026 (match_operand:VDQQH 1 "register_operand" "w")
3027 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3028 "TARGET_SIMD"
3029 {
3030 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3031 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3032 }
3033 [(set_attr "type" "neon_to_gp<q>")]
3034 )
3035
;; Lane extraction with zero extension; UMOV to a W register already
;; clears the upper bits, so only an SImode destination is needed.
3036 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3037 [(set (match_operand:SI 0 "register_operand" "=r")
3038 (zero_extend:SI
3039 (vec_select:<VEL>
3040 (match_operand:VDQQH 1 "register_operand" "w")
3041 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3042 "TARGET_SIMD"
3043 {
3044 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3045 return "umov\\t%w0, %1.<Vetype>[%2]";
3046 }
3047 [(set_attr "type" "neon_to_gp<q>")]
3048 )
3049
3050 ;; Lane extraction of a value, neither sign nor zero extension
3051 ;; is guaranteed so upper bits should be considered undefined.
3052 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP to a SIMD scalar, or
;; ST1 of a single lane directly to memory.
3053 (define_insn "aarch64_get_lane<mode>"
3054 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3055 (vec_select:<VEL>
3056 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3057 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3058 "TARGET_SIMD"
3059 {
3060 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3061 switch (which_alternative)
3062 {
3063 case 0:
3064 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3065 case 1:
3066 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3067 case 2:
3068 return "st1\\t{%1.<Vetype>}[%2], %0";
3069 default:
3070 gcc_unreachable ();
3071 }
3072 }
3073 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3074 )
3075
;; Form a 128-bit value from two adjacent 64-bit memory locations: the
;; condition proves operand 2's address is operand 1's address plus the
;; 64-bit mode size, so a single unaligned Q-register LDR covers both.
3076 (define_insn "load_pair_lanes<mode>"
3077 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3078 (vec_concat:<VDBL>
3079 (match_operand:VDC 1 "memory_operand" "Utq")
3080 (match_operand:VDC 2 "memory_operand" "m")))]
3081 "TARGET_SIMD && !STRICT_ALIGNMENT
3082 && rtx_equal_p (XEXP (operands[2], 0),
3083 plus_constant (Pmode,
3084 XEXP (operands[1], 0),
3085 GET_MODE_SIZE (<MODE>mode)))"
3086 "ldr\\t%q0, %1"
3087 [(set_attr "type" "neon_load1_1reg_q")]
3088 )
3089
;; Store a 128-bit concatenation of two 64-bit registers as one STP,
;; from either SIMD (%d) or general (%x) registers.
3090 (define_insn "store_pair_lanes<mode>"
3091 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3092 (vec_concat:<VDBL>
3093 (match_operand:VDC 1 "register_operand" "w, r")
3094 (match_operand:VDC 2 "register_operand" "w, r")))]
3095 "TARGET_SIMD"
3096 "@
3097 stp\\t%d1, %d2, %y0
3098 stp\\t%x1, %x2, %y0"
3099 [(set_attr "type" "neon_stp, store_16")]
3100 )
3101
3102 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3103 ;; dest vector.
3104
;; vec_concat with a zero high half: a plain 64-bit move, FMOV from a GP
;; register, or 64-bit LDR all leave the upper 64 bits of the Q register
;; zeroed, so no explicit zeroing is needed (little-endian ordering).
3105 (define_insn "*aarch64_combinez<mode>"
3106 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3107 (vec_concat:<VDBL>
3108 (match_operand:VDC 1 "general_operand" "w,?r,m")
3109 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3110 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3111 "@
3112 mov\\t%0.8b, %1.8b
3113 fmov\t%d0, %1
3114 ldr\\t%d0, %1"
3115 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3116 (set_attr "simd" "yes,*,yes")
3117 (set_attr "fp" "*,yes,*")]
3118 )
3119
;; Big-endian variant: the zero half appears first in the RTL
;; vec_concat, but the generated instructions are identical.
3120 (define_insn "*aarch64_combinez_be<mode>"
3121 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3122 (vec_concat:<VDBL>
3123 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3124 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3125 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3126 "@
3127 mov\\t%0.8b, %1.8b
3128 fmov\t%d0, %1
3129 ldr\\t%d0, %1"
3130 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3131 (set_attr "simd" "yes,*,yes")
3132 (set_attr "fp" "*,yes,*")]
3133 )
3134
;; vcombine: join two 64-bit vectors into one 128-bit vector.  The split
;; helper picks the endian-correct concatenation order.
3135 (define_expand "aarch64_combine<mode>"
3136 [(match_operand:<VDBL> 0 "register_operand")
3137 (match_operand:VDC 1 "register_operand")
3138 (match_operand:VDC 2 "register_operand")]
3139 "TARGET_SIMD"
3140 {
3141 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3142
3143 DONE;
3144 }
3145 )
3146
;; Internal combine used by the builtins: fill the low then the high
;; quadword of the destination.
3147 (define_expand "aarch64_simd_combine<mode>"
3148 [(match_operand:<VDBL> 0 "register_operand")
3149 (match_operand:VDC 1 "register_operand")
3150 (match_operand:VDC 2 "register_operand")]
3151 "TARGET_SIMD"
3152 {
3153 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3154 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3155 DONE;
3156 }
3157 [(set_attr "type" "multiple")]
3158 )
3159
3160 ;; <su><addsub>l<q>.
3161
;; Widening add/subtract of the high halves of two Q registers
;; ([SU]ADDL2/[SU]SUBL2): extend each half to double width, then
;; add/subtract in the wide mode.
3162 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3165 (match_operand:VQW 1 "register_operand" "w")
3166 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3167 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3168 (match_operand:VQW 2 "register_operand" "w")
3169 (match_dup 3)))))]
3170 "TARGET_SIMD"
3171 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3172 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3173 )
3174
;; Same operation on the low halves ([SU]ADDL/[SU]SUBL).
3175 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3176 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3177 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3178 (match_operand:VQW 1 "register_operand" "w")
3179 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3180 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3181 (match_operand:VQW 2 "register_operand" "w")
3182 (match_dup 3)))))]
3183 "TARGET_SIMD"
3184 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3185 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3186 )
3187
3188
;; Builtin expanders for the *2 (high-half) widening forms: build the
;; high-half lane-selection parallel and defer to the _hi_internal insns.
3189 (define_expand "aarch64_saddl2<mode>"
3190 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3191 (match_operand:VQW 1 "register_operand" "w")
3192 (match_operand:VQW 2 "register_operand" "w")]
3193 "TARGET_SIMD"
3194 {
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3196 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3197 operands[2], p));
3198 DONE;
3199 })
3200
3201 (define_expand "aarch64_uaddl2<mode>"
3202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203 (match_operand:VQW 1 "register_operand" "w")
3204 (match_operand:VQW 2 "register_operand" "w")]
3205 "TARGET_SIMD"
3206 {
3207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3209 operands[2], p));
3210 DONE;
3211 })
3212
3213 (define_expand "aarch64_ssubl2<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215 (match_operand:VQW 1 "register_operand" "w")
3216 (match_operand:VQW 2 "register_operand" "w")]
3217 "TARGET_SIMD"
3218 {
3219 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3221 operands[2], p));
3222 DONE;
3223 })
3224
3225 (define_expand "aarch64_usubl2<mode>"
3226 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3227 (match_operand:VQW 1 "register_operand" "w")
3228 (match_operand:VQW 2 "register_operand" "w")]
3229 "TARGET_SIMD"
3230 {
3231 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3232 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3233 operands[2], p));
3234 DONE;
3235 })
3236
;; Widening add/subtract of whole 64-bit vectors (no half selection).
3237 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3239 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3240 (match_operand:VD_BHSI 1 "register_operand" "w"))
3241 (ANY_EXTEND:<VWIDE>
3242 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3243 "TARGET_SIMD"
3244 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3245 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3246 )
3247
3248 ;; <su><addsub>w<q>.
3249
;; widen_ssum/widen_usum: add a narrow vector, element-extended, into a
;; double-width accumulator.  For Q-register inputs (VQW) this takes two
;; steps: [SU]ADDW on the low half followed by [SU]ADDW2 on the high
;; half.  For 64-bit inputs (VD_BHSI) a single [SU]ADDW suffices.
3250 (define_expand "widen_ssum<mode>3"
3251 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3252 (plus:<VDBLW> (sign_extend:<VDBLW>
3253 (match_operand:VQW 1 "register_operand" ""))
3254 (match_operand:<VDBLW> 2 "register_operand" "")))]
3255 "TARGET_SIMD"
3256 {
3257 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3258 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3259
3260 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3261 operands[1], p));
3262 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3263 DONE;
3264 }
3265 )
3266
3267 (define_expand "widen_ssum<mode>3"
3268 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3269 (plus:<VWIDE> (sign_extend:<VWIDE>
3270 (match_operand:VD_BHSI 1 "register_operand" ""))
3271 (match_operand:<VWIDE> 2 "register_operand" "")))]
3272 "TARGET_SIMD"
3273 {
3274 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3275 DONE;
3276 })
3277
3278 (define_expand "widen_usum<mode>3"
3279 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3280 (plus:<VDBLW> (zero_extend:<VDBLW>
3281 (match_operand:VQW 1 "register_operand" ""))
3282 (match_operand:<VDBLW> 2 "register_operand" "")))]
3283 "TARGET_SIMD"
3284 {
3285 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3286 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3287
3288 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3289 operands[1], p));
3290 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3291 DONE;
3292 }
3293 )
3294
3295 (define_expand "widen_usum<mode>3"
3296 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3297 (plus:<VWIDE> (zero_extend:<VWIDE>
3298 (match_operand:VD_BHSI 1 "register_operand" ""))
3299 (match_operand:<VWIDE> 2 "register_operand" "")))]
3300 "TARGET_SIMD"
3301 {
3302 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3303 DONE;
3304 })
3305
;; [SU]ADDW/[SU]SUBW: wide accumulator op extended 64-bit vector.
3306 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3307 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3308 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3309 (ANY_EXTEND:<VWIDE>
3310 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3311 "TARGET_SIMD"
3312 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3313 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3314 )
3315
;; Same, taking the low half of a Q-register operand.
3316 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3319 (ANY_EXTEND:<VWIDE>
3320 (vec_select:<VHALF>
3321 (match_operand:VQW 2 "register_operand" "w")
3322 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3323 "TARGET_SIMD"
3324 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3325 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3326 )
3327
;; Same, taking the high half ([SU]ADDW2/[SU]SUBW2).
3328 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3330 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3331 (ANY_EXTEND:<VWIDE>
3332 (vec_select:<VHALF>
3333 (match_operand:VQW 2 "register_operand" "w")
3334 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3335 "TARGET_SIMD"
3336 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3337 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3338 )
3339
;; Builtin expanders for the *w2 (high-half) forms: build the high-half
;; lane-selection parallel and defer to the w2_internal insns.
3340 (define_expand "aarch64_saddw2<mode>"
3341 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (match_operand:<VWIDE> 1 "register_operand" "w")
3343 (match_operand:VQW 2 "register_operand" "w")]
3344 "TARGET_SIMD"
3345 {
3346 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3348 operands[2], p));
3349 DONE;
3350 })
3351
3352 (define_expand "aarch64_uaddw2<mode>"
3353 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3354 (match_operand:<VWIDE> 1 "register_operand" "w")
3355 (match_operand:VQW 2 "register_operand" "w")]
3356 "TARGET_SIMD"
3357 {
3358 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3359 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3360 operands[2], p));
3361 DONE;
3362 })
3363
3364
3365 (define_expand "aarch64_ssubw2<mode>"
3366 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3367 (match_operand:<VWIDE> 1 "register_operand" "w")
3368 (match_operand:VQW 2 "register_operand" "w")]
3369 "TARGET_SIMD"
3370 {
3371 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3372 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3373 operands[2], p));
3374 DONE;
3375 })
3376
3377 (define_expand "aarch64_usubw2<mode>"
3378 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3379 (match_operand:<VWIDE> 1 "register_operand" "w")
3380 (match_operand:VQW 2 "register_operand" "w")]
3381 "TARGET_SIMD"
3382 {
3383 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3385 operands[2], p));
3386 DONE;
3387 })
3388
3389 ;; <su><r>h<addsub>.
3390
;; Standard average patterns mapped onto the halving-add instructions:
;; _floor truncates ([SU]HADD), _ceil rounds ([SU]RHADD).  These are
;; bare expanders; the insn below supplies the implementation.
3391 (define_expand "<u>avg<mode>3_floor"
3392 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3393 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3394 (match_operand:VDQ_BHSI 2 "register_operand")]
3395 HADD))]
3396 "TARGET_SIMD"
3397 )
3398
3399 (define_expand "<u>avg<mode>3_ceil"
3400 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3401 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3402 (match_operand:VDQ_BHSI 2 "register_operand")]
3403 RHADD))]
3404 "TARGET_SIMD"
3405 )
3406
;; Halving add/subtract, optionally rounding ([SU]H{ADD,SUB},
;; [SU]RHADD).
3407 (define_insn "aarch64_<sur>h<addsub><mode>"
3408 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3409 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3410 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3411 HADDSUB))]
3412 "TARGET_SIMD"
3413 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3414 [(set_attr "type" "neon_<addsub>_halve<q>")]
3415 )
3416
3417 ;; <r><addsub>hn<q>.
3418
;; Narrowing high-half add/subtract ([R]ADDHN/[R]SUBHN): keep the high
;; half of each double-width result element.
3419 (define_insn "aarch64_<sur><addsub>hn<mode>"
3420 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3421 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3422 (match_operand:VQN 2 "register_operand" "w")]
3423 ADDSUBHN))]
3424 "TARGET_SIMD"
3425 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3426 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3427 )
3428
;; The *2 form writes the upper half of the destination, keeping the
;; lower half (operand 1, tied to the output) unchanged.
3429 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3430 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3431 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3432 (match_operand:VQN 2 "register_operand" "w")
3433 (match_operand:VQN 3 "register_operand" "w")]
3434 ADDSUBHN2))]
3435 "TARGET_SIMD"
3436 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3437 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3438 )
3439
3440 ;; pmul.
3441
;; Polynomial (carry-less) multiply on byte vectors.
3442 (define_insn "aarch64_pmul<mode>"
3443 [(set (match_operand:VB 0 "register_operand" "=w")
3444 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3445 (match_operand:VB 2 "register_operand" "w")]
3446 UNSPEC_PMUL))]
3447 "TARGET_SIMD"
3448 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3449 [(set_attr "type" "neon_mul_<Vetype><q>")]
3450 )
3451
3452 ;; fmulx.
3453
;; FMULX: FP multiply-extended, vector and scalar forms.
3454 (define_insn "aarch64_fmulx<mode>"
3455 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3456 (unspec:VHSDF_HSDF
3457 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3458 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3459 UNSPEC_FMULX))]
3460 "TARGET_SIMD"
3461 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3462 [(set_attr "type" "neon_fp_mul_<stype>")]
3463 )
3464
3465 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3466
;; FMULX by a lane selected from a vector of the opposite width
;; (64-bit vector * Q-register lane, and vice versa).  Lane numbers are
;; converted to architectural order for the assembly.
3467 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3468 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3469 (unspec:VDQSF
3470 [(match_operand:VDQSF 1 "register_operand" "w")
3471 (vec_duplicate:VDQSF
3472 (vec_select:<VEL>
3473 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3474 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3475 UNSPEC_FMULX))]
3476 "TARGET_SIMD"
3477 {
3478 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3479 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3480 }
3481 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3482 )
3483
3484 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3485
;; FMULX by a lane selected from a same-width vector.
3486 (define_insn "*aarch64_mulx_elt<mode>"
3487 [(set (match_operand:VDQF 0 "register_operand" "=w")
3488 (unspec:VDQF
3489 [(match_operand:VDQF 1 "register_operand" "w")
3490 (vec_duplicate:VDQF
3491 (vec_select:<VEL>
3492 (match_operand:VDQF 2 "register_operand" "w")
3493 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3494 UNSPEC_FMULX))]
3495 "TARGET_SIMD"
3496 {
3497 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3498 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3499 }
3500 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3501 )
3502
3503 ;; vmulxq_lane
3504
;; FMULX by a duplicated scalar, emitted as a lane-0 multiply.  (The
;; stray ';' after the output template is an .md end-of-line comment and
;; has no effect.)
3505 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3506 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3507 (unspec:VHSDF
3508 [(match_operand:VHSDF 1 "register_operand" "w")
3509 (vec_duplicate:VHSDF
3510 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3511 UNSPEC_FMULX))]
3512 "TARGET_SIMD"
3513 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3514 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3515 )
3516
3517 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3518 ;; vmulxd_lane_f64 == vmulx_lane_f64
3519 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3520
;; Scalar FMULX by a lane of a vector.
3521 (define_insn "*aarch64_vgetfmulx<mode>"
3522 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3523 (unspec:<VEL>
3524 [(match_operand:<VEL> 1 "register_operand" "w")
3525 (vec_select:<VEL>
3526 (match_operand:VDQF 2 "register_operand" "w")
3527 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3528 UNSPEC_FMULX))]
3529 "TARGET_SIMD"
3530 {
3531 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3532 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3533 }
3534 [(set_attr "type" "fmul<Vetype>")]
3535 )
3536 ;; <su>q<addsub>
3537
;; Saturating add/subtract (SQADD/UQADD/SQSUB/UQSUB), vector and scalar.
3538 (define_insn "aarch64_<su_optab><optab><mode>"
3539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3541 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3542 "TARGET_SIMD"
3543 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3544 [(set_attr "type" "neon_<optab><q>")]
3545 )
3546
3547 ;; suqadd and usqadd
3548
;; SUQADD/USQADD accumulate into their first operand, hence the "0"
;; constraint tying operand 1 to the destination.
3549 (define_insn "aarch64_<sur>qadd<mode>"
3550 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3551 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3552 (match_operand:VSDQ_I 2 "register_operand" "w")]
3553 USSUQADD))]
3554 "TARGET_SIMD"
3555 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3556 [(set_attr "type" "neon_qadd<q>")]
3557 )
3558
3559 ;; sqmovun
3560
;; SQXTUN: signed-to-unsigned saturating extract-narrow.
3561 (define_insn "aarch64_sqmovun<mode>"
3562 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3563 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3564 UNSPEC_SQXTUN))]
3565 "TARGET_SIMD"
3566 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3567 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3568 )
3569
3570 ;; sqmovn and uqmovn
3571
;; SQXTN/UQXTN: saturating extract-narrow keeping the signedness.
3572 (define_insn "aarch64_<sur>qmovn<mode>"
3573 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3574 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3575 SUQMOVN))]
3576 "TARGET_SIMD"
3577 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3578 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3579 )
3580
3581 ;; <su>q<absneg>
3582
;; Saturating absolute value / negate (SQABS/SQNEG).
3583 (define_insn "aarch64_s<optab><mode>"
3584 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3585 (UNQOPS:VSDQ_I
3586 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3587 "TARGET_SIMD"
3588 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3589 [(set_attr "type" "neon_<optab><q>")]
3590 )
3591
3592 ;; sq<r>dmulh.
3593
;; SQDMULH/SQRDMULH: saturating doubling multiply returning high half,
;; optionally rounding; vector and scalar forms.
3594 (define_insn "aarch64_sq<r>dmulh<mode>"
3595 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3596 (unspec:VSDQ_HSI
3597 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3598 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3599 VQDMULH))]
3600 "TARGET_SIMD"
3601 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3602 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3603 )
3604
3605 ;; sq<r>dmulh_lane
3606
;; Vector by-lane forms: _lane takes the lane from a 64-bit vector
;; (<VCOND>), _laneq from a 128-bit vector (<VCONQ>).  Lane numbers are
;; flipped for big-endian assembly.
3607 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3608 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3609 (unspec:VDQHS
3610 [(match_operand:VDQHS 1 "register_operand" "w")
3611 (vec_select:<VEL>
3612 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3613 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614 VQDMULH))]
3615 "TARGET_SIMD"
3616 "*
3617 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3618 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3619 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3620 )
3621
3622 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3623 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3624 (unspec:VDQHS
3625 [(match_operand:VDQHS 1 "register_operand" "w")
3626 (vec_select:<VEL>
3627 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3628 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3629 VQDMULH))]
3630 "TARGET_SIMD"
3631 "*
3632 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3633 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3634 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3635 )
3636
;; Scalar (SD_HSI) by-lane forms of the same instructions.
3637 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3638 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3639 (unspec:SD_HSI
3640 [(match_operand:SD_HSI 1 "register_operand" "w")
3641 (vec_select:<VEL>
3642 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3643 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3644 VQDMULH))]
3645 "TARGET_SIMD"
3646 "*
3647 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3648 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3649 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3650 )
3651
3652 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3653 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3654 (unspec:SD_HSI
3655 [(match_operand:SD_HSI 1 "register_operand" "w")
3656 (vec_select:<VEL>
3657 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3658 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3659 VQDMULH))]
3660 "TARGET_SIMD"
3661 "*
3662 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3663 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3664 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3665 )
3666
3667 ;; sqrdml[as]h.
3668
;; SQRDMLAH/SQRDMLSH (rounding doubling multiply accumulate/subtract).
;; Gated on TARGET_SIMD_RDMA.  Operand 1 is the accumulator and is
;; tied to the destination register (constraint "0"), so it does not
;; appear in the printed operand list.
3669 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3670 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3671 (unspec:VSDQ_HSI
3672 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3673 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3674 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3675 SQRDMLH_AS))]
3676 "TARGET_SIMD_RDMA"
3677 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3678 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3679 )
3680
3681 ;; sqrdml[as]h_lane.
3682
;; Vector form, multiplier taken from one lane of a 64-bit vector.
;; The lane index is endian-adjusted before the template is printed.
3683 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3684 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3685 (unspec:VDQHS
3686 [(match_operand:VDQHS 1 "register_operand" "0")
3687 (match_operand:VDQHS 2 "register_operand" "w")
3688 (vec_select:<VEL>
3689 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3690 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3691 SQRDMLH_AS))]
3692 "TARGET_SIMD_RDMA"
3693 {
3694 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3695 return
3696 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3697 }
3698 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3699 )
3700
;; Scalar (HI/SI) form of the _lane variant.
3701 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3702 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3703 (unspec:SD_HSI
3704 [(match_operand:SD_HSI 1 "register_operand" "0")
3705 (match_operand:SD_HSI 2 "register_operand" "w")
3706 (vec_select:<VEL>
3707 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3708 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3709 SQRDMLH_AS))]
3710 "TARGET_SIMD_RDMA"
3711 {
3712 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3713 return
3714 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3715 }
3716 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3717 )
3718
3719 ;; sqrdml[as]h_laneq.
3720
;; Vector form, multiplier taken from one lane of a 128-bit vector.
3721 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3722 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3723 (unspec:VDQHS
3724 [(match_operand:VDQHS 1 "register_operand" "0")
3725 (match_operand:VDQHS 2 "register_operand" "w")
3726 (vec_select:<VEL>
3727 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3728 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3729 SQRDMLH_AS))]
3730 "TARGET_SIMD_RDMA"
3731 {
3732 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3733 return
3734 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3735 }
3736 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3737 )
3738
;; Scalar (HI/SI) SQRDMLAH/SQRDMLSH with the multiplier taken from one
;; lane of a 128-bit vector (laneq form).  Operand 1 is the accumulator,
;; tied to the destination.  The lane index is endian-adjusted before
;; printing.  Use <Vetype> for the element suffix, consistent with the
;; matching _lane pattern above (for HI/SI both <v> and <Vetype> print
;; the same letter, so this does not change the generated assembly).
3739 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3740 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3741 (unspec:SD_HSI
3742 [(match_operand:SD_HSI 1 "register_operand" "0")
3743 (match_operand:SD_HSI 2 "register_operand" "w")
3744 (vec_select:<VEL>
3745 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3746 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3747 SQRDMLH_AS))]
3748 "TARGET_SIMD_RDMA"
3749 {
3750 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3751 return
3752 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3753 }
3754 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3755 )
3756
3757 ;; vqdml[sa]l
3758
;; SQDMLAL/SQDMLSL: saturating doubling multiply long with
;; accumulate (ss_plus) or subtract (ss_minus, via SBINQOPS).
;; The doubling is modelled as a saturating left shift by 1 of the
;; widened product.  Operand 1 is the accumulator, tied to the
;; destination.
3759 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3760 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3761 (SBINQOPS:<VWIDE>
3762 (match_operand:<VWIDE> 1 "register_operand" "0")
3763 (ss_ashift:<VWIDE>
3764 (mult:<VWIDE>
3765 (sign_extend:<VWIDE>
3766 (match_operand:VSD_HSI 2 "register_operand" "w"))
3767 (sign_extend:<VWIDE>
3768 (match_operand:VSD_HSI 3 "register_operand" "w")))
3769 (const_int 1))))]
3770 "TARGET_SIMD"
3771 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3772 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3773 )
3774
3775 ;; vqdml[sa]l_lane
3776
;; Multiplier broadcast from one lane of a 64-bit vector; the lane
;; index is endian-adjusted before the template is printed.
3777 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3778 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3779 (SBINQOPS:<VWIDE>
3780 (match_operand:<VWIDE> 1 "register_operand" "0")
3781 (ss_ashift:<VWIDE>
3782 (mult:<VWIDE>
3783 (sign_extend:<VWIDE>
3784 (match_operand:VD_HSI 2 "register_operand" "w"))
3785 (sign_extend:<VWIDE>
3786 (vec_duplicate:VD_HSI
3787 (vec_select:<VEL>
3788 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3790 ))
3791 (const_int 1))))]
3792 "TARGET_SIMD"
3793 {
3794 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3795 return
3796 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3797 }
3798 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3799 )
3800
;; As above but the lane comes from a 128-bit vector (laneq).
3801 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3802 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3803 (SBINQOPS:<VWIDE>
3804 (match_operand:<VWIDE> 1 "register_operand" "0")
3805 (ss_ashift:<VWIDE>
3806 (mult:<VWIDE>
3807 (sign_extend:<VWIDE>
3808 (match_operand:VD_HSI 2 "register_operand" "w"))
3809 (sign_extend:<VWIDE>
3810 (vec_duplicate:VD_HSI
3811 (vec_select:<VEL>
3812 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3813 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3814 ))
3815 (const_int 1))))]
3816 "TARGET_SIMD"
3817 {
3818 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3819 return
3820 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3821 }
3822 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3823 )
3824
;; Scalar (HI/SI) variant of the _lane pattern: no vec_duplicate,
;; the selected element is used directly.
3825 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3826 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3827 (SBINQOPS:<VWIDE>
3828 (match_operand:<VWIDE> 1 "register_operand" "0")
3829 (ss_ashift:<VWIDE>
3830 (mult:<VWIDE>
3831 (sign_extend:<VWIDE>
3832 (match_operand:SD_HSI 2 "register_operand" "w"))
3833 (sign_extend:<VWIDE>
3834 (vec_select:<VEL>
3835 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3836 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3837 )
3838 (const_int 1))))]
3839 "TARGET_SIMD"
3840 {
3841 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3842 return
3843 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3844 }
3845 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3846 )
3847
;; Scalar (HI/SI) variant of the _laneq pattern.
3848 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3849 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3850 (SBINQOPS:<VWIDE>
3851 (match_operand:<VWIDE> 1 "register_operand" "0")
3852 (ss_ashift:<VWIDE>
3853 (mult:<VWIDE>
3854 (sign_extend:<VWIDE>
3855 (match_operand:SD_HSI 2 "register_operand" "w"))
3856 (sign_extend:<VWIDE>
3857 (vec_select:<VEL>
3858 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3859 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3860 )
3861 (const_int 1))))]
3862 "TARGET_SIMD"
3863 {
3864 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3865 return
3866 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3867 }
3868 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3869 )
3870
3871 ;; vqdml[sa]l_n
3872
;; Multiplier is a scalar broadcast to all lanes; printed as lane 0
;; of the scalar's register.
3873 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3875 (SBINQOPS:<VWIDE>
3876 (match_operand:<VWIDE> 1 "register_operand" "0")
3877 (ss_ashift:<VWIDE>
3878 (mult:<VWIDE>
3879 (sign_extend:<VWIDE>
3880 (match_operand:VD_HSI 2 "register_operand" "w"))
3881 (sign_extend:<VWIDE>
3882 (vec_duplicate:VD_HSI
3883 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3884 (const_int 1))))]
3885 "TARGET_SIMD"
3886 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3887 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3888 )
3889
3890 ;; sqdml[as]l2
3891
;; SQDMLAL2/SQDMLSL2: operate on the high halves of both 128-bit
;; inputs, selected by the vect_par_cnst_hi_half parallel (operand 4,
;; shared between the two vec_selects via match_dup).
3892 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3894 (SBINQOPS:<VWIDE>
3895 (match_operand:<VWIDE> 1 "register_operand" "0")
3896 (ss_ashift:<VWIDE>
3897 (mult:<VWIDE>
3898 (sign_extend:<VWIDE>
3899 (vec_select:<VHALF>
3900 (match_operand:VQ_HSI 2 "register_operand" "w")
3901 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3902 (sign_extend:<VWIDE>
3903 (vec_select:<VHALF>
3904 (match_operand:VQ_HSI 3 "register_operand" "w")
3905 (match_dup 4))))
3906 (const_int 1))))]
3907 "TARGET_SIMD"
3908 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3909 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3910 )
3911
;; Expander: builds the high-half selector and forwards to the
;; _internal insn above.
3912 (define_expand "aarch64_sqdmlal2<mode>"
3913 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3914 (match_operand:<VWIDE> 1 "register_operand" "w")
3915 (match_operand:VQ_HSI 2 "register_operand" "w")
3916 (match_operand:VQ_HSI 3 "register_operand" "w")]
3917 "TARGET_SIMD"
3918 {
3919 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3920 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3921 operands[2], operands[3], p));
3922 DONE;
3923 })
3924
;; As above for the subtracting (sqdmlsl2) variant.
3925 (define_expand "aarch64_sqdmlsl2<mode>"
3926 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3927 (match_operand:<VWIDE> 1 "register_operand" "w")
3928 (match_operand:VQ_HSI 2 "register_operand" "w")
3929 (match_operand:VQ_HSI 3 "register_operand" "w")]
3930 "TARGET_SIMD"
3931 {
3932 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3933 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3934 operands[2], operands[3], p));
3935 DONE;
3936 })
3937
3938 ;; vqdml[sa]l2_lane
3939
;; SQDMLAL2/SQDMLSL2 by-element: high half of operand 2 multiplied by
;; a broadcast lane of a 64-bit vector (operand 3).  Operand 5 is the
;; high-half selector, operand 4 the lane index (endian-adjusted in
;; the output fragment).
3940 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3942 (SBINQOPS:<VWIDE>
3943 (match_operand:<VWIDE> 1 "register_operand" "0")
3944 (ss_ashift:<VWIDE>
3945 (mult:<VWIDE>
3946 (sign_extend:<VWIDE>
3947 (vec_select:<VHALF>
3948 (match_operand:VQ_HSI 2 "register_operand" "w")
3949 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3950 (sign_extend:<VWIDE>
3951 (vec_duplicate:<VHALF>
3952 (vec_select:<VEL>
3953 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3954 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3955 ))))
3956 (const_int 1))))]
3957 "TARGET_SIMD"
3958 {
3959 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3960 return
3961 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3962 }
3963 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3964 )
3965
;; As above but the lane comes from a 128-bit vector (laneq).
3966 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3967 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3968 (SBINQOPS:<VWIDE>
3969 (match_operand:<VWIDE> 1 "register_operand" "0")
3970 (ss_ashift:<VWIDE>
3971 (mult:<VWIDE>
3972 (sign_extend:<VWIDE>
3973 (vec_select:<VHALF>
3974 (match_operand:VQ_HSI 2 "register_operand" "w")
3975 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3976 (sign_extend:<VWIDE>
3977 (vec_duplicate:<VHALF>
3978 (vec_select:<VEL>
3979 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3980 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3981 ))))
3982 (const_int 1))))]
3983 "TARGET_SIMD"
3984 {
3985 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3986 return
3987 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3988 }
3989 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3990 )
3991
;; Expanders: build the high-half selector and forward to the
;; corresponding _internal insn.
3992 (define_expand "aarch64_sqdmlal2_lane<mode>"
3993 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (match_operand:<VWIDE> 1 "register_operand" "w")
3995 (match_operand:VQ_HSI 2 "register_operand" "w")
3996 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997 (match_operand:SI 4 "immediate_operand" "i")]
3998 "TARGET_SIMD"
3999 {
4000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4002 operands[2], operands[3],
4003 operands[4], p));
4004 DONE;
4005 })
4006
4007 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4008 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4009 (match_operand:<VWIDE> 1 "register_operand" "w")
4010 (match_operand:VQ_HSI 2 "register_operand" "w")
4011 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4012 (match_operand:SI 4 "immediate_operand" "i")]
4013 "TARGET_SIMD"
4014 {
4015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4017 operands[2], operands[3],
4018 operands[4], p));
4019 DONE;
4020 })
4021
4022 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4023 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (match_operand:<VWIDE> 1 "register_operand" "w")
4025 (match_operand:VQ_HSI 2 "register_operand" "w")
4026 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4027 (match_operand:SI 4 "immediate_operand" "i")]
4028 "TARGET_SIMD"
4029 {
4030 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4031 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4032 operands[2], operands[3],
4033 operands[4], p));
4034 DONE;
4035 })
4036
4037 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4038 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4039 (match_operand:<VWIDE> 1 "register_operand" "w")
4040 (match_operand:VQ_HSI 2 "register_operand" "w")
4041 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4042 (match_operand:SI 4 "immediate_operand" "i")]
4043 "TARGET_SIMD"
4044 {
4045 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4046 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4047 operands[2], operands[3],
4048 operands[4], p));
4049 DONE;
4050 })
4051
;; SQDMLAL2/SQDMLSL2 with a broadcast scalar multiplier: high half of
;; operand 2 times operand 3 duplicated, printed as lane 0.
4052 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4053 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4054 (SBINQOPS:<VWIDE>
4055 (match_operand:<VWIDE> 1 "register_operand" "0")
4056 (ss_ashift:<VWIDE>
4057 (mult:<VWIDE>
4058 (sign_extend:<VWIDE>
4059 (vec_select:<VHALF>
4060 (match_operand:VQ_HSI 2 "register_operand" "w")
4061 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4062 (sign_extend:<VWIDE>
4063 (vec_duplicate:<VHALF>
4064 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4065 (const_int 1))))]
4066 "TARGET_SIMD"
4067 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4068 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4069 )
4070
;; Expanders: build the high-half selector and forward to the
;; _internal insn above.
4071 (define_expand "aarch64_sqdmlal2_n<mode>"
4072 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4073 (match_operand:<VWIDE> 1 "register_operand" "w")
4074 (match_operand:VQ_HSI 2 "register_operand" "w")
4075 (match_operand:<VEL> 3 "register_operand" "w")]
4076 "TARGET_SIMD"
4077 {
4078 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4079 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4080 operands[2], operands[3],
4081 p));
4082 DONE;
4083 })
4084
4085 (define_expand "aarch64_sqdmlsl2_n<mode>"
4086 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4087 (match_operand:<VWIDE> 1 "register_operand" "w")
4088 (match_operand:VQ_HSI 2 "register_operand" "w")
4089 (match_operand:<VEL> 3 "register_operand" "w")]
4090 "TARGET_SIMD"
4091 {
4092 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4093 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4094 operands[2], operands[3],
4095 p));
4096 DONE;
4097 })
4098
4099 ;; vqdmull
4100
;; SQDMULL: saturating doubling multiply long, modelled as a
;; saturating left shift by 1 of the sign-extended product.
4101 (define_insn "aarch64_sqdmull<mode>"
4102 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4103 (ss_ashift:<VWIDE>
4104 (mult:<VWIDE>
4105 (sign_extend:<VWIDE>
4106 (match_operand:VSD_HSI 1 "register_operand" "w"))
4107 (sign_extend:<VWIDE>
4108 (match_operand:VSD_HSI 2 "register_operand" "w")))
4109 (const_int 1)))]
4110 "TARGET_SIMD"
4111 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4112 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4113 )
4114
4115 ;; vqdmull_lane
4116
;; Multiplier broadcast from one lane of a 64-bit vector; lane index
;; is endian-adjusted in the output fragment.
4117 (define_insn "aarch64_sqdmull_lane<mode>"
4118 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4119 (ss_ashift:<VWIDE>
4120 (mult:<VWIDE>
4121 (sign_extend:<VWIDE>
4122 (match_operand:VD_HSI 1 "register_operand" "w"))
4123 (sign_extend:<VWIDE>
4124 (vec_duplicate:VD_HSI
4125 (vec_select:<VEL>
4126 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4127 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4128 ))
4129 (const_int 1)))]
4130 "TARGET_SIMD"
4131 {
4132 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4133 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4134 }
4135 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4136 )
4137
;; As above but the lane comes from a 128-bit vector (laneq).
4138 (define_insn "aarch64_sqdmull_laneq<mode>"
4139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140 (ss_ashift:<VWIDE>
4141 (mult:<VWIDE>
4142 (sign_extend:<VWIDE>
4143 (match_operand:VD_HSI 1 "register_operand" "w"))
4144 (sign_extend:<VWIDE>
4145 (vec_duplicate:VD_HSI
4146 (vec_select:<VEL>
4147 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4148 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4149 ))
4150 (const_int 1)))]
4151 "TARGET_SIMD"
4152 {
4153 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4154 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4155 }
4156 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4157 )
4158
;; Scalar (HI/SI) variant of the _lane pattern (no vec_duplicate).
4159 (define_insn "aarch64_sqdmull_lane<mode>"
4160 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4161 (ss_ashift:<VWIDE>
4162 (mult:<VWIDE>
4163 (sign_extend:<VWIDE>
4164 (match_operand:SD_HSI 1 "register_operand" "w"))
4165 (sign_extend:<VWIDE>
4166 (vec_select:<VEL>
4167 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4168 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4169 ))
4170 (const_int 1)))]
4171 "TARGET_SIMD"
4172 {
4173 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4174 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4175 }
4176 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4177 )
4178
;; Scalar (HI/SI) variant of the _laneq pattern.
4179 (define_insn "aarch64_sqdmull_laneq<mode>"
4180 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4181 (ss_ashift:<VWIDE>
4182 (mult:<VWIDE>
4183 (sign_extend:<VWIDE>
4184 (match_operand:SD_HSI 1 "register_operand" "w"))
4185 (sign_extend:<VWIDE>
4186 (vec_select:<VEL>
4187 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4188 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4189 ))
4190 (const_int 1)))]
4191 "TARGET_SIMD"
4192 {
4193 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4194 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4195 }
4196 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4197 )
4198
4199 ;; vqdmull_n
4200
;; Multiplier is a scalar broadcast to all lanes, printed as lane 0.
4201 (define_insn "aarch64_sqdmull_n<mode>"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (ss_ashift:<VWIDE>
4204 (mult:<VWIDE>
4205 (sign_extend:<VWIDE>
4206 (match_operand:VD_HSI 1 "register_operand" "w"))
4207 (sign_extend:<VWIDE>
4208 (vec_duplicate:VD_HSI
4209 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4210 )
4211 (const_int 1)))]
4212 "TARGET_SIMD"
4213 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4214 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4215 )
4216
4217 ;; vqdmull2
4218
4219
4220
;; SQDMULL2: as sqdmull but on the high halves of both 128-bit
;; inputs (selector in operand 3, shared via match_dup).
4221 (define_insn "aarch64_sqdmull2<mode>_internal"
4222 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4223 (ss_ashift:<VWIDE>
4224 (mult:<VWIDE>
4225 (sign_extend:<VWIDE>
4226 (vec_select:<VHALF>
4227 (match_operand:VQ_HSI 1 "register_operand" "w")
4228 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4229 (sign_extend:<VWIDE>
4230 (vec_select:<VHALF>
4231 (match_operand:VQ_HSI 2 "register_operand" "w")
4232 (match_dup 3)))
4233 )
4234 (const_int 1)))]
4235 "TARGET_SIMD"
4236 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4237 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4238 )
4239
;; Expander: builds the high-half selector and forwards to the
;; _internal insn above.
4240 (define_expand "aarch64_sqdmull2<mode>"
4241 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4242 (match_operand:VQ_HSI 1 "register_operand" "w")
4243 (match_operand:VQ_HSI 2 "register_operand" "w")]
4244 "TARGET_SIMD"
4245 {
4246 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4247 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4248 operands[2], p));
4249 DONE;
4250 })
4251
4252 ;; vqdmull2_lane
4253
;; SQDMULL2 by-element: high half of operand 1 times a broadcast lane
;; of a 64-bit vector.  Lane index endian-adjusted before printing.
4254 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4255 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4256 (ss_ashift:<VWIDE>
4257 (mult:<VWIDE>
4258 (sign_extend:<VWIDE>
4259 (vec_select:<VHALF>
4260 (match_operand:VQ_HSI 1 "register_operand" "w")
4261 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4262 (sign_extend:<VWIDE>
4263 (vec_duplicate:<VHALF>
4264 (vec_select:<VEL>
4265 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4266 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4267 ))
4268 (const_int 1)))]
4269 "TARGET_SIMD"
4270 {
4271 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4272 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4273 }
4274 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4275 )
4276
;; As above but the lane comes from a 128-bit vector (laneq).
4277 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4279 (ss_ashift:<VWIDE>
4280 (mult:<VWIDE>
4281 (sign_extend:<VWIDE>
4282 (vec_select:<VHALF>
4283 (match_operand:VQ_HSI 1 "register_operand" "w")
4284 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4285 (sign_extend:<VWIDE>
4286 (vec_duplicate:<VHALF>
4287 (vec_select:<VEL>
4288 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4289 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4290 ))
4291 (const_int 1)))]
4292 "TARGET_SIMD"
4293 {
4294 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4295 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4296 }
4297 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4298 )
4299
;; Expanders: build the high-half selector and forward to the
;; corresponding _internal insn.
4300 (define_expand "aarch64_sqdmull2_lane<mode>"
4301 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4302 (match_operand:VQ_HSI 1 "register_operand" "w")
4303 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4304 (match_operand:SI 3 "immediate_operand" "i")]
4305 "TARGET_SIMD"
4306 {
4307 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4308 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4309 operands[2], operands[3],
4310 p));
4311 DONE;
4312 })
4313
4314 (define_expand "aarch64_sqdmull2_laneq<mode>"
4315 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4316 (match_operand:VQ_HSI 1 "register_operand" "w")
4317 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4318 (match_operand:SI 3 "immediate_operand" "i")]
4319 "TARGET_SIMD"
4320 {
4321 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4322 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4323 operands[2], operands[3],
4324 p));
4325 DONE;
4326 })
4327
4328 ;; vqdmull2_n
4329
;; SQDMULL2 with a broadcast scalar multiplier, printed as lane 0.
4330 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4331 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4332 (ss_ashift:<VWIDE>
4333 (mult:<VWIDE>
4334 (sign_extend:<VWIDE>
4335 (vec_select:<VHALF>
4336 (match_operand:VQ_HSI 1 "register_operand" "w")
4337 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4338 (sign_extend:<VWIDE>
4339 (vec_duplicate:<VHALF>
4340 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4341 )
4342 (const_int 1)))]
4343 "TARGET_SIMD"
4344 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4345 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4346 )
4347
;; Expander: builds the high-half selector and forwards to the
;; _internal insn above.
4348 (define_expand "aarch64_sqdmull2_n<mode>"
4349 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4350 (match_operand:VQ_HSI 1 "register_operand" "w")
4351 (match_operand:<VEL> 2 "register_operand" "w")]
4352 "TARGET_SIMD"
4353 {
4354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4355 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4356 operands[2], p));
4357 DONE;
4358 })
4359
4360 ;; vshl
4361
;; Shift by a vector register (SSHL/USHL/SRSHL/URSHL via the VSHL
;; unspec iterator).  Removed a stray ";" that followed the output
;; template: in machine descriptions ";" introduces a comment, so the
;; semicolon was dead text that made the template look C-terminated.
4362 (define_insn "aarch64_<sur>shl<mode>"
4363 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4364 (unspec:VSDQ_I_DI
4365 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4366 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4367 VSHL))]
4368 "TARGET_SIMD"
4369 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4370 [(set_attr "type" "neon_shift_reg<q>")]
4371 )
4372
4373
4374 ;; vqshl
4375
;; Saturating (optionally rounding) shift by a vector register, via
;; the VQSHL unspec iterator.  Removed a stray ";" after the output
;; template (dead text -- ";" starts a comment in .md files).
4376 (define_insn "aarch64_<sur>q<r>shl<mode>"
4377 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4378 (unspec:VSDQ_I
4379 [(match_operand:VSDQ_I 1 "register_operand" "w")
4380 (match_operand:VSDQ_I 2 "register_operand" "w")]
4381 VQSHL))]
4382 "TARGET_SIMD"
4383 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4384 [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4386
4387 ;; vshll_n
4388
;; Widening shift left by immediate.  When the shift amount equals the
;; element bit-width the plain SHLL mnemonic is emitted; otherwise the
;; signed/unsigned form from the <sur> iterator is used.
4389 (define_insn "aarch64_<sur>shll_n<mode>"
4390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4391 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4392 (match_operand:SI 2
4393 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4394 VSHLL))]
4395 "TARGET_SIMD"
4396 {
4397 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4398 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4399 else
4400 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4401 }
4402 [(set_attr "type" "neon_shift_imm_long")]
4403 )
4404
4405 ;; vshll_high_n
4406
;; High-half variant of the widening shift left by immediate.
;; NOTE(review): operand 2 uses the generic "immediate_operand" here,
;; unlike the bitsize-restricted predicate in shll_n above -- verify
;; out-of-range immediates cannot reach this pattern.
4407 (define_insn "aarch64_<sur>shll2_n<mode>"
4408 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4409 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4410 (match_operand:SI 2 "immediate_operand" "i")]
4411 VSHLL))]
4412 "TARGET_SIMD"
4413 {
4414 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4415 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4416 else
4417 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4418 }
4419 [(set_attr "type" "neon_shift_imm_long")]
4420 )
4421
4422 ;; vrshr_n
4423
;; (Rounding) shift right by immediate, via the VRSHR_N unspec
;; iterator.
4424 (define_insn "aarch64_<sur>shr_n<mode>"
4425 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4426 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4427 (match_operand:SI 2
4428 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4429 VRSHR_N))]
4430 "TARGET_SIMD"
4431 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4432 [(set_attr "type" "neon_sat_shift_imm<q>")]
4433 )
4434
4435 ;; v(r)sra_n
4436
;; Shift right and accumulate; operand 1 is the accumulator, tied to
;; the destination, so only operands 2 and 3 are printed.
4437 (define_insn "aarch64_<sur>sra_n<mode>"
4438 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4439 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4440 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4441 (match_operand:SI 3
4442 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4443 VSRA))]
4444 "TARGET_SIMD"
4445 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4446 [(set_attr "type" "neon_shift_acc<q>")]
4447 )
4448
4449 ;; vs<lr>i_n
4450
;; Shift left/right and insert; operand 1 (the value whose bits are
;; preserved) is tied to the destination.
4451 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4452 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4453 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4454 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4455 (match_operand:SI 3
4456 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4457 VSLRI))]
4458 "TARGET_SIMD"
4459 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4460 [(set_attr "type" "neon_shift_imm<q>")]
4461 )
4462
4463 ;; vqshl(u)
4464
;; Saturating shift left by immediate (signed, unsigned, or
;; signed-to-unsigned via the <u> iterator).
4465 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4466 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4467 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4468 (match_operand:SI 2
4469 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4470 VQSHL_N))]
4471 "TARGET_SIMD"
4472 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4473 [(set_attr "type" "neon_sat_shift_imm<q>")]
4474 )
4475
4476
4477 ;; vq(r)shr(u)n_n
4478
;; Saturating (rounding) shift right narrow by immediate.
4479 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4480 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4481 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4482 (match_operand:SI 2
4483 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4484 VQSHRN_N))]
4485 "TARGET_SIMD"
4486 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4487 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4488 )
4489
4490
4491 ;; cm(eq|ge|gt|lt|le)
4492 ;; Note, we have constraints for Dz and Z as different expanders
4493 ;; have different ideas of what should be passed to this pattern.
4494
;; Vector integer compares producing an all-ones/all-zeros mask
;; (hence the neg of the boolean comparison).  Alternative 2 compares
;; against zero via the ZDz constraint.
4495 (define_insn "aarch64_cm<optab><mode>"
4496 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4497 (neg:<V_INT_EQUIV>
4498 (COMPARISONS:<V_INT_EQUIV>
4499 (match_operand:VDQ_I 1 "register_operand" "w,w")
4500 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4501 )))]
4502 "TARGET_SIMD"
4503 "@
4504 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4505 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4506 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4507 )
4508
;; DImode compare: kept as "#" until after reload, then either split
;; to a GP-register compare-and-store sequence (when the destination
;; and first input landed in general registers) or to the
;; clobber-free SIMD pattern below.
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4511 (neg:DI
4512 (COMPARISONS:DI
4513 (match_operand:DI 1 "register_operand" "w,w,r")
4514 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4515 )))
4516 (clobber (reg:CC CC_REGNUM))]
4517 "TARGET_SIMD"
4518 "#"
4519 "&& reload_completed"
4520 [(set (match_operand:DI 0 "register_operand")
4521 (neg:DI
4522 (COMPARISONS:DI
4523 (match_operand:DI 1 "register_operand")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4525 )))]
4526 {
4527 /* If we are in the general purpose register file,
4528 we split to a sequence of comparison and store.  */
4529 if (GP_REGNUM_P (REGNO (operands[0]))
4530 && GP_REGNUM_P (REGNO (operands[1])))
4531 {
4532 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4533 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4536 DONE;
4537 }
4538 /* Otherwise, we expand to a similar pattern which does not
4539 clobber CC_REGNUM.  */
4540 }
4541 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4542 )
4543
;; The clobber-free SIMD form the split above falls through to; only
;; valid after reload.
4544 (define_insn "*aarch64_cm<optab>di"
4545 [(set (match_operand:DI 0 "register_operand" "=w,w")
4546 (neg:DI
4547 (COMPARISONS:DI
4548 (match_operand:DI 1 "register_operand" "w,w")
4549 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4550 )))]
4551 "TARGET_SIMD && reload_completed"
4552 "@
4553 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4554 cm<optab>\t%d0, %d1, #0"
4555 [(set_attr "type" "neon_compare, neon_compare_zero")]
4556 )
4557
4558 ;; cm(hs|hi)
4559
;; Unsigned vector compares (CMHS/CMHI), mask-producing as above.
;; No compare-against-zero alternative here.
4560 (define_insn "aarch64_cm<optab><mode>"
4561 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4562 (neg:<V_INT_EQUIV>
4563 (UCOMPARISONS:<V_INT_EQUIV>
4564 (match_operand:VDQ_I 1 "register_operand" "w")
4565 (match_operand:VDQ_I 2 "register_operand" "w")
4566 )))]
4567 "TARGET_SIMD"
4568 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4569 [(set_attr "type" "neon_compare<q>")]
4570 )
4571
;; Unsigned DImode compare: split after reload, either to a
;; GP-register compare-and-store (plain CCmode suffices for the
;; unsigned conditions) or to the clobber-free SIMD pattern below.
4572 (define_insn_and_split "aarch64_cm<optab>di"
4573 [(set (match_operand:DI 0 "register_operand" "=w,r")
4574 (neg:DI
4575 (UCOMPARISONS:DI
4576 (match_operand:DI 1 "register_operand" "w,r")
4577 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4578 )))
4579 (clobber (reg:CC CC_REGNUM))]
4580 "TARGET_SIMD"
4581 "#"
4582 "&& reload_completed"
4583 [(set (match_operand:DI 0 "register_operand")
4584 (neg:DI
4585 (UCOMPARISONS:DI
4586 (match_operand:DI 1 "register_operand")
4587 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4588 )))]
4589 {
4590 /* If we are in the general purpose register file,
4591 we split to a sequence of comparison and store.  */
4592 if (GP_REGNUM_P (REGNO (operands[0]))
4593 && GP_REGNUM_P (REGNO (operands[1])))
4594 {
4595 machine_mode mode = CCmode;
4596 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4597 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4598 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4599 DONE;
4600 }
4601 /* Otherwise, we expand to a similar pattern which does not
4602 clobber CC_REGNUM.  */
4603 }
4604 [(set_attr "type" "neon_compare,multiple")]
4605 )
4606
;; The clobber-free SIMD form; only valid after reload.
4607 (define_insn "*aarch64_cm<optab>di"
4608 [(set (match_operand:DI 0 "register_operand" "=w")
4609 (neg:DI
4610 (UCOMPARISONS:DI
4611 (match_operand:DI 1 "register_operand" "w")
4612 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4613 )))]
4614 "TARGET_SIMD && reload_completed"
4615 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4616 [(set_attr "type" "neon_compare")]
4617 )
4618
;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

;; Vector test-bits: each destination lane becomes all-ones where
;; (op1 & op2) is non-zero, matched in the canonical plus/eq form
;; described above.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

;; Scalar 64-bit cmtst.  As with the scalar compares, the register
;; class is unknown before RA, so keep both a SIMD and a GP alternative
;; and split after reload.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

;; SIMD-register form of the scalar cmtst (no CC clobber).
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4692
;; fcm(eq|ge|gt|le|lt)

;; Floating-point compares.  The second alternative compares against
;; +0.0 (constraint YDz); le/lt forms are emitted by swapping operands
;; through the <cmp_1>/<cmp_2> mode attributes.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

;; Absolute-value compare: |op1| cmp |op2|, again using <cmp_1>/<cmp_2>
;; for the swapped (le|lt) forms.
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4726
;; addp

;; Pairwise add on 64-bit integer vectors.
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
	(unspec:VD_BHSI
	  [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar form: add the two DI halves of a V2DI register into %d0.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI
	  [(match_operand:V2DI 1 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)

;; sqrt

;; The expander first tries the approximate-sqrt expansion; if
;; aarch64_emit_approx_sqrt declines, the pattern falls through to the
;; fsqrt insn below.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
4768
;; Patterns for vector struct loads and stores.

;; Two-element structure load into an OImode register list.  The inner
;; VQ unspec only records the element mode; it has no runtime meaning.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; ld2r: load one two-element structure and replicate it to all lanes.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; Single-lane ld2.  Operand 2 (tied with "0") supplies the untouched
;; lanes; operand 3 is the lane index, remapped for endianness before
;; the template is printed.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Standard-name expander.  On big-endian the loaded register list is
;; permuted back to GCC's lane numbering via aarch64_rev_reglist.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})

;; Two-element structure store.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-name expander; big-endian permutes the register list into
;; memory order before the st2.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4868
;; Three-element structure loads/stores (CImode register lists).  These
;; mirror the two-element (OImode) ld2/st2 patterns earlier in the file.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; ld3r: load one three-element structure, replicate to all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; Single-lane ld3; operand 2 keeps the other lanes, operand 3 is the
;; endian-remapped lane index.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-name expander; big-endian fixes up lane numbering with a
;; rev_reglist permute.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-element structure store.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander; big-endian permutes into memory order first.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
4966
;; Four-element structure loads/stores (XImode register lists).  These
;; mirror the two-element (OImode) ld2/st2 patterns earlier in the file.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; ld4r: load one four-element structure, replicate to all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; Single-lane ld4; operand 2 keeps the other lanes, operand 3 is the
;; endian-remapped lane index.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Standard-name expander; big-endian fixes up lane numbering with a
;; rev_reglist permute.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-element structure store.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Standard-name expander; big-endian permutes into memory order first.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})
5064
;; Apply the byte-permute mask in operand 2 to every 128-bit register of
;; a struct register list; after reload this splits into one tbl per
;; component register.  The "=&w" earlyclobber keeps the destination
;; list from overlapping the (still-live) source list.
(define_insn_and_split "aarch64_rev_reglist<mode>"
  [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	  [(match_operand:VSTRUCT 1 "register_operand" "w")
	   (match_operand:V16QI 2 "register_operand" "w")]
	  UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  /* One tbl per 128-bit vector register in the list.  */
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)
5089
;; Reload patterns for AdvSIMD register list operands.

;; Move expander for struct modes (OI/CI/XI register lists).  Before
;; register allocation, force the source into a register unless the
;; destination already is one; mem<->mem moves are thereby avoided.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
5103
5104
;; ld1/st1 multi-register builtins: one ld1/st1 covering two or three
;; consecutive registers.  The expanders wrap the pointer operand in a
;; MEM of the struct mode and defer to the matching insn.

(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

;; NOTE(review): the const_int inside the dummy unspec (2 or 3 below)
;; appears only to tag the register count of the variant — confirm.
(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI
	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:OI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x2_<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI
	  [(match_operand:OI 1 "register_operand" "w")
	   (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)

(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:CI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x3_<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI
	  [(match_operand:CI 1 "register_operand" "w")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)
5167
;; Little-endian moves of struct register lists.  A reg-reg move is
;; emitted as "#" and split into per-register moves after reload;
;; memory alternatives use a single st1/ld1 over the whole list.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

;; ld1/st1 single-register forms used by the big-endian expansion path
;; (e.g. the aarch64_ld1 expander below); kept as unspecs so that they
;; preserve the in-memory element order.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
			   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
			   UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
5201
;; Big-endian OI move: a Q-register pair can use stp/ldp directly; the
;; reg-reg alternative is split later (length 8 = two 4-byte moves).
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand" " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CI/XI moves always emit "#" and rely on the define_splits
;; below; length 12 resp. 16 covers the reg-reg split into three resp.
;; four component moves.
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand" " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand" " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)
5237
;; Post-reload splitters for struct-mode moves.

;; OI reg-reg: two TImode component moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; CI: reg-reg splits into three TImode moves.  On big-endian, a move
;; involving memory splits into an OImode move of the first 32 bytes
;; plus a V16QI move for the trailing 16 bytes.
;; NOTE(review): the gen_lowpart-to-V16QI dance presumably works around
;; TImode subreg restrictions here — confirm before changing.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

;; XI: reg-reg splits into four TImode moves; big-endian memory moves
;; split into two OImode halves.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})
5299
;; Builtin expander for ldNr: wrap the address (operand 1) in a BLKmode
;; MEM sized to one structure (nregs elements) and emit the matching
;; ldNr insn.  Constraints in a define_expand are ignored, so the "w"
;; on the address operand here is inert (the VDC sibling uses "r").
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})

;; D-register structure loads.  True 64-bit vector modes (VD) use the
;; real ldN instruction; the DX modes use ld1 with the .1d arrangement
;; instead.

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander for ldN of D-register structures: BLKmode MEM of
;; nregs * 8 bytes, then defer to the _dreg insns above.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})
5387
;; Builtin expander for a plain ld1.  Big-endian goes through the
;; element-order-preserving aarch64_be_ld1 insn; little-endian is a
;; normal vector load.
(define_expand "aarch64_ld1<VALL_F16:mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; Builtin expander for ldN of Q-register structures.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; ld1 x2 expanders for Q- and D-register element modes.
(define_expand "aarch64_ld1x2<VQ:mode>"
  [(match_operand:OI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1x2<VDC:mode>"
  [(match_operand:OI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})


;; Builtin expander for lane-wise ldN: bounds-check the lane index
;; (operand 3) against the vector length, then emit the *_lane insn on
;; a BLKmode MEM sized to one structure.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:VSTRUCT 2 "register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})
5460
;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

;; Extract component 'part' of a struct register list as a D-register
;; vector.  Each component occupies a 16-byte slot of the struct mode,
;; hence the part * 16 subreg offset; the double-width (VDBL) temporary
;; is then narrowed to the requested 64-bit mode via gen_lowpart.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
  [(match_operand:VDC 0 "register_operand" "=w")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

;; Extract component 'part' of a struct register list as a full
;; Q-register vector (direct 16-byte subreg, no narrowing needed).
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VQ 0 "register_operand" "=w")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

;; Standard-name variable permute; operand 3 holds the byte indices.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})
5514
;; Table lookups (tbl/tbx) with one to three source registers.

(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; tbx: operand 1 is tied to the output ("0") because tbx leaves
;; destination bytes unchanged for out-of-range indices.
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.

(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)
5569
5570 (define_insn "aarch64_qtbx3<mode>"
5571 [(set (match_operand:VB 0 "register_operand" "=w")
5572 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5573 (match_operand:CI 2 "register_operand" "w")
5574 (match_operand:VB 3 "register_operand" "w")]
5575 UNSPEC_TBX))]
5576 "TARGET_SIMD"
5577 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5578 [(set_attr "type" "neon_tbl3")]
5579 )
5580
5581 ;; Four source registers.
5582
5583 (define_insn "aarch64_qtbl4<mode>"
5584 [(set (match_operand:VB 0 "register_operand" "=w")
5585 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5586 (match_operand:VB 2 "register_operand" "w")]
5587 UNSPEC_TBL))]
5588 "TARGET_SIMD"
5589 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5590 [(set_attr "type" "neon_tbl4")]
5591 )
5592
5593 (define_insn "aarch64_qtbx4<mode>"
5594 [(set (match_operand:VB 0 "register_operand" "=w")
5595 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5596 (match_operand:XI 2 "register_operand" "w")
5597 (match_operand:VB 3 "register_operand" "w")]
5598 UNSPEC_TBX))]
5599 "TARGET_SIMD"
5600 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5601 [(set_attr "type" "neon_tbl4")]
5602 )
5603
;; Concatenate two V16QI registers into an OI-mode (two-register) value.
;; Kept as a single pseudo-insn until after reload so the register
;; allocator can pick a consecutive pair; then split into plain moves by
;; aarch64_split_combinev16qi.
5604 (define_insn_and_split "aarch64_combinev16qi"
5605 [(set (match_operand:OI 0 "register_operand" "=w")
5606 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5607 (match_operand:V16QI 2 "register_operand" "w")]
5608 UNSPEC_CONCAT))]
5609 "TARGET_SIMD"
5610 "#"
5611 "&& reload_completed"
5612 [(const_int 0)]
5613 {
5614 aarch64_split_combinev16qi (operands);
5615 DONE;
5616 }
5617 [(set_attr "type" "multiple")]
5618 )
5619
5620 ;; This instruction's pattern is generated directly by
5621 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5622 ;; need corresponding changes there.
;; Two-input permute (TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 — selected by the
;; PERMUTE iterator's perm_insn/perm_hilo attributes).
5623 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5624 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5625 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5626 (match_operand:VALL_F16 2 "register_operand" "w")]
5627 PERMUTE))]
5628 "TARGET_SIMD"
5629 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5630 [(set_attr "type" "neon_permute<q>")]
5631 )
5632
5633 ;; This instruction's pattern is generated directly by
5634 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5635 ;; need corresponding changes there. Note that the immediate (third)
5636 ;; operand is a lane index not a byte index.
;; EXT: extract a vector from a pair of concatenated vectors.  The
;; output routine scales the lane index into the byte offset EXT takes,
;; using the element size of the mode.
5637 (define_insn "aarch64_ext<mode>"
5638 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5639 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5640 (match_operand:VALL_F16 2 "register_operand" "w")
5641 (match_operand:SI 3 "immediate_operand" "i")]
5642 UNSPEC_EXT))]
5643 "TARGET_SIMD"
5644 {
5645 operands[3] = GEN_INT (INTVAL (operands[3])
5646 * GET_MODE_UNIT_SIZE (<MODE>mode));
5647 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5648 }
5649 [(set_attr "type" "neon_ext<q>")]
5650 )
5651
5652 ;; This instruction's pattern is generated directly by
5653 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5654 ;; need corresponding changes there.
;; REV16/REV32/REV64: reverse elements within containers (width chosen
;; by the REVERSE iterator's rev_op attribute).
5655 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5656 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5657 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5658 REVERSE))]
5659 "TARGET_SIMD"
5660 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5661 [(set_attr "type" "neon_rev<q>")]
5662 )
5663
;; Interleaved stores of D-register structures.  For each arity there
;; are two variants: the VD element modes use ST2/ST3/ST4 (interleaving
;; stores), while the DX (64-bit scalar-in-vector) modes use ST1 with
;; .1d layout, where no interleaving is observable.  The destination is
;; a BLK-mode memory wrapped by the expanders below.
5664 (define_insn "aarch64_st2<mode>_dreg"
5665 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5666 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5667 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668 UNSPEC_ST2))]
5669 "TARGET_SIMD"
5670 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5671 [(set_attr "type" "neon_store2_2reg")]
5672 )
5673
;; 64-bit scalar element variant: plain ST1 of two D registers.
5674 (define_insn "aarch64_st2<mode>_dreg"
5675 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5676 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5677 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5678 UNSPEC_ST2))]
5679 "TARGET_SIMD"
5680 "st1\\t{%S1.1d - %T1.1d}, %0"
5681 [(set_attr "type" "neon_store1_2reg")]
5682 )
5683
5684 (define_insn "aarch64_st3<mode>_dreg"
5685 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5686 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5687 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5688 UNSPEC_ST3))]
5689 "TARGET_SIMD"
5690 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5691 [(set_attr "type" "neon_store3_3reg")]
5692 )
5693
;; 64-bit scalar element variant: plain ST1 of three D registers.
5694 (define_insn "aarch64_st3<mode>_dreg"
5695 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5696 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5697 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5698 UNSPEC_ST3))]
5699 "TARGET_SIMD"
5700 "st1\\t{%S1.1d - %U1.1d}, %0"
5701 [(set_attr "type" "neon_store1_3reg")]
5702 )
5703
5704 (define_insn "aarch64_st4<mode>_dreg"
5705 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5706 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5707 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5708 UNSPEC_ST4))]
5709 "TARGET_SIMD"
5710 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5711 [(set_attr "type" "neon_store4_4reg")]
5712 )
5713
;; 64-bit scalar element variant: plain ST1 of four D registers.
5714 (define_insn "aarch64_st4<mode>_dreg"
5715 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5716 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5717 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5718 UNSPEC_ST4))]
5719 "TARGET_SIMD"
5720 "st1\\t{%S1.1d - %V1.1d}, %0"
5721 [(set_attr "type" "neon_store1_4reg")]
5722 )
5723
;; Builtin expander for stN of D-register structures: wrap the address
;; (operand 0) in a BLK MEM sized nregs * 8 bytes and emit the matching
;; _dreg insn above.
5724 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5725 [(match_operand:DI 0 "register_operand" "r")
5726 (match_operand:VSTRUCT 1 "register_operand" "w")
5727 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728 "TARGET_SIMD"
5729 {
5730 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5731 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5732
5733 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5734 DONE;
5735 })
5736
;; Builtin expander for stN of Q-register structures: the struct mode
;; itself is used for the MEM, and the generic simd_st pattern is used.
5737 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5738 [(match_operand:DI 0 "register_operand" "r")
5739 (match_operand:VSTRUCT 1 "register_operand" "w")
5740 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5741 "TARGET_SIMD"
5742 {
5743 machine_mode mode = <VSTRUCT:MODE>mode;
5744 rtx mem = gen_rtx_MEM (mode, operands[0]);
5745
5746 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5747 DONE;
5748 })
5749
;; Builtin expander for stN single-lane stores: store one element from
;; each of the nregs registers, so the MEM covers element-size * nregs
;; bytes.  Operand 2 is the lane number.
5750 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5751 [(match_operand:DI 0 "register_operand" "r")
5752 (match_operand:VSTRUCT 1 "register_operand" "w")
5753 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5754 (match_operand:SI 2 "immediate_operand")]
5755 "TARGET_SIMD"
5756 {
5757 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5758 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5759 * <VSTRUCT:nregs>);
5760
5761 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5762 mem, operands[1], operands[2]));
5763 DONE;
5764 })
5765
;; Builtin expander for st1: a plain vector store.  Big-endian needs the
;; dedicated be_st1 pattern to get array (memory-order) element layout;
;; little-endian is just a move to memory.
5766 (define_expand "aarch64_st1<VALL_F16:mode>"
5767 [(match_operand:DI 0 "register_operand")
5768 (match_operand:VALL_F16 1 "register_operand")]
5769 "TARGET_SIMD"
5770 {
5771 machine_mode mode = <VALL_F16:MODE>mode;
5772 rtx mem = gen_rtx_MEM (mode, operands[0]);
5773
5774 if (BYTES_BIG_ENDIAN)
5775 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5776 else
5777 emit_move_insn (mem, operands[1]);
5778 DONE;
5779 })
5780
5781 ;; Expander for builtins to insert vector registers into large
5782 ;; opaque integer modes.
5783
5784 ;; Q-register list. We don't need a D-reg inserter as we zero
5785 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5786
;; Insert a Q register into a structure value: copy the whole structure
;; (operand 1 -> operand 0) and then overwrite the selected 16-byte
;; chunk via a subreg store.  Operand 3 selects the part.
5787 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5788 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5789 (match_operand:VSTRUCT 1 "register_operand" "0")
5790 (match_operand:VQ 2 "register_operand" "w")
5791 (match_operand:SI 3 "immediate_operand" "i")]
5792 "TARGET_SIMD"
5793 {
5794 int part = INTVAL (operands[3]);
5795 int offset = part * 16;
5796
5797 emit_move_insn (operands[0], operands[1]);
5798 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5799 operands[2]);
5800 DONE;
5801 })
5802
5803 ;; Standard pattern name vec_init<mode><Vel>.
5804
;; Build a vector from the element list in operand 1; all strategy
;; decisions (dup, insert, constant pool, ...) live in
;; aarch64_expand_vector_init.
5805 (define_expand "vec_init<mode><Vel>"
5806 [(match_operand:VALL_F16 0 "register_operand" "")
5807 (match_operand 1 "" "")]
5808 "TARGET_SIMD"
5809 {
5810 aarch64_expand_vector_init (operands[0], operands[1]);
5811 DONE;
5812 })
5813
;; LD1R: load one element from memory and replicate it to every lane of
;; the destination vector (matches a vec_duplicate of a memory scalar).
5814 (define_insn "*aarch64_simd_ld1r<mode>"
5815 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5816 (vec_duplicate:VALL_F16
5817 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5818 "TARGET_SIMD"
5819 "ld1r\\t{%0.<Vtype>}, %1"
5820 [(set_attr "type" "neon_load1_all_lanes")]
5821 )
5822
;; LD1 of two consecutive Q registers (ld1 {Va, Vb}) into an OI value.
5823 (define_insn "aarch64_simd_ld1<mode>_x2"
5824 [(set (match_operand:OI 0 "register_operand" "=w")
5825 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5826 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827 UNSPEC_LD1))]
5828 "TARGET_SIMD"
5829 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5830 [(set_attr "type" "neon_load1_2reg<q>")]
5831 )
5832
;; As above but for 64-bit (VDC) element modes.
5833 (define_insn "aarch64_simd_ld1<mode>_x2"
5834 [(set (match_operand:OI 0 "register_operand" "=w")
5835 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5836 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5837 UNSPEC_LD1))]
5838 "TARGET_SIMD"
5839 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5840 [(set_attr "type" "neon_load1_2reg<q>")]
5841 )
5842
5843
;; FRECPE: floating-point reciprocal estimate, vector forms.
5844 (define_insn "aarch64_frecpe<mode>"
5845 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5846 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5847 UNSPEC_FRECPE))]
5848 "TARGET_SIMD"
5849 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5850 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5851 )
5852
;; Scalar FRECPE/FRECPX (suffix chosen by the FRECP iterator).
5853 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5854 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5855 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5856 FRECP))]
5857 "TARGET_SIMD"
5858 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5859 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5860 )
5861
;; FRECPS: reciprocal step, used in Newton-Raphson refinement sequences.
5862 (define_insn "aarch64_frecps<mode>"
5863 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5864 (unspec:VHSDF_HSDF
5865 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5866 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5867 UNSPEC_FRECPS))]
5868 "TARGET_SIMD"
5869 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5870 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5871 )
5872
;; URECPE: unsigned integer reciprocal estimate.
5873 (define_insn "aarch64_urecpe<mode>"
5874 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5875 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5876 UNSPEC_URECPE))]
5877 "TARGET_SIMD"
5878 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5879 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5880
5881 ;; Standard pattern name vec_extract<mode><Vel>.
5882
;; Standard pattern name vec_extract<mode><Vel>: read lane operand 2 of
;; vector operand 1.  Delegates to the aarch64_get_lane pattern, which
;; also handles the memory-destination case.
5883 (define_expand "vec_extract<mode><Vel>"
5884 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5885 (match_operand:VALL_F16 1 "register_operand" "")
5886 (match_operand:SI 2 "immediate_operand" "")]
5887 "TARGET_SIMD"
5888 {
5889 emit_insn
5890 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5891 DONE;
5892 })
5893
5894 ;; aes
5895
;; AESE/AESD round instructions.  The architected operation is
;; dst = SubBytes (ShiftRows (dst EOR src)); operand 1 is commutative
;; with operand 2 ("%0") because the first step is an EOR.
5896 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5897 [(set (match_operand:V16QI 0 "register_operand" "=w")
5898 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5899 (match_operand:V16QI 2 "register_operand" "w")]
5900 CRYPTO_AES))]
5901 "TARGET_SIMD && TARGET_AES"
5902 "aes<aes_op>\\t%0.16b, %2.16b"
5903 [(set_attr "type" "crypto_aese")]
5904 )
5905
;; Combine pattern: an XOR feeding AESE/AESD where the other AES input
;; is zero folds into a single AES instruction, since the instruction
;; starts with an EOR anyway.  Zero-first and zero-second operand
;; orderings are matched by the two variants below.
5906 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5907 [(set (match_operand:V16QI 0 "register_operand" "=w")
5908 (unspec:V16QI [(xor:V16QI
5909 (match_operand:V16QI 1 "register_operand" "%0")
5910 (match_operand:V16QI 2 "register_operand" "w"))
5911 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5912 CRYPTO_AES))]
5913 "TARGET_SIMD && TARGET_AES"
5914 "aes<aes_op>\\t%0.16b, %2.16b"
5915 [(set_attr "type" "crypto_aese")]
5916 )
5917
5918 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5919 [(set (match_operand:V16QI 0 "register_operand" "=w")
5920 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5921 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5922 (match_operand:V16QI 2 "register_operand" "w"))]
5923 CRYPTO_AES))]
5924 "TARGET_SIMD && TARGET_AES"
5925 "aes<aes_op>\\t%0.16b, %2.16b"
5926 [(set_attr "type" "crypto_aese")]
5927 )
5928
5929 ;; When AES/AESMC fusion is enabled we want the register allocation to
5930 ;; look like:
5931 ;; AESE Vn, _
5932 ;; AESMC Vn, Vn
5933 ;; So prefer to tie operand 1 to operand 0 when fusing.
5934
;; AESMC/AESIMC MixColumns step.  The first alternative (tied "0") is
;; only enabled when AES/AESMC fusion is on, to encourage the allocator
;; to produce the fusible same-register form.
5935 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5936 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5937 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5938 CRYPTO_AESMC))]
5939 "TARGET_SIMD && TARGET_AES"
5940 "aes<aesmc_op>\\t%0.16b, %1.16b"
5941 [(set_attr "type" "crypto_aesmc")
5942 (set_attr_alternative "enabled"
5943 [(if_then_else (match_test
5944 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5945 (const_string "yes" )
5946 (const_string "no"))
5947 (const_string "yes")])]
5948 )
5949
5950 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5951 ;; and enforce the register dependency without scheduling or register
5952 ;; allocation messing up the order or introducing moves inbetween.
5953 ;; Mash the two together during combine.
5954
;; AESE+AESMC mashed into one insn (combine) so nothing can be scheduled
;; between them when fusion is enabled.  The earlyclobber "=&w" plus the
;; tied operand 1 keeps the back-to-back same-register form the fusion
;; hardware recognises; length is 8 for the two instructions.
5955 (define_insn "*aarch64_crypto_aese_fused"
5956 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5957 (unspec:V16QI
5958 [(unspec:V16QI
5959 [(match_operand:V16QI 1 "register_operand" "0")
5960 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5961 ] UNSPEC_AESMC))]
5962 "TARGET_SIMD && TARGET_AES
5963 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5964 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5965 [(set_attr "type" "crypto_aese")
5966 (set_attr "length" "8")]
5967 )
5968
5969 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5970 ;; and enforce the register dependency without scheduling or register
5971 ;; allocation messing up the order or introducing moves inbetween.
5972 ;; Mash the two together during combine.
5973
;; Decrypt counterpart of the pattern above: fused AESD+AESIMC.  The
;; same AARCH64_FUSE_AES_AESMC flag gates both directions.
5974 (define_insn "*aarch64_crypto_aesd_fused"
5975 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5976 (unspec:V16QI
5977 [(unspec:V16QI
5978 [(match_operand:V16QI 1 "register_operand" "0")
5979 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5980 ] UNSPEC_AESIMC))]
5981 "TARGET_SIMD && TARGET_AES
5982 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5983 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5984 [(set_attr "type" "crypto_aese")
5985 (set_attr "length" "8")]
5986 )
5987
5988 ;; sha1
5989
;; SHA1H on a plain SI value.
5990 (define_insn "aarch64_crypto_sha1hsi"
5991 [(set (match_operand:SI 0 "register_operand" "=w")
5992 (unspec:SI [(match_operand:SI 1
5993 "register_operand" "w")]
5994 UNSPEC_SHA1H))]
5995 "TARGET_SIMD && TARGET_SHA2"
5996 "sha1h\\t%s0, %s1"
5997 [(set_attr "type" "crypto_sha1_fast")]
5998 )
5999
;; SHA1H taking its input from lane 0 of a V4SI vector.  The hardware
;; reads the low S register, which is lane 0 on little-endian ...
6000 (define_insn "aarch64_crypto_sha1hv4si"
6001 [(set (match_operand:SI 0 "register_operand" "=w")
6002 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6003 (parallel [(const_int 0)]))]
6004 UNSPEC_SHA1H))]
6005 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6006 "sha1h\\t%s0, %s1"
6007 [(set_attr "type" "crypto_sha1_fast")]
6008 )
6009
;; ... and lane 3 on big-endian, hence the separate pattern.
6010 (define_insn "aarch64_be_crypto_sha1hv4si"
6011 [(set (match_operand:SI 0 "register_operand" "=w")
6012 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6013 (parallel [(const_int 3)]))]
6014 UNSPEC_SHA1H))]
6015 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6016 "sha1h\\t%s0, %s1"
6017 [(set_attr "type" "crypto_sha1_fast")]
6018 )
6019
;; SHA1SU1 schedule-update; operand 1 is tied to the destination.
6020 (define_insn "aarch64_crypto_sha1su1v4si"
6021 [(set (match_operand:V4SI 0 "register_operand" "=w")
6022 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6023 (match_operand:V4SI 2 "register_operand" "w")]
6024 UNSPEC_SHA1SU1))]
6025 "TARGET_SIMD && TARGET_SHA2"
6026 "sha1su1\\t%0.4s, %2.4s"
6027 [(set_attr "type" "crypto_sha1_fast")]
6028 )
6029
;; SHA1C/SHA1M/SHA1P hash-update rounds (choice via CRYPTO_SHA1).
6030 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6031 [(set (match_operand:V4SI 0 "register_operand" "=w")
6032 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6033 (match_operand:SI 2 "register_operand" "w")
6034 (match_operand:V4SI 3 "register_operand" "w")]
6035 CRYPTO_SHA1))]
6036 "TARGET_SIMD && TARGET_SHA2"
6037 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6038 [(set_attr "type" "crypto_sha1_slow")]
6039 )
6040
;; SHA1SU0 schedule-update.
6041 (define_insn "aarch64_crypto_sha1su0v4si"
6042 [(set (match_operand:V4SI 0 "register_operand" "=w")
6043 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6044 (match_operand:V4SI 2 "register_operand" "w")
6045 (match_operand:V4SI 3 "register_operand" "w")]
6046 UNSPEC_SHA1SU0))]
6047 "TARGET_SIMD && TARGET_SHA2"
6048 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6049 [(set_attr "type" "crypto_sha1_xor")]
6050 )
6051
6052 ;; sha256
6053
;; SHA256H/SHA256H2 hash-update rounds (choice via CRYPTO_SHA256);
;; operand 1 is tied to the destination.
6054 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6055 [(set (match_operand:V4SI 0 "register_operand" "=w")
6056 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6057 (match_operand:V4SI 2 "register_operand" "w")
6058 (match_operand:V4SI 3 "register_operand" "w")]
6059 CRYPTO_SHA256))]
6060 "TARGET_SIMD && TARGET_SHA2"
6061 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6062 [(set_attr "type" "crypto_sha256_slow")]
6063 )
6064
;; SHA256SU0 schedule-update (two inputs).
6065 (define_insn "aarch64_crypto_sha256su0v4si"
6066 [(set (match_operand:V4SI 0 "register_operand" "=w")
6067 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6068 (match_operand:V4SI 2 "register_operand" "w")]
6069 UNSPEC_SHA256SU0))]
6070 "TARGET_SIMD && TARGET_SHA2"
6071 "sha256su0\\t%0.4s, %2.4s"
6072 [(set_attr "type" "crypto_sha256_fast")]
6073 )
6074
;; SHA256SU1 schedule-update (three inputs).
6075 (define_insn "aarch64_crypto_sha256su1v4si"
6076 [(set (match_operand:V4SI 0 "register_operand" "=w")
6077 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6078 (match_operand:V4SI 2 "register_operand" "w")
6079 (match_operand:V4SI 3 "register_operand" "w")]
6080 UNSPEC_SHA256SU1))]
6081 "TARGET_SIMD && TARGET_SHA2"
6082 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6083 [(set_attr "type" "crypto_sha256_slow")]
6084 )
6085
6086 ;; sha512
6087
;; SHA512H/SHA512H2 hash-update rounds (Armv8.2-A SHA3/SHA512
;; extension); operand 1 is tied to the destination.
6088 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6089 [(set (match_operand:V2DI 0 "register_operand" "=w")
6090 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6091 (match_operand:V2DI 2 "register_operand" "w")
6092 (match_operand:V2DI 3 "register_operand" "w")]
6093 CRYPTO_SHA512))]
6094 "TARGET_SIMD && TARGET_SHA3"
6095 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6096 [(set_attr "type" "crypto_sha512")]
6097 )
6098
;; SHA512SU0 schedule-update (two inputs).
6099 (define_insn "aarch64_crypto_sha512su0qv2di"
6100 [(set (match_operand:V2DI 0 "register_operand" "=w")
6101 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6102 (match_operand:V2DI 2 "register_operand" "w")]
6103 UNSPEC_SHA512SU0))]
6104 "TARGET_SIMD && TARGET_SHA3"
6105 "sha512su0\\t%0.2d, %2.2d"
6106 [(set_attr "type" "crypto_sha512")]
6107 )
6108
;; SHA512SU1 schedule-update (three inputs).
6109 (define_insn "aarch64_crypto_sha512su1qv2di"
6110 [(set (match_operand:V2DI 0 "register_operand" "=w")
6111 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6112 (match_operand:V2DI 2 "register_operand" "w")
6113 (match_operand:V2DI 3 "register_operand" "w")]
6114 UNSPEC_SHA512SU1))]
6115 "TARGET_SIMD && TARGET_SHA3"
6116 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6117 [(set_attr "type" "crypto_sha512")]
6118 )
6119
6120 ;; sha3
6121
;; EOR3: three-way exclusive OR, expressed with generic xor RTL so the
;; combiner can form it from ordinary code as well as the intrinsic.
6122 (define_insn "eor3q<mode>4"
6123 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6124 (xor:VQ_I
6125 (xor:VQ_I
6126 (match_operand:VQ_I 2 "register_operand" "w")
6127 (match_operand:VQ_I 3 "register_operand" "w"))
6128 (match_operand:VQ_I 1 "register_operand" "w")))]
6129 "TARGET_SIMD && TARGET_SHA3"
6130 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6131 [(set_attr "type" "crypto_sha3")]
6132 )
6133
;; RAX1: XOR with the second operand rotated left by one bit.
6134 (define_insn "aarch64_rax1qv2di"
6135 [(set (match_operand:V2DI 0 "register_operand" "=w")
6136 (xor:V2DI
6137 (rotate:V2DI
6138 (match_operand:V2DI 2 "register_operand" "w")
6139 (const_int 1))
6140 (match_operand:V2DI 1 "register_operand" "w")))]
6141 "TARGET_SIMD && TARGET_SHA3"
6142 "rax1\\t%0.2d, %1.2d, %2.2d"
6143 [(set_attr "type" "crypto_sha3")]
6144 )
6145
;; XAR: XOR the two (commutative, "%w") inputs then rotate right by the
;; immediate in operand 3.
6146 (define_insn "aarch64_xarqv2di"
6147 [(set (match_operand:V2DI 0 "register_operand" "=w")
6148 (rotatert:V2DI
6149 (xor:V2DI
6150 (match_operand:V2DI 1 "register_operand" "%w")
6151 (match_operand:V2DI 2 "register_operand" "w"))
6152 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6153 "TARGET_SIMD && TARGET_SHA3"
6154 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6155 [(set_attr "type" "crypto_sha3")]
6156 )
6157
;; BCAX: bit clear and XOR — op1 ^ (op2 & ~op3), in generic RTL.
6158 (define_insn "bcaxq<mode>4"
6159 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6160 (xor:VQ_I
6161 (and:VQ_I
6162 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6163 (match_operand:VQ_I 2 "register_operand" "w"))
6164 (match_operand:VQ_I 1 "register_operand" "w")))]
6165 "TARGET_SIMD && TARGET_SHA3"
6166 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6167 [(set_attr "type" "crypto_sha3")]
6168 )
6169
6170 ;; SM3
6171
;; SM3SS1: three-input SM3 rotate/add helper.
6172 (define_insn "aarch64_sm3ss1qv4si"
6173 [(set (match_operand:V4SI 0 "register_operand" "=w")
6174 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6175 (match_operand:V4SI 2 "register_operand" "w")
6176 (match_operand:V4SI 3 "register_operand" "w")]
6177 UNSPEC_SM3SS1))]
6178 "TARGET_SIMD && TARGET_SM4"
6179 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6180 [(set_attr "type" "crypto_sm3")]
6181 )
6182
6183
;; SM3TT1A/1B/2A/2B rounds (selected by CRYPTO_SM3TT); operand 4 is a
;; 2-bit lane immediate, operand 1 is tied to the destination.
6184 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6185 [(set (match_operand:V4SI 0 "register_operand" "=w")
6186 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187 (match_operand:V4SI 2 "register_operand" "w")
6188 (match_operand:V4SI 3 "register_operand" "w")
6189 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6190 CRYPTO_SM3TT))]
6191 "TARGET_SIMD && TARGET_SM4"
6192 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6193 [(set_attr "type" "crypto_sm3")]
6194 )
6195
;; SM3PARTW1/SM3PARTW2 message-expansion steps.
6196 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6197 [(set (match_operand:V4SI 0 "register_operand" "=w")
6198 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6199 (match_operand:V4SI 2 "register_operand" "w")
6200 (match_operand:V4SI 3 "register_operand" "w")]
6201 CRYPTO_SM3PART))]
6202 "TARGET_SIMD && TARGET_SM4"
6203 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6204 [(set_attr "type" "crypto_sm3")]
6205 )
6206
6207 ;; SM4
6208
;; SM4E: SM4 encryption round; operand 1 tied to the destination.
6209 (define_insn "aarch64_sm4eqv4si"
6210 [(set (match_operand:V4SI 0 "register_operand" "=w")
6211 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6212 (match_operand:V4SI 2 "register_operand" "w")]
6213 UNSPEC_SM4E))]
6214 "TARGET_SIMD && TARGET_SM4"
6215 "sm4e\\t%0.4s, %2.4s"
6216 [(set_attr "type" "crypto_sm4")]
6217 )
6218
;; SM4EKEY: SM4 key-schedule step (no tied operand).
6219 (define_insn "aarch64_sm4ekeyqv4si"
6220 [(set (match_operand:V4SI 0 "register_operand" "=w")
6221 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6222 (match_operand:V4SI 2 "register_operand" "w")]
6223 UNSPEC_SM4EKEY))]
6224 "TARGET_SIMD && TARGET_SM4"
6225 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6226 [(set_attr "type" "crypto_sm4")]
6227 )
6228
6229 ;; fp16fml
6230
;; FP16FML widening multiply-accumulate builtins, low-half form
;; (FMLAL/FMLSL).  Build vec_select parallels for the low halves of both
;; half-float inputs and hand them to the concrete insn pattern.
6231 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6232 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6233 (unspec:VDQSF
6234 [(match_operand:VDQSF 1 "register_operand" "0")
6235 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6236 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6237 VFMLA16_LOW))]
6238 "TARGET_F16FML"
6239 {
6240 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6241 <nunits> * 2, false);
6242 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6243 <nunits> * 2, false);
6244
6245 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6246 operands[1],
6247 operands[2],
6248 operands[3],
6249 p1, p2));
6250 DONE;
6251
6252 })
6253
;; As above but for the high halves (FMLAL2/FMLSL2): the parallels
;; select the upper half of each input.
6254 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6255 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6256 (unspec:VDQSF
6257 [(match_operand:VDQSF 1 "register_operand" "0")
6258 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6259 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6260 VFMLA16_HIGH))]
6261 "TARGET_F16FML"
6262 {
6263 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6264 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6265
6266 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6267 operands[1],
6268 operands[2],
6269 operands[3],
6270 p1, p2));
6271 DONE;
6272 })
6273
;; FMLAL (low): fused multiply-add of the float-extended low halves of
;; two half-float vectors into a single-float accumulator.  Operands 4
;; and 5 are the lo-half vec_select parallels supplied by the expander.
6274 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6275 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6276 (fma:VDQSF
6277 (float_extend:VDQSF
6278 (vec_select:<VFMLA_SEL_W>
6279 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6280 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6281 (float_extend:VDQSF
6282 (vec_select:<VFMLA_SEL_W>
6283 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6284 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6285 (match_operand:VDQSF 1 "register_operand" "0")))]
6286 "TARGET_F16FML"
6287 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6288 [(set_attr "type" "neon_fp_mul_s")]
6289 )
6290
;; FMLSL (low): as FMLAL low, but the first product operand is negated,
;; giving a multiply-subtract.
6291 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6292 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6293 (fma:VDQSF
6294 (float_extend:VDQSF
6295 (neg:<VFMLA_SEL_W>
6296 (vec_select:<VFMLA_SEL_W>
6297 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6298 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6299 (float_extend:VDQSF
6300 (vec_select:<VFMLA_SEL_W>
6301 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6302 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6303 (match_operand:VDQSF 1 "register_operand" "0")))]
6304 "TARGET_F16FML"
6305 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6306 [(set_attr "type" "neon_fp_mul_s")]
6307 )
6308
;; FMLAL2 (high): same operation on the high halves.
6309 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6310 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6311 (fma:VDQSF
6312 (float_extend:VDQSF
6313 (vec_select:<VFMLA_SEL_W>
6314 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6315 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6316 (float_extend:VDQSF
6317 (vec_select:<VFMLA_SEL_W>
6318 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6319 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6320 (match_operand:VDQSF 1 "register_operand" "0")))]
6321 "TARGET_F16FML"
6322 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6323 [(set_attr "type" "neon_fp_mul_s")]
6324 )
6325
;; FMLSL2 (high): multiply-subtract on the high halves.
6326 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6327 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6328 (fma:VDQSF
6329 (float_extend:VDQSF
6330 (neg:<VFMLA_SEL_W>
6331 (vec_select:<VFMLA_SEL_W>
6332 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6333 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6334 (float_extend:VDQSF
6335 (vec_select:<VFMLA_SEL_W>
6336 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6337 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6338 (match_operand:VDQSF 1 "register_operand" "0")))]
6339 "TARGET_F16FML"
6340 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6341 [(set_attr "type" "neon_fp_mul_s")]
6342 )
6343
;; Lane form of the low-half FP16FML builtins for V2SF: build the
;; lo-half selector and an endian-corrected lane rtx (operand 4, 0-3),
;; then emit the concrete lane insn.
6344 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6345 [(set (match_operand:V2SF 0 "register_operand" "")
6346 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6347 (match_operand:V4HF 2 "register_operand" "")
6348 (match_operand:V4HF 3 "register_operand" "")
6349 (match_operand:SI 4 "aarch64_imm2" "")]
6350 VFMLA16_LOW))]
6351 "TARGET_F16FML"
6352 {
6353 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6354 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6355
6356 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6357 operands[1],
6358 operands[2],
6359 operands[3],
6360 p1, lane));
6361 DONE;
6362 }
6363 )
6364
;; As above for the high-half (FMLAL2/FMLSL2) lane builtins.
6365 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6366 [(set (match_operand:V2SF 0 "register_operand" "")
6367 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6368 (match_operand:V4HF 2 "register_operand" "")
6369 (match_operand:V4HF 3 "register_operand" "")
6370 (match_operand:SI 4 "aarch64_imm2" "")]
6371 VFMLA16_HIGH))]
6372 "TARGET_F16FML"
6373 {
6374 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6375 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6376
6377 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6378 operands[1],
6379 operands[2],
6380 operands[3],
6381 p1, lane));
6382 DONE;
6383 })
6384
;; Lane FMLAL (low half): one multiplicand is the low half of operand 2;
;; the other is lane %5 of operand 3 broadcast via vec_duplicate.  The
;; "x" constraint restricts operand 3 to V0-V15 as required by the
;; by-element instruction encoding.
6385 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6386 [(set (match_operand:V2SF 0 "register_operand" "=w")
6387 (fma:V2SF
6388 (float_extend:V2SF
6389 (vec_select:V2HF
6390 (match_operand:V4HF 2 "register_operand" "w")
6391 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6392 (float_extend:V2SF
6393 (vec_duplicate:V2HF
6394 (vec_select:HF
6395 (match_operand:V4HF 3 "register_operand" "x")
6396 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6397 (match_operand:V2SF 1 "register_operand" "0")))]
6398 "TARGET_F16FML"
6399 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6400 [(set_attr "type" "neon_fp_mul_s")]
6401 )
6402
;; Lane FMLSL (low half): as above with the vector multiplicand negated.
6403 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6404 [(set (match_operand:V2SF 0 "register_operand" "=w")
6405 (fma:V2SF
6406 (float_extend:V2SF
6407 (neg:V2HF
6408 (vec_select:V2HF
6409 (match_operand:V4HF 2 "register_operand" "w")
6410 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6411 (float_extend:V2SF
6412 (vec_duplicate:V2HF
6413 (vec_select:HF
6414 (match_operand:V4HF 3 "register_operand" "x")
6415 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6416 (match_operand:V2SF 1 "register_operand" "0")))]
6417 "TARGET_F16FML"
6418 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6419 [(set_attr "type" "neon_fp_mul_s")]
6420 )
6421
;; Lane FMLAL2 (high half).
6422 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6423 [(set (match_operand:V2SF 0 "register_operand" "=w")
6424 (fma:V2SF
6425 (float_extend:V2SF
6426 (vec_select:V2HF
6427 (match_operand:V4HF 2 "register_operand" "w")
6428 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6429 (float_extend:V2SF
6430 (vec_duplicate:V2HF
6431 (vec_select:HF
6432 (match_operand:V4HF 3 "register_operand" "x")
6433 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6434 (match_operand:V2SF 1 "register_operand" "0")))]
6435 "TARGET_F16FML"
6436 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6437 [(set_attr "type" "neon_fp_mul_s")]
6438 )
6439
;; Lane FMLSL2 (high half).
6440 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6441 [(set (match_operand:V2SF 0 "register_operand" "=w")
6442 (fma:V2SF
6443 (float_extend:V2SF
6444 (neg:V2HF
6445 (vec_select:V2HF
6446 (match_operand:V4HF 2 "register_operand" "w")
6447 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6448 (float_extend:V2SF
6449 (vec_duplicate:V2HF
6450 (vec_select:HF
6451 (match_operand:V4HF 3 "register_operand" "x")
6452 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6453 (match_operand:V2SF 1 "register_operand" "0")))]
6454 "TARGET_F16FML"
6455 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6456 [(set_attr "type" "neon_fp_mul_s")]
6457 )
6458
;; "_laneq" expanders for the 128-bit V4SF forms: a V8HF multiplicand half
;; times a lane taken from a 128-bit (V8HF) lane register, so the lane
;; immediate is 0..7 (aarch64_lane_imm3).  Each converts the immediate to
;; an endianness-corrected lane rtx plus a half-vector parallel and emits
;; the matching define_insn.

;; Low-half variant: FMLAL/FMLSL Vd.4S, Vn.4H, Vm.H[lane].
6459 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6460 [(set (match_operand:V4SF 0 "register_operand" "")
6461 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6462 (match_operand:V8HF 2 "register_operand" "")
6463 (match_operand:V8HF 3 "register_operand" "")
6464 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6465 VFMLA16_LOW))]
6466 "TARGET_F16FML"
6467 {
6468 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6469 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6470
6471 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6472 operands[1],
6473 operands[2],
6474 operands[3],
6475 p1, lane));
6476 DONE;
6477 })
6478
;; High-half variant: FMLAL2/FMLSL2 Vd.4S, Vn.4H, Vm.H[lane].
6479 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6480 [(set (match_operand:V4SF 0 "register_operand" "")
6481 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6482 (match_operand:V8HF 2 "register_operand" "")
6483 (match_operand:V8HF 3 "register_operand" "")
6484 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6485 VFMLA16_HIGH))]
6486 "TARGET_F16FML"
6487 {
6488 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6489 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6490
6491 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6492 operands[1],
6493 operands[2],
6494 operands[3],
6495 p1, lane));
6496 DONE;
6497 })
6498
;; Concrete "_laneq" insns for the 128-bit V4SF forms: a half of the V8HF
;; operand 2 is widened to V4SF and multiplied by one broadcast H-element
;; of the V8HF operand 3 (lane 0..7), accumulating into operand 1, which
;; is tied to the destination by the "0" constraint.  FMLSL variants negate
;; the selected half.  The "x" constraint keeps the lane register in the
;; range the by-element encoding can address.

;; FMLAL Vd.4S, Vn.4H, Vm.H[lane] — low half, multiply-add.
6499 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6500 [(set (match_operand:V4SF 0 "register_operand" "=w")
6501 (fma:V4SF
6502 (float_extend:V4SF
6503 (vec_select:V4HF
6504 (match_operand:V8HF 2 "register_operand" "w")
6505 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6506 (float_extend:V4SF
6507 (vec_duplicate:V4HF
6508 (vec_select:HF
6509 (match_operand:V8HF 3 "register_operand" "x")
6510 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6511 (match_operand:V4SF 1 "register_operand" "0")))]
6512 "TARGET_F16FML"
6513 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6514 [(set_attr "type" "neon_fp_mul_s")]
6515 )
6516
;; FMLSL Vd.4S, Vn.4H, Vm.H[lane] — low half, multiply-subtract.
6517 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6518 [(set (match_operand:V4SF 0 "register_operand" "=w")
6519 (fma:V4SF
6520 (float_extend:V4SF
6521 (neg:V4HF
6522 (vec_select:V4HF
6523 (match_operand:V8HF 2 "register_operand" "w")
6524 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6525 (float_extend:V4SF
6526 (vec_duplicate:V4HF
6527 (vec_select:HF
6528 (match_operand:V8HF 3 "register_operand" "x")
6529 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6530 (match_operand:V4SF 1 "register_operand" "0")))]
6531 "TARGET_F16FML"
6532 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6533 [(set_attr "type" "neon_fp_mul_s")]
6534 )
6535
;; FMLAL2 Vd.4S, Vn.4H, Vm.H[lane] — high half, multiply-add.
6536 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6537 [(set (match_operand:V4SF 0 "register_operand" "=w")
6538 (fma:V4SF
6539 (float_extend:V4SF
6540 (vec_select:V4HF
6541 (match_operand:V8HF 2 "register_operand" "w")
6542 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6543 (float_extend:V4SF
6544 (vec_duplicate:V4HF
6545 (vec_select:HF
6546 (match_operand:V8HF 3 "register_operand" "x")
6547 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6548 (match_operand:V4SF 1 "register_operand" "0")))]
6549 "TARGET_F16FML"
6550 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6551 [(set_attr "type" "neon_fp_mul_s")]
6552 )
6553
;; FMLSL2 Vd.4S, Vn.4H, Vm.H[lane] — high half, multiply-subtract.
6554 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6555 [(set (match_operand:V4SF 0 "register_operand" "=w")
6556 (fma:V4SF
6557 (float_extend:V4SF
6558 (neg:V4HF
6559 (vec_select:V4HF
6560 (match_operand:V8HF 2 "register_operand" "w")
6561 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6562 (float_extend:V4SF
6563 (vec_duplicate:V4HF
6564 (vec_select:HF
6565 (match_operand:V8HF 3 "register_operand" "x")
6566 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6567 (match_operand:V4SF 1 "register_operand" "0")))]
6568 "TARGET_F16FML"
6569 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6570 [(set_attr "type" "neon_fp_mul_s")]
6571 )
6572
;; "_laneq" expanders for the 64-bit V2SF forms: V4HF multiplicand, lane
;; taken from a 128-bit (V8HF) register, so the half parallel is built in
;; V4HFmode while the lane rtx is endianness-corrected in V8HFmode.

;; Low-half variant: FMLAL/FMLSL Vd.2S, Vn.2H, Vm.H[lane], lane 0..7.
6573 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6574 [(set (match_operand:V2SF 0 "register_operand" "")
6575 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6576 (match_operand:V4HF 2 "register_operand" "")
6577 (match_operand:V8HF 3 "register_operand" "")
6578 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6579 VFMLA16_LOW))]
6580 "TARGET_F16FML"
6581 {
6582 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6583 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6584
6585 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6586 operands[1],
6587 operands[2],
6588 operands[3],
6589 p1, lane));
6590 DONE;
6591
6592 })
6593
;; High-half variant: FMLAL2/FMLSL2 Vd.2S, Vn.2H, Vm.H[lane], lane 0..7.
6594 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6595 [(set (match_operand:V2SF 0 "register_operand" "")
6596 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6597 (match_operand:V4HF 2 "register_operand" "")
6598 (match_operand:V8HF 3 "register_operand" "")
6599 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6600 VFMLA16_HIGH))]
6601 "TARGET_F16FML"
6602 {
6603 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6604 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6605
6606 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6607 operands[1],
6608 operands[2],
6609 operands[3],
6610 p1, lane));
6611 DONE;
6612
6613 })
6614
;; Concrete "_laneq" insns for the 64-bit V2SF forms: a half of the V4HF
;; operand 2 is widened and multiplied by one broadcast H-element of the
;; 128-bit V8HF operand 3 (lane 0..7), accumulated into operand 1 (tied to
;; the destination).  FMLSL variants negate the selected half; "x" limits
;; the lane register per the by-element encoding.

;; FMLAL Vd.2S, Vn.2H, Vm.H[lane] — low half, multiply-add.
6615 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6616 [(set (match_operand:V2SF 0 "register_operand" "=w")
6617 (fma:V2SF
6618 (float_extend:V2SF
6619 (vec_select:V2HF
6620 (match_operand:V4HF 2 "register_operand" "w")
6621 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6622 (float_extend:V2SF
6623 (vec_duplicate:V2HF
6624 (vec_select:HF
6625 (match_operand:V8HF 3 "register_operand" "x")
6626 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6627 (match_operand:V2SF 1 "register_operand" "0")))]
6628 "TARGET_F16FML"
6629 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6630 [(set_attr "type" "neon_fp_mul_s")]
6631 )
6632
;; FMLSL Vd.2S, Vn.2H, Vm.H[lane] — low half, multiply-subtract.
6633 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6634 [(set (match_operand:V2SF 0 "register_operand" "=w")
6635 (fma:V2SF
6636 (float_extend:V2SF
6637 (neg:V2HF
6638 (vec_select:V2HF
6639 (match_operand:V4HF 2 "register_operand" "w")
6640 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6641 (float_extend:V2SF
6642 (vec_duplicate:V2HF
6643 (vec_select:HF
6644 (match_operand:V8HF 3 "register_operand" "x")
6645 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6646 (match_operand:V2SF 1 "register_operand" "0")))]
6647 "TARGET_F16FML"
6648 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6649 [(set_attr "type" "neon_fp_mul_s")]
6650 )
6651
;; FMLAL2 Vd.2S, Vn.2H, Vm.H[lane] — high half, multiply-add.
6652 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6653 [(set (match_operand:V2SF 0 "register_operand" "=w")
6654 (fma:V2SF
6655 (float_extend:V2SF
6656 (vec_select:V2HF
6657 (match_operand:V4HF 2 "register_operand" "w")
6658 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6659 (float_extend:V2SF
6660 (vec_duplicate:V2HF
6661 (vec_select:HF
6662 (match_operand:V8HF 3 "register_operand" "x")
6663 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664 (match_operand:V2SF 1 "register_operand" "0")))]
6665 "TARGET_F16FML"
6666 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6667 [(set_attr "type" "neon_fp_mul_s")]
6668 )
6669
;; FMLSL2 Vd.2S, Vn.2H, Vm.H[lane] — high half, multiply-subtract.
6670 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6671 [(set (match_operand:V2SF 0 "register_operand" "=w")
6672 (fma:V2SF
6673 (float_extend:V2SF
6674 (neg:V2HF
6675 (vec_select:V2HF
6676 (match_operand:V4HF 2 "register_operand" "w")
6677 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6678 (float_extend:V2SF
6679 (vec_duplicate:V2HF
6680 (vec_select:HF
6681 (match_operand:V8HF 3 "register_operand" "x")
6682 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683 (match_operand:V2SF 1 "register_operand" "0")))]
6684 "TARGET_F16FML"
6685 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6686 [(set_attr "type" "neon_fp_mul_s")]
6687 )
6688
;; "_lane" expanders for the 128-bit V4SF forms: V8HF multiplicand, lane
;; taken from a 64-bit (V4HF) register, so the half parallel is built in
;; V8HFmode while the lane rtx (0..3, aarch64_imm2) is corrected in
;; V4HFmode.

;; Low-half variant: FMLAL/FMLSL Vd.4S, Vn.4H, Vm.H[lane].
6689 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6690 [(set (match_operand:V4SF 0 "register_operand" "")
6691 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6692 (match_operand:V8HF 2 "register_operand" "")
6693 (match_operand:V4HF 3 "register_operand" "")
6694 (match_operand:SI 4 "aarch64_imm2" "")]
6695 VFMLA16_LOW))]
6696 "TARGET_F16FML"
6697 {
6698 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6699 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6700
6701 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6702 operands[1],
6703 operands[2],
6704 operands[3],
6705 p1, lane));
6706 DONE;
6707 })
6708
;; High-half variant: FMLAL2/FMLSL2 Vd.4S, Vn.4H, Vm.H[lane].
6709 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6710 [(set (match_operand:V4SF 0 "register_operand" "")
6711 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6712 (match_operand:V8HF 2 "register_operand" "")
6713 (match_operand:V4HF 3 "register_operand" "")
6714 (match_operand:SI 4 "aarch64_imm2" "")]
6715 VFMLA16_HIGH))]
6716 "TARGET_F16FML"
6717 {
6718 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6719 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6720
6721 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6722 operands[1],
6723 operands[2],
6724 operands[3],
6725 p1, lane));
6726 DONE;
6727 })
6728
;; Concrete "_lane" insns for the 128-bit V4SF forms: a half of the V8HF
;; operand 2 is widened to V4SF and multiplied by one broadcast H-element
;; of the 64-bit V4HF operand 3 (lane 0..3), accumulated into operand 1
;; (tied to the destination).  FMLSL variants negate the selected half;
;; "x" limits the lane register per the by-element encoding.

;; FMLAL Vd.4S, Vn.4H, Vm.H[lane] — low half, multiply-add.
6729 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6730 [(set (match_operand:V4SF 0 "register_operand" "=w")
6731 (fma:V4SF
6732 (float_extend:V4SF
6733 (vec_select:V4HF
6734 (match_operand:V8HF 2 "register_operand" "w")
6735 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6736 (float_extend:V4SF
6737 (vec_duplicate:V4HF
6738 (vec_select:HF
6739 (match_operand:V4HF 3 "register_operand" "x")
6740 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741 (match_operand:V4SF 1 "register_operand" "0")))]
6742 "TARGET_F16FML"
6743 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6744 [(set_attr "type" "neon_fp_mul_s")]
6745 )
6746
;; FMLSL Vd.4S, Vn.4H, Vm.H[lane] — low half, multiply-subtract.
6747 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6748 [(set (match_operand:V4SF 0 "register_operand" "=w")
6749 (fma:V4SF
6750 (float_extend:V4SF
6751 (neg:V4HF
6752 (vec_select:V4HF
6753 (match_operand:V8HF 2 "register_operand" "w")
6754 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6755 (float_extend:V4SF
6756 (vec_duplicate:V4HF
6757 (vec_select:HF
6758 (match_operand:V4HF 3 "register_operand" "x")
6759 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6760 (match_operand:V4SF 1 "register_operand" "0")))]
6761 "TARGET_F16FML"
6762 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6763 [(set_attr "type" "neon_fp_mul_s")]
6764 )
6765
;; FMLAL2 Vd.4S, Vn.4H, Vm.H[lane] — high half, multiply-add.
6766 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6767 [(set (match_operand:V4SF 0 "register_operand" "=w")
6768 (fma:V4SF
6769 (float_extend:V4SF
6770 (vec_select:V4HF
6771 (match_operand:V8HF 2 "register_operand" "w")
6772 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6773 (float_extend:V4SF
6774 (vec_duplicate:V4HF
6775 (vec_select:HF
6776 (match_operand:V4HF 3 "register_operand" "x")
6777 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778 (match_operand:V4SF 1 "register_operand" "0")))]
6779 "TARGET_F16FML"
6780 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6781 [(set_attr "type" "neon_fp_mul_s")]
6782 )
6783
;; FMLSL2 Vd.4S, Vn.4H, Vm.H[lane] — high half, multiply-subtract.
6784 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6785 [(set (match_operand:V4SF 0 "register_operand" "=w")
6786 (fma:V4SF
6787 (float_extend:V4SF
6788 (neg:V4HF
6789 (vec_select:V4HF
6790 (match_operand:V8HF 2 "register_operand" "w")
6791 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6792 (float_extend:V4SF
6793 (vec_duplicate:V4HF
6794 (vec_select:HF
6795 (match_operand:V4HF 3 "register_operand" "x")
6796 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6797 (match_operand:V4SF 1 "register_operand" "0")))]
6798 "TARGET_F16FML"
6799 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6800 [(set_attr "type" "neon_fp_mul_s")]
6801 )
6802
6803 ;; pmull — polynomial (carry-less) multiply producing a 128-bit result,
6803 ;; available with the Crypto/AES extension (TARGET_AES).
6804
;; PMULL Vd.1Q, Vn.1D, Vm.1D: carry-less multiply of the two DI operands,
;; producing a 128-bit (TI) result.  Modeled as an unspec since RTL has no
;; polynomial-multiply operation.  Requires both SIMD and the AES
;; extension.
6805 (define_insn "aarch64_crypto_pmulldi"
6806 [(set (match_operand:TI 0 "register_operand" "=w")
6807 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6808 (match_operand:DI 2 "register_operand" "w")]
6809 UNSPEC_PMULL))]
6810 "TARGET_SIMD && TARGET_AES"
6811 "pmull\\t%0.1q, %1.1d, %2.1d"
6812 [(set_attr "type" "crypto_pmull")]
6813 )
6814
;; PMULL2 Vd.1Q, Vn.2D, Vm.2D: as above, but operating on the upper
;; 64-bit elements of the two V2DI operands (UNSPEC_PMULL2).
6815 (define_insn "aarch64_crypto_pmullv2di"
6816 [(set (match_operand:TI 0 "register_operand" "=w")
6817 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6818 (match_operand:V2DI 2 "register_operand" "w")]
6819 UNSPEC_PMULL2))]
6820 "TARGET_SIMD && TARGET_AES"
6821 "pmull2\\t%0.1q, %1.2d, %2.2d"
6822 [(set_attr "type" "crypto_pmull")]
6823 )