]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %0"
205 [(set_attr "type" "neon_stp")]
206 )
207
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %0"
232 [(set_attr "type" "neon_stp_q")]
233 )
234
235
236 (define_split
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
242 [(const_int 0)]
243 {
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245 DONE;
246 })
247
248 (define_split
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254 [(const_int 0)]
255 {
256 aarch64_split_simd_move (operands[0], operands[1]);
257 DONE;
258 })
259
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
263 "TARGET_SIMD"
264 {
265 rtx dst = operands[0];
266 rtx src = operands[1];
267
268 if (GP_REGNUM_P (REGNO (src)))
269 {
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273 emit_insn
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
275 emit_insn
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
277 }
278
279 else
280 {
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285
286 emit_insn
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288 emit_insn
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290 }
291 DONE;
292 }
293 )
294
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297 (vec_select:<VHALF>
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
301 "umov\t%0, %1.d[0]"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
304 ])
305
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308 (vec_select:<VHALF>
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
312 "umov\t%0, %1.d[1]"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
315 ])
316
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
324 )
325
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
333 )
334
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
339 "TARGET_SIMD"
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
342 )
343
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
348 "TARGET_SIMD"
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
351 )
352
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357 "TARGET_SIMD"
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
360 )
361
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365 "TARGET_SIMD"
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
368 )
369
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373 UNSPEC_RBIT))]
374 "TARGET_SIMD"
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
377 )
378
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
382 "TARGET_SIMD"
383 {
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386 <MODE>mode, 0);
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389 DONE;
390 }
391 )
392
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
397 "TARGET_SIMD"
398 {
399
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
404
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
407
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
409
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
413
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
418 DONE;
419 }
420 )
421
422 ;; These instructions map to the __builtins for the Dot Product operations.
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
428 DOTPROD)))]
429 "TARGET_DOTPROD"
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
432 )
433
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
438 ;;
439 ;; for (i=0; i<len; i++) {
440 ;; c = a[i] * b[i];
441 ;; r += c;
442 ;; }
443 ;; return result;
444 ;;
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
447 ;;
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
451 ;; ...
452 ;;
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
458 DOTPROD)
459 (match_operand:VS 3 "register_operand")))]
460 "TARGET_DOTPROD"
461 {
462 emit_insn (
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
464 operands[2]));
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
466 DONE;
467 })
468
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
477 DOTPROD)))]
478 "TARGET_DOTPROD"
479 {
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
482 }
483 [(set_attr "type" "neon_dot")]
484 )
485
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
492 DOTPROD)))]
493 "TARGET_DOTPROD"
494 {
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
497 }
498 [(set_attr "type" "neon_dot")]
499 )
500
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
506 {
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
509
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
515 DONE;
516 }
517 )
518
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
521 (mult:VMUL
522 (vec_duplicate:VMUL
523 (vec_select:<VEL>
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
527 "TARGET_SIMD"
528 {
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
531 }
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
533 )
534
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
539 (vec_select:<VEL>
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
543 "TARGET_SIMD"
544 {
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
547 }
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
549 )
550
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
553 (mult:VMUL
554 (vec_duplicate:VMUL
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
557 "TARGET_SIMD"
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
560 )
561
562 (define_insn "@aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
565 UNSPEC_RSQRTE))]
566 "TARGET_SIMD"
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
569
570 (define_insn "@aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
574 UNSPEC_RSQRTS))]
575 "TARGET_SIMD"
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
578
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
582 UNSPEC_RSQRT))]
583 "TARGET_SIMD"
584 {
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
586 DONE;
587 })
588
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
591 (mult:DF
592 (vec_select:DF
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
596 "TARGET_SIMD"
597 {
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
600 }
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
602 )
603
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
607 "TARGET_SIMD"
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
610 )
611
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
615 "TARGET_SIMD"
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
618 )
619
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integerated ABS step, such
622 ;; as SABD.
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
625 (unspec:VSDQ_I_DI
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
627 UNSPEC_ABS))]
628 "TARGET_SIMD"
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
631 )
632
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
638 "TARGET_SIMD"
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
641 )
642
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
647 ABDL2))]
648 "TARGET_SIMD"
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
651 )
652
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
658 ABAL))]
659 "TARGET_SIMD"
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
662 )
663
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
668 ADALP))]
669 "TARGET_SIMD"
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
672 )
673
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
684
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
690 "TARGET_SIMD"
691 {
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
694 operands[2]));
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
698 operands[3]));
699 emit_move_insn (operands[0], operands[3]);
700 DONE;
701 }
702 )
703
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
710 "TARGET_SIMD"
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
713 )
714
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
717 (abs:VHSDF_HSDF
718 (minus:VHSDF_HSDF
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
721 "TARGET_SIMD"
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
724 )
725
726 ;; For AND (vector, register) and BIC (vector, immediate)
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
731 "TARGET_SIMD"
732 {
733 switch (which_alternative)
734 {
735 case 0:
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
737 case 1:
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
739 AARCH64_CHECK_BIC);
740 default:
741 gcc_unreachable ();
742 }
743 }
744 [(set_attr "type" "neon_logic<q>")]
745 )
746
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
752 "TARGET_SIMD"
753 {
754 switch (which_alternative)
755 {
756 case 0:
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
758 case 1:
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
760 AARCH64_CHECK_ORR);
761 default:
762 gcc_unreachable ();
763 }
764 }
765 [(set_attr "type" "neon_logic<q>")]
766 )
767
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
772 "TARGET_SIMD"
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
775 )
776
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
780 "TARGET_SIMD"
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
783 )
784
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
787 (vec_merge:VALL_F16
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
792 "TARGET_SIMD"
793 {
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
797 {
798 case 0:
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
800 case 1:
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
802 case 2:
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
804 default:
805 gcc_unreachable ();
806 }
807 }
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
809 )
810
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
813 (vec_merge:VALL_F16
814 (vec_duplicate:VALL_F16
815 (vec_select:<VEL>
816 (match_operand:VALL_F16 3 "register_operand" "w")
817 (parallel
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
821 "TARGET_SIMD"
822 {
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
826
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
828 }
829 [(set_attr "type" "neon_ins<q>")]
830 )
831
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
836 (vec_select:<VEL>
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
838 (parallel
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
842 "TARGET_SIMD"
843 {
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
848
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
850 }
851 [(set_attr "type" "neon_ins<q>")]
852 )
853
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
858 "TARGET_SIMD"
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
861 )
862
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
867 "TARGET_SIMD"
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
870 )
871
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
876 "TARGET_SIMD"
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
879 )
880
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
885 "TARGET_SIMD"
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
888 )
889
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
895 "TARGET_SIMD"
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
898 )
899
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
905 "TARGET_SIMD"
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
908 )
909
910 (define_expand "ashl<mode>3"
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
913 (match_operand:SI 2 "general_operand" "")]
914 "TARGET_SIMD"
915 {
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
917 int shift_amount;
918
919 if (CONST_INT_P (operands[2]))
920 {
921 shift_amount = INTVAL (operands[2]);
922 if (shift_amount >= 0 && shift_amount < bit_width)
923 {
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
925 shift_amount);
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
927 operands[1],
928 tmp));
929 DONE;
930 }
931 else
932 {
933 operands[2] = force_reg (SImode, operands[2]);
934 }
935 }
936 else if (MEM_P (operands[2]))
937 {
938 operands[2] = force_reg (SImode, operands[2]);
939 }
940
941 if (REG_P (operands[2]))
942 {
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
946 operands[2],
947 0)));
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
949 tmp));
950 DONE;
951 }
952 else
953 FAIL;
954 }
955 )
956
957 (define_expand "lshr<mode>3"
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
960 (match_operand:SI 2 "general_operand" "")]
961 "TARGET_SIMD"
962 {
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
964 int shift_amount;
965
966 if (CONST_INT_P (operands[2]))
967 {
968 shift_amount = INTVAL (operands[2]);
969 if (shift_amount > 0 && shift_amount <= bit_width)
970 {
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
972 shift_amount);
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
974 operands[1],
975 tmp));
976 DONE;
977 }
978 else
979 operands[2] = force_reg (SImode, operands[2]);
980 }
981 else if (MEM_P (operands[2]))
982 {
983 operands[2] = force_reg (SImode, operands[2]);
984 }
985
986 if (REG_P (operands[2]))
987 {
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]));
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
993 tmp, 0)));
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
995 operands[1],
996 tmp1));
997 DONE;
998 }
999 else
1000 FAIL;
1001 }
1002 )
1003
1004 (define_expand "ashr<mode>3"
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
1007 (match_operand:SI 2 "general_operand" "")]
1008 "TARGET_SIMD"
1009 {
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011 int shift_amount;
1012
1013 if (CONST_INT_P (operands[2]))
1014 {
1015 shift_amount = INTVAL (operands[2]);
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1017 {
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019 shift_amount);
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021 operands[1],
1022 tmp));
1023 DONE;
1024 }
1025 else
1026 operands[2] = force_reg (SImode, operands[2]);
1027 }
1028 else if (MEM_P (operands[2]))
1029 {
1030 operands[2] = force_reg (SImode, operands[2]);
1031 }
1032
1033 if (REG_P (operands[2]))
1034 {
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1040 tmp, 0)));
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042 operands[1],
1043 tmp1));
1044 DONE;
1045 }
1046 else
1047 FAIL;
1048 }
1049 )
1050
1051 (define_expand "vashl<mode>3"
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
1055 "TARGET_SIMD"
1056 {
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058 operands[2]));
1059 DONE;
1060 })
1061
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069 "TARGET_SIMD"
1070 {
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074 neg));
1075 DONE;
1076 })
1077
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083 "TARGET_SIMD"
1084 {
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1087 a shift by 64. */
1088 if (INTVAL (operands[2]) == 64)
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091 DONE;
1092 }
1093 )
1094
1095 (define_expand "vlshr<mode>3"
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099 "TARGET_SIMD"
1100 {
1101 rtx neg = gen_reg_rtx (<MODE>mode);
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104 neg));
1105 DONE;
1106 })
1107
1108 (define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112 "TARGET_SIMD"
1113 {
1114 if (INTVAL (operands[2]) == 64)
1115 emit_move_insn (operands[0], const0_rtx);
1116 else
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118 DONE;
1119 }
1120 )
1121
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1123 (define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1127 UNSPEC_VEC_SHR))]
1128 "TARGET_SIMD"
1129 {
1130 if (BYTES_BIG_ENDIAN)
1131 return "shl %d0, %d1, %2";
1132 else
1133 return "ushr %d0, %d1, %2";
1134 }
1135 [(set_attr "type" "neon_shift_imm")]
1136 )
1137
1138 (define_expand "vec_set<mode>"
1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1142 "TARGET_SIMD"
1143 {
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]));
1147 DONE;
1148 }
1149 )
1150
1151
1152 (define_insn "aarch64_mla<mode>"
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158 "TARGET_SIMD"
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
1161 )
1162
1163 (define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165 (plus:VDQHS
1166 (mult:VDQHS
1167 (vec_duplicate:VDQHS
1168 (vec_select:<VEL>
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1173 "TARGET_SIMD"
1174 {
1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177 }
1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1179 )
1180
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183 (plus:VDQHS
1184 (mult:VDQHS
1185 (vec_duplicate:VDQHS
1186 (vec_select:<VEL>
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1191 "TARGET_SIMD"
1192 {
1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195 }
1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1197 )
1198
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201 (plus:VDQHS
1202 (mult:VDQHS (vec_duplicate:VDQHS
1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1206 "TARGET_SIMD"
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "aarch64_mls<mode>"
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216 "TARGET_SIMD"
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
1219 )
1220
1221 (define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223 (minus:VDQHS
1224 (match_operand:VDQHS 4 "register_operand" "0")
1225 (mult:VDQHS
1226 (vec_duplicate:VDQHS
1227 (vec_select:<VEL>
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1231 "TARGET_SIMD"
1232 {
1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235 }
1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1237 )
1238
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241 (minus:VDQHS
1242 (match_operand:VDQHS 4 "register_operand" "0")
1243 (mult:VDQHS
1244 (vec_duplicate:VDQHS
1245 (vec_select:<VEL>
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1249 "TARGET_SIMD"
1250 {
1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253 }
1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1255 )
1256
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259 (minus:VDQHS
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264 "TARGET_SIMD"
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267 )
1268
1269 ;; Max/Min operations.
1270 (define_insn "<su><maxmin><mode>3"
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274 "TARGET_SIMD"
1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276 [(set_attr "type" "neon_minmax<q>")]
1277 )
1278
1279 (define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1283 "TARGET_SIMD"
1284 {
1285 enum rtx_code cmp_operator;
1286 rtx cmp_fmt;
1287
1288 switch (<CODE>)
1289 {
1290 case UMIN:
1291 cmp_operator = LTU;
1292 break;
1293 case SMIN:
1294 cmp_operator = LT;
1295 break;
1296 case UMAX:
1297 cmp_operator = GTU;
1298 break;
1299 case SMAX:
1300 cmp_operator = GT;
1301 break;
1302 default:
1303 gcc_unreachable ();
1304 }
1305
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308 operands[2], cmp_fmt, operands[1], operands[2]));
1309 DONE;
1310 })
1311
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317 MAXMINV))]
1318 "TARGET_SIMD"
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1321 )
1322
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1328 FMAXMINV))]
1329 "TARGET_SIMD"
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
1332 )
1333
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1339
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1343
1344 (define_insn "move_lo_quad_internal_<mode>"
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346 (vec_concat:VQ_NO2E
1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348 (vec_duplicate:<VHALF> (const_int 0))))]
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350 "@
1351 dup\\t%d0, %1.d[0]
1352 fmov\\t%d0, %1
1353 dup\\t%d0, %1"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355 (set_attr "length" "4")
1356 (set_attr "arch" "simd,fp,simd")]
1357 )
1358
1359 (define_insn "move_lo_quad_internal_<mode>"
1360 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1361 (vec_concat:VQ_2E
1362 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1363 (const_int 0)))]
1364 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1365 "@
1366 dup\\t%d0, %1.d[0]
1367 fmov\\t%d0, %1
1368 dup\\t%d0, %1"
1369 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1370 (set_attr "length" "4")
1371 (set_attr "arch" "simd,fp,simd")]
1372 )
1373
1374 (define_insn "move_lo_quad_internal_be_<mode>"
1375 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1376 (vec_concat:VQ_NO2E
1377 (vec_duplicate:<VHALF> (const_int 0))
1378 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1379 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1380 "@
1381 dup\\t%d0, %1.d[0]
1382 fmov\\t%d0, %1
1383 dup\\t%d0, %1"
1384 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1385 (set_attr "length" "4")
1386 (set_attr "arch" "simd,fp,simd")]
1387 )
1388
1389 (define_insn "move_lo_quad_internal_be_<mode>"
1390 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1391 (vec_concat:VQ_2E
1392 (const_int 0)
1393 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1394 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1395 "@
1396 dup\\t%d0, %1.d[0]
1397 fmov\\t%d0, %1
1398 dup\\t%d0, %1"
1399 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1400 (set_attr "length" "4")
1401 (set_attr "arch" "simd,fp,simd")]
1402 )
1403
1404 (define_expand "move_lo_quad_<mode>"
1405 [(match_operand:VQ 0 "register_operand")
1406 (match_operand:VQ 1 "register_operand")]
1407 "TARGET_SIMD"
1408 {
1409 if (BYTES_BIG_ENDIAN)
1410 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1411 else
1412 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1413 DONE;
1414 }
1415 )
1416
1417 ;; Move operand1 to the high architectural bits of the register, keeping
1418 ;; the low architectural bits of operand2.
1419 ;; For little-endian this is { operand2, operand1 }
1420 ;; For big-endian this is { operand1, operand2 }
1421
1422 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1423 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1424 (vec_concat:VQ
1425 (vec_select:<VHALF>
1426 (match_dup 0)
1427 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1428 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1429 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1430 "@
1431 ins\\t%0.d[1], %1.d[0]
1432 ins\\t%0.d[1], %1"
1433 [(set_attr "type" "neon_ins")]
1434 )
1435
1436 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1437 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1438 (vec_concat:VQ
1439 (match_operand:<VHALF> 1 "register_operand" "w,r")
1440 (vec_select:<VHALF>
1441 (match_dup 0)
1442 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1443 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1444 "@
1445 ins\\t%0.d[1], %1.d[0]
1446 ins\\t%0.d[1], %1"
1447 [(set_attr "type" "neon_ins")]
1448 )
1449
1450 (define_expand "move_hi_quad_<mode>"
1451 [(match_operand:VQ 0 "register_operand" "")
1452 (match_operand:<VHALF> 1 "register_operand" "")]
1453 "TARGET_SIMD"
1454 {
1455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1456 if (BYTES_BIG_ENDIAN)
1457 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1458 operands[1], p));
1459 else
1460 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1461 operands[1], p));
1462 DONE;
1463 })
1464
1465 ;; Narrowing operations.
1466
1467 ;; For doubles.
1468 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1469 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1470 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1471 "TARGET_SIMD"
1472 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1473 [(set_attr "type" "neon_shift_imm_narrow_q")]
1474 )
1475
1476 (define_expand "vec_pack_trunc_<mode>"
1477 [(match_operand:<VNARROWD> 0 "register_operand" "")
1478 (match_operand:VDN 1 "register_operand" "")
1479 (match_operand:VDN 2 "register_operand" "")]
1480 "TARGET_SIMD"
1481 {
1482 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1483 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1484 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1485
1486 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1487 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1488 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1489 DONE;
1490 })
1491
1492 ;; For quads.
1493
1494 (define_insn "vec_pack_trunc_<mode>"
1495 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1496 (vec_concat:<VNARROWQ2>
1497 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1498 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1499 "TARGET_SIMD"
1500 {
1501 if (BYTES_BIG_ENDIAN)
1502 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1503 else
1504 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1505 }
1506 [(set_attr "type" "multiple")
1507 (set_attr "length" "8")]
1508 )
1509
1510 ;; Widening operations.
1511
1512 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515 (match_operand:VQW 1 "register_operand" "w")
1516 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1517 )))]
1518 "TARGET_SIMD"
1519 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1520 [(set_attr "type" "neon_shift_imm_long")]
1521 )
1522
1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1524 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1525 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1526 (match_operand:VQW 1 "register_operand" "w")
1527 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1528 )))]
1529 "TARGET_SIMD"
1530 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1531 [(set_attr "type" "neon_shift_imm_long")]
1532 )
1533
1534 (define_expand "vec_unpack<su>_hi_<mode>"
1535 [(match_operand:<VWIDE> 0 "register_operand" "")
1536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1537 "TARGET_SIMD"
1538 {
1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1540 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1541 operands[1], p));
1542 DONE;
1543 }
1544 )
1545
1546 (define_expand "vec_unpack<su>_lo_<mode>"
1547 [(match_operand:<VWIDE> 0 "register_operand" "")
1548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1549 "TARGET_SIMD"
1550 {
1551 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1552 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1553 operands[1], p));
1554 DONE;
1555 }
1556 )
1557
1558 ;; Widening arithmetic.
1559
1560 (define_insn "*aarch64_<su>mlal_lo<mode>"
1561 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1562 (plus:<VWIDE>
1563 (mult:<VWIDE>
1564 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1565 (match_operand:VQW 2 "register_operand" "w")
1566 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1567 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1568 (match_operand:VQW 4 "register_operand" "w")
1569 (match_dup 3))))
1570 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571 "TARGET_SIMD"
1572 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1573 [(set_attr "type" "neon_mla_<Vetype>_long")]
1574 )
1575
1576 (define_insn "*aarch64_<su>mlal_hi<mode>"
1577 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578 (plus:<VWIDE>
1579 (mult:<VWIDE>
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1585 (match_dup 3))))
1586 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1587 "TARGET_SIMD"
1588 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1589 [(set_attr "type" "neon_mla_<Vetype>_long")]
1590 )
1591
1592 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1593 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594 (minus:<VWIDE>
1595 (match_operand:<VWIDE> 1 "register_operand" "0")
1596 (mult:<VWIDE>
1597 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1598 (match_operand:VQW 2 "register_operand" "w")
1599 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1600 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1601 (match_operand:VQW 4 "register_operand" "w")
1602 (match_dup 3))))))]
1603 "TARGET_SIMD"
1604 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1605 [(set_attr "type" "neon_mla_<Vetype>_long")]
1606 )
1607
1608 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1609 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1610 (minus:<VWIDE>
1611 (match_operand:<VWIDE> 1 "register_operand" "0")
1612 (mult:<VWIDE>
1613 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1614 (match_operand:VQW 2 "register_operand" "w")
1615 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1616 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1617 (match_operand:VQW 4 "register_operand" "w")
1618 (match_dup 3))))))]
1619 "TARGET_SIMD"
1620 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1621 [(set_attr "type" "neon_mla_<Vetype>_long")]
1622 )
1623
1624 (define_insn "*aarch64_<su>mlal<mode>"
1625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1626 (plus:<VWIDE>
1627 (mult:<VWIDE>
1628 (ANY_EXTEND:<VWIDE>
1629 (match_operand:VD_BHSI 1 "register_operand" "w"))
1630 (ANY_EXTEND:<VWIDE>
1631 (match_operand:VD_BHSI 2 "register_operand" "w")))
1632 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1633 "TARGET_SIMD"
1634 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1635 [(set_attr "type" "neon_mla_<Vetype>_long")]
1636 )
1637
1638 (define_insn "*aarch64_<su>mlsl<mode>"
1639 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1640 (minus:<VWIDE>
1641 (match_operand:<VWIDE> 1 "register_operand" "0")
1642 (mult:<VWIDE>
1643 (ANY_EXTEND:<VWIDE>
1644 (match_operand:VD_BHSI 2 "register_operand" "w"))
1645 (ANY_EXTEND:<VWIDE>
1646 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1647 "TARGET_SIMD"
1648 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1649 [(set_attr "type" "neon_mla_<Vetype>_long")]
1650 )
1651
1652 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1657 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 2 "register_operand" "w")
1659 (match_dup 3)))))]
1660 "TARGET_SIMD"
1661 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1662 [(set_attr "type" "neon_mul_<Vetype>_long")]
1663 )
1664
1665 (define_expand "vec_widen_<su>mult_lo_<mode>"
1666 [(match_operand:<VWIDE> 0 "register_operand" "")
1667 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1668 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1669 "TARGET_SIMD"
1670 {
1671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1672 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1673 operands[1],
1674 operands[2], p));
1675 DONE;
1676 }
1677 )
1678
1679 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 1 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 2 "register_operand" "w")
1686 (match_dup 3)))))]
1687 "TARGET_SIMD"
1688 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1689 [(set_attr "type" "neon_mul_<Vetype>_long")]
1690 )
1691
1692 (define_expand "vec_widen_<su>mult_hi_<mode>"
1693 [(match_operand:<VWIDE> 0 "register_operand" "")
1694 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1695 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1696 "TARGET_SIMD"
1697 {
1698 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1699 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1700 operands[1],
1701 operands[2], p));
1702 DONE;
1703
1704 }
1705 )
1706
1707 ;; FP vector operations.
1708 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1709 ;; double-precision (64-bit) floating-point data types and arithmetic as
1710 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1711 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1712 ;;
1713 ;; Floating-point operations can raise an exception. Vectorizing such
1714 ;; operations are safe because of reasons explained below.
1715 ;;
1716 ;; ARMv8 permits an extension to enable trapped floating-point
1717 ;; exception handling, however this is an optional feature. In the
1718 ;; event of a floating-point exception being raised by vectorised
1719 ;; code then:
1720 ;; 1. If trapped floating-point exceptions are available, then a trap
1721 ;; will be taken when any lane raises an enabled exception. A trap
1722 ;; handler may determine which lane raised the exception.
1723 ;; 2. Alternatively a sticky exception flag is set in the
1724 ;; floating-point status register (FPSR). Software may explicitly
1725 ;; test the exception flags, in which case the tests will either
1726 ;; prevent vectorisation, allowing precise identification of the
1727 ;; failing operation, or if tested outside of vectorisable regions
1728 ;; then the specific operation and lane are not of interest.
1729
1730 ;; FP arithmetic operations.
1731
1732 (define_insn "add<mode>3"
1733 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735 (match_operand:VHSDF 2 "register_operand" "w")))]
1736 "TARGET_SIMD"
1737 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1739 )
1740
1741 (define_insn "sub<mode>3"
1742 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744 (match_operand:VHSDF 2 "register_operand" "w")))]
1745 "TARGET_SIMD"
1746 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1747 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1748 )
1749
1750 (define_insn "mul<mode>3"
1751 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1752 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1753 (match_operand:VHSDF 2 "register_operand" "w")))]
1754 "TARGET_SIMD"
1755 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1757 )
1758
1759 (define_expand "div<mode>3"
1760 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1761 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1762 (match_operand:VHSDF 2 "register_operand" "w")))]
1763 "TARGET_SIMD"
1764 {
1765 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1766 DONE;
1767
1768 operands[1] = force_reg (<MODE>mode, operands[1]);
1769 })
1770
1771 (define_insn "*div<mode>3"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1774 (match_operand:VHSDF 2 "register_operand" "w")))]
1775 "TARGET_SIMD"
1776 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1777 [(set_attr "type" "neon_fp_div_<stype><q>")]
1778 )
1779
1780 (define_insn "neg<mode>2"
1781 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1782 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1783 "TARGET_SIMD"
1784 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1785 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1786 )
1787
1788 (define_insn "abs<mode>2"
1789 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1790 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1791 "TARGET_SIMD"
1792 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1793 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1794 )
1795
1796 (define_insn "fma<mode>4"
1797 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1798 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1799 (match_operand:VHSDF 2 "register_operand" "w")
1800 (match_operand:VHSDF 3 "register_operand" "0")))]
1801 "TARGET_SIMD"
1802 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1803 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1804 )
1805
1806 (define_insn "*aarch64_fma4_elt<mode>"
1807 [(set (match_operand:VDQF 0 "register_operand" "=w")
1808 (fma:VDQF
1809 (vec_duplicate:VDQF
1810 (vec_select:<VEL>
1811 (match_operand:VDQF 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQF 3 "register_operand" "w")
1814 (match_operand:VDQF 4 "register_operand" "0")))]
1815 "TARGET_SIMD"
1816 {
1817 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819 }
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
1823 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1824 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1825 (fma:VDQSF
1826 (vec_duplicate:VDQSF
1827 (vec_select:<VEL>
1828 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1829 (parallel [(match_operand:SI 2 "immediate_operand")])))
1830 (match_operand:VDQSF 3 "register_operand" "w")
1831 (match_operand:VDQSF 4 "register_operand" "0")))]
1832 "TARGET_SIMD"
1833 {
1834 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1835 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1836 }
1837 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1838 )
1839
1840 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1841 [(set (match_operand:VMUL 0 "register_operand" "=w")
1842 (fma:VMUL
1843 (vec_duplicate:VMUL
1844 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1845 (match_operand:VMUL 2 "register_operand" "w")
1846 (match_operand:VMUL 3 "register_operand" "0")))]
1847 "TARGET_SIMD"
1848 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1849 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1850 )
1851
1852 (define_insn "*aarch64_fma4_elt_to_64v2df"
1853 [(set (match_operand:DF 0 "register_operand" "=w")
1854 (fma:DF
1855 (vec_select:DF
1856 (match_operand:V2DF 1 "register_operand" "w")
1857 (parallel [(match_operand:SI 2 "immediate_operand")]))
1858 (match_operand:DF 3 "register_operand" "w")
1859 (match_operand:DF 4 "register_operand" "0")))]
1860 "TARGET_SIMD"
1861 {
1862 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1863 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1864 }
1865 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1866 )
1867
1868 (define_insn "fnma<mode>4"
1869 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1870 (fma:VHSDF
1871 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1872 (match_operand:VHSDF 2 "register_operand" "w")
1873 (match_operand:VHSDF 3 "register_operand" "0")))]
1874 "TARGET_SIMD"
1875 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1877 )
1878
1879 (define_insn "*aarch64_fnma4_elt<mode>"
1880 [(set (match_operand:VDQF 0 "register_operand" "=w")
1881 (fma:VDQF
1882 (neg:VDQF
1883 (match_operand:VDQF 3 "register_operand" "w"))
1884 (vec_duplicate:VDQF
1885 (vec_select:<VEL>
1886 (match_operand:VDQF 1 "register_operand" "<h_con>")
1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1888 (match_operand:VDQF 4 "register_operand" "0")))]
1889 "TARGET_SIMD"
1890 {
1891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893 }
1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1895 )
1896
1897 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1898 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1899 (fma:VDQSF
1900 (neg:VDQSF
1901 (match_operand:VDQSF 3 "register_operand" "w"))
1902 (vec_duplicate:VDQSF
1903 (vec_select:<VEL>
1904 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1905 (parallel [(match_operand:SI 2 "immediate_operand")])))
1906 (match_operand:VDQSF 4 "register_operand" "0")))]
1907 "TARGET_SIMD"
1908 {
1909 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1910 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1911 }
1912 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1913 )
1914
1915 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1916 [(set (match_operand:VMUL 0 "register_operand" "=w")
1917 (fma:VMUL
1918 (neg:VMUL
1919 (match_operand:VMUL 2 "register_operand" "w"))
1920 (vec_duplicate:VMUL
1921 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1922 (match_operand:VMUL 3 "register_operand" "0")))]
1923 "TARGET_SIMD"
1924 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1925 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1926 )
1927
1928 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1929 [(set (match_operand:DF 0 "register_operand" "=w")
1930 (fma:DF
1931 (vec_select:DF
1932 (match_operand:V2DF 1 "register_operand" "w")
1933 (parallel [(match_operand:SI 2 "immediate_operand")]))
1934 (neg:DF
1935 (match_operand:DF 3 "register_operand" "w"))
1936 (match_operand:DF 4 "register_operand" "0")))]
1937 "TARGET_SIMD"
1938 {
1939 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1940 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1941 }
1942 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1943 )
1944
1945 ;; Vector versions of the floating-point frint patterns.
1946 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1947 (define_insn "<frint_pattern><mode>2"
1948 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1949 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1950 FRINT))]
1951 "TARGET_SIMD"
1952 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1953 [(set_attr "type" "neon_fp_round_<stype><q>")]
1954 )
1955
1956 ;; Vector versions of the fcvt standard patterns.
1957 ;; Expands to lbtrunc, lround, lceil, lfloor
1958 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1959 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1960 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1961 [(match_operand:VHSDF 1 "register_operand" "w")]
1962 FCVT)))]
1963 "TARGET_SIMD"
1964 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1965 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1966 )
1967
1968 ;; HF Scalar variants of related SIMD instructions.
1969 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1970 [(set (match_operand:HI 0 "register_operand" "=w")
1971 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1972 FCVT)))]
1973 "TARGET_SIMD_F16INST"
1974 "fcvt<frint_suffix><su>\t%h0, %h1"
1975 [(set_attr "type" "neon_fp_to_int_s")]
1976 )
1977
1978 (define_insn "<optab>_trunchfhi2"
1979 [(set (match_operand:HI 0 "register_operand" "=w")
1980 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1981 "TARGET_SIMD_F16INST"
1982 "fcvtz<su>\t%h0, %h1"
1983 [(set_attr "type" "neon_fp_to_int_s")]
1984 )
1985
1986 (define_insn "<optab>hihf2"
1987 [(set (match_operand:HF 0 "register_operand" "=w")
1988 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1989 "TARGET_SIMD_F16INST"
1990 "<su_optab>cvtf\t%h0, %h1"
1991 [(set_attr "type" "neon_int_to_fp_s")]
1992 )
1993
1994 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1995 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1996 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1997 [(mult:VDQF
1998 (match_operand:VDQF 1 "register_operand" "w")
1999 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2000 UNSPEC_FRINTZ)))]
2001 "TARGET_SIMD
2002 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2003 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2004 {
2005 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2006 char buf[64];
2007 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2008 output_asm_insn (buf, operands);
2009 return "";
2010 }
2011 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2012 )
2013
2014 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2015 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2016 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2017 [(match_operand:VHSDF 1 "register_operand")]
2018 UNSPEC_FRINTZ)))]
2019 "TARGET_SIMD"
2020 {})
2021
2022 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2023 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2024 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2025 [(match_operand:VHSDF 1 "register_operand")]
2026 UNSPEC_FRINTZ)))]
2027 "TARGET_SIMD"
2028 {})
2029
2030 (define_expand "ftrunc<VHSDF:mode>2"
2031 [(set (match_operand:VHSDF 0 "register_operand")
2032 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2033 UNSPEC_FRINTZ))]
2034 "TARGET_SIMD"
2035 {})
2036
2037 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2038 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2039 (FLOATUORS:VHSDF
2040 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2041 "TARGET_SIMD"
2042 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2043 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2044 )
2045
2046 ;; Conversions between vectors of floats and doubles.
2047 ;; Contains a mix of patterns to match standard pattern names
2048 ;; and those for intrinsics.
2049
2050 ;; Float widening operations.
2051
2052 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2053 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2054 (float_extend:<VWIDE> (vec_select:<VHALF>
2055 (match_operand:VQ_HSF 1 "register_operand" "w")
2056 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2057 )))]
2058 "TARGET_SIMD"
2059 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2060 [(set_attr "type" "neon_fp_cvt_widen_s")]
2061 )
2062
2063 ;; Convert between fixed-point and floating-point (vector modes)
2064
2065 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2066 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2067 (unspec:<VHSDF:FCVT_TARGET>
2068 [(match_operand:VHSDF 1 "register_operand" "w")
2069 (match_operand:SI 2 "immediate_operand" "i")]
2070 FCVT_F2FIXED))]
2071 "TARGET_SIMD"
2072 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2073 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2074 )
2075
2076 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2077 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2078 (unspec:<VDQ_HSDI:FCVT_TARGET>
2079 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2080 (match_operand:SI 2 "immediate_operand" "i")]
2081 FCVT_FIXED2F))]
2082 "TARGET_SIMD"
2083 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2084 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2085 )
2086
2087 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2088 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2089 ;; the meaning of HI and LO changes depending on the target endianness.
2090 ;; While elsewhere we map the higher numbered elements of a vector to
2091 ;; the lower architectural lanes of the vector, for these patterns we want
2092 ;; to always treat "hi" as referring to the higher architectural lanes.
2093 ;; Consequently, while the patterns below look inconsistent with our
2094 ;; other big-endian patterns their behavior is as required.
2095
2096 (define_expand "vec_unpacks_lo_<mode>"
2097 [(match_operand:<VWIDE> 0 "register_operand" "")
2098 (match_operand:VQ_HSF 1 "register_operand" "")]
2099 "TARGET_SIMD"
2100 {
2101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2102 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2103 operands[1], p));
2104 DONE;
2105 }
2106 )
2107
2108 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2109 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2110 (float_extend:<VWIDE> (vec_select:<VHALF>
2111 (match_operand:VQ_HSF 1 "register_operand" "w")
2112 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2113 )))]
2114 "TARGET_SIMD"
2115 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2116 [(set_attr "type" "neon_fp_cvt_widen_s")]
2117 )
2118
2119 (define_expand "vec_unpacks_hi_<mode>"
2120 [(match_operand:<VWIDE> 0 "register_operand" "")
2121 (match_operand:VQ_HSF 1 "register_operand" "")]
2122 "TARGET_SIMD"
2123 {
2124 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2125 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2126 operands[1], p));
2127 DONE;
2128 }
2129 )
2130 (define_insn "aarch64_float_extend_lo_<Vwide>"
2131 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132 (float_extend:<VWIDE>
2133 (match_operand:VDF 1 "register_operand" "w")))]
2134 "TARGET_SIMD"
2135 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2136 [(set_attr "type" "neon_fp_cvt_widen_s")]
2137 )
2138
2139 ;; Float narrowing operations.
2140
2141 (define_insn "aarch64_float_truncate_lo_<mode>"
2142 [(set (match_operand:VDF 0 "register_operand" "=w")
2143 (float_truncate:VDF
2144 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2145 "TARGET_SIMD"
2146 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2147 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2148 )
2149
2150 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2151 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2152 (vec_concat:<VDBL>
2153 (match_operand:VDF 1 "register_operand" "0")
2154 (float_truncate:VDF
2155 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2156 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2157 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2158 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2159 )
2160
2161 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2162 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2163 (vec_concat:<VDBL>
2164 (float_truncate:VDF
2165 (match_operand:<VWIDE> 2 "register_operand" "w"))
2166 (match_operand:VDF 1 "register_operand" "0")))]
2167 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2168 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2169 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2170 )
2171
2172 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2173 [(match_operand:<VDBL> 0 "register_operand" "=w")
2174 (match_operand:VDF 1 "register_operand" "0")
2175 (match_operand:<VWIDE> 2 "register_operand" "w")]
2176 "TARGET_SIMD"
2177 {
2178 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2179 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2180 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2181 emit_insn (gen (operands[0], operands[1], operands[2]));
2182 DONE;
2183 }
2184 )
2185
2186 (define_expand "vec_pack_trunc_v2df"
2187 [(set (match_operand:V4SF 0 "register_operand")
2188 (vec_concat:V4SF
2189 (float_truncate:V2SF
2190 (match_operand:V2DF 1 "register_operand"))
2191 (float_truncate:V2SF
2192 (match_operand:V2DF 2 "register_operand"))
2193 ))]
2194 "TARGET_SIMD"
2195 {
2196 rtx tmp = gen_reg_rtx (V2SFmode);
2197 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2198 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2199
2200 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2201 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2202 tmp, operands[hi]));
2203 DONE;
2204 }
2205 )
2206
2207 (define_expand "vec_pack_trunc_df"
2208 [(set (match_operand:V2SF 0 "register_operand")
2209 (vec_concat:V2SF
2210 (float_truncate:SF
2211 (match_operand:DF 1 "register_operand"))
2212 (float_truncate:SF
2213 (match_operand:DF 2 "register_operand"))
2214 ))]
2215 "TARGET_SIMD"
2216 {
2217 rtx tmp = gen_reg_rtx (V2SFmode);
2218 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2219 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2220
2221 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2222 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2223 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2224 DONE;
2225 }
2226 )
2227
2228 ;; FP Max/Min
2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2230 ;; expression like:
2231 ;; a = (b < c) ? b : c;
2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2234 ;; -ffast-math.
2235 ;;
2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2238 ;; operand will be returned when both operands are zero (i.e. they may not
2239 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2240 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2241 ;; NaNs.
2242
2243 (define_insn "<su><maxmin><mode>3"
2244 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2245 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2246 (match_operand:VHSDF 2 "register_operand" "w")))]
2247 "TARGET_SIMD"
2248 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2249 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2250 )
2251
2252 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2253 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2254 ;; which implement the IEEE fmax ()/fmin () functions.
2255 (define_insn "<maxmin_uns><mode>3"
2256 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2257 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2258 (match_operand:VHSDF 2 "register_operand" "w")]
2259 FMAXMIN_UNS))]
2260 "TARGET_SIMD"
2261 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2262 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2263 )
2264
2265 ;; 'across lanes' add.
2266
2267 (define_expand "reduc_plus_scal_<mode>"
2268 [(match_operand:<VEL> 0 "register_operand" "=w")
2269 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2270 UNSPEC_ADDV)]
2271 "TARGET_SIMD"
2272 {
2273 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2274 rtx scratch = gen_reg_rtx (<MODE>mode);
2275 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2276 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2277 DONE;
2278 }
2279 )
2280
2281 (define_insn "aarch64_faddp<mode>"
2282 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2283 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2284 (match_operand:VHSDF 2 "register_operand" "w")]
2285 UNSPEC_FADDV))]
2286 "TARGET_SIMD"
2287 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2288 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2289 )
2290
2291 (define_insn "aarch64_reduc_plus_internal<mode>"
2292 [(set (match_operand:VDQV 0 "register_operand" "=w")
2293 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2294 UNSPEC_ADDV))]
2295 "TARGET_SIMD"
2296 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_reduc_add<q>")]
2298 )
2299
2300 (define_insn "aarch64_reduc_plus_internalv2si"
2301 [(set (match_operand:V2SI 0 "register_operand" "=w")
2302 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2303 UNSPEC_ADDV))]
2304 "TARGET_SIMD"
2305 "addp\\t%0.2s, %1.2s, %1.2s"
2306 [(set_attr "type" "neon_reduc_add")]
2307 )
2308
2309 (define_insn "reduc_plus_scal_<mode>"
2310 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2311 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2312 UNSPEC_FADDV))]
2313 "TARGET_SIMD"
2314 "faddp\\t%<Vetype>0, %1.<Vtype>"
2315 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2316 )
2317
2318 (define_expand "reduc_plus_scal_v4sf"
2319 [(set (match_operand:SF 0 "register_operand")
2320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2321 UNSPEC_FADDV))]
2322 "TARGET_SIMD"
2323 {
2324 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2325 rtx scratch = gen_reg_rtx (V4SFmode);
2326 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2327 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2328 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2329 DONE;
2330 })
2331
2332 (define_insn "clrsb<mode>2"
2333 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2334 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2335 "TARGET_SIMD"
2336 "cls\\t%0.<Vtype>, %1.<Vtype>"
2337 [(set_attr "type" "neon_cls<q>")]
2338 )
2339
2340 (define_insn "clz<mode>2"
2341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2342 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2343 "TARGET_SIMD"
2344 "clz\\t%0.<Vtype>, %1.<Vtype>"
2345 [(set_attr "type" "neon_cls<q>")]
2346 )
2347
2348 (define_insn "popcount<mode>2"
2349 [(set (match_operand:VB 0 "register_operand" "=w")
2350 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2351 "TARGET_SIMD"
2352 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2353 [(set_attr "type" "neon_cnt<q>")]
2354 )
2355
2356 ;; 'across lanes' max and min ops.
2357
2358 ;; Template for outputting a scalar, so we can create __builtins which can be
2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2361 [(match_operand:<VEL> 0 "register_operand")
2362 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2363 FMAXMINV)]
2364 "TARGET_SIMD"
2365 {
2366 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2367 rtx scratch = gen_reg_rtx (<MODE>mode);
2368 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2369 operands[1]));
2370 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2371 DONE;
2372 }
2373 )
2374
2375 ;; Likewise for integer cases, signed and unsigned.
2376 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2377 [(match_operand:<VEL> 0 "register_operand")
2378 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2379 MAXMINV)]
2380 "TARGET_SIMD"
2381 {
2382 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2383 rtx scratch = gen_reg_rtx (<MODE>mode);
2384 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2385 operands[1]));
2386 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2387 DONE;
2388 }
2389 )
2390
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2393 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2394 MAXMINV))]
2395 "TARGET_SIMD"
2396 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_reduc_minmax<q>")]
2398 )
2399
2400 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2401 [(set (match_operand:V2SI 0 "register_operand" "=w")
2402 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2403 MAXMINV))]
2404 "TARGET_SIMD"
2405 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2406 [(set_attr "type" "neon_reduc_minmax")]
2407 )
2408
2409 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2410 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2411 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2412 FMAXMINV))]
2413 "TARGET_SIMD"
2414 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2415 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2416 )
2417
2418 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2419 ;; allocation.
2420 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2421 ;; to select.
2422 ;;
2423 ;; Thus our BSL is of the form:
2424 ;; op0 = bsl (mask, op2, op3)
2425 ;; We can use any of:
2426 ;;
2427 ;; if (op0 = mask)
2428 ;; bsl mask, op1, op2
2429 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2430 ;; bit op0, op2, mask
2431 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2432 ;; bif op0, op1, mask
2433 ;;
2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2435 ;; Some forms of straight-line code may generate the equivalent form
2436 ;; in *aarch64_simd_bsl<mode>_alt.
2437
2438 (define_insn "aarch64_simd_bsl<mode>_internal"
2439 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2440 (xor:VDQ_I
2441 (and:VDQ_I
2442 (xor:VDQ_I
2443 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2444 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2445 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2446 (match_dup:<V_INT_EQUIV> 3)
2447 ))]
2448 "TARGET_SIMD"
2449 "@
2450 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2451 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2452 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2453 [(set_attr "type" "neon_bsl<q>")]
2454 )
2455
2456 ;; We need this form in addition to the above pattern to match the case
2457 ;; when combine tries merging three insns such that the second operand of
2458 ;; the outer XOR matches the second operand of the inner XOR rather than
2459 ;; the first. The two are equivalent but since recog doesn't try all
2460 ;; permutations of commutative operations, we have to have a separate pattern.
2461
2462 (define_insn "*aarch64_simd_bsl<mode>_alt"
2463 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2464 (xor:VDQ_I
2465 (and:VDQ_I
2466 (xor:VDQ_I
2467 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2468 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2469 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2470 (match_dup:<V_INT_EQUIV> 2)))]
2471 "TARGET_SIMD"
2472 "@
2473 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2474 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2475 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2476 [(set_attr "type" "neon_bsl<q>")]
2477 )
2478
2479 ;; DImode is special, we want to avoid computing operations which are
2480 ;; more naturally computed in general purpose registers in the vector
2481 ;; registers. If we do that, we need to move all three operands from general
2482 ;; purpose registers to vector registers, then back again. However, we
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484 ;; optimizations based on the component operations of a BSL.
2485 ;;
2486 ;; That means we need a splitter back to the individual operations, if they
2487 ;; would be better calculated on the integer side.
2488
2489 (define_insn_and_split "aarch64_simd_bsldi_internal"
2490 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2491 (xor:DI
2492 (and:DI
2493 (xor:DI
2494 (match_operand:DI 3 "register_operand" "w,0,w,r")
2495 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2497 (match_dup:DI 3)
2498 ))]
2499 "TARGET_SIMD"
2500 "@
2501 bsl\\t%0.8b, %2.8b, %3.8b
2502 bit\\t%0.8b, %2.8b, %1.8b
2503 bif\\t%0.8b, %3.8b, %1.8b
2504 #"
2505 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2506 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2507 {
2508 /* Split back to individual operations. If we're before reload, and
2509 able to create a temporary register, do so. If we're after reload,
2510 we've got an early-clobber destination register, so use that.
2511 Otherwise, we can't create pseudos and we can't yet guarantee that
2512 operands[0] is safe to write, so FAIL to split. */
2513
2514 rtx scratch;
2515 if (reload_completed)
2516 scratch = operands[0];
2517 else if (can_create_pseudo_p ())
2518 scratch = gen_reg_rtx (DImode);
2519 else
2520 FAIL;
2521
2522 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2525 DONE;
2526 }
2527 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528 (set_attr "length" "4,4,4,12")]
2529 )
2530
2531 (define_insn_and_split "aarch64_simd_bsldi_alt"
2532 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2533 (xor:DI
2534 (and:DI
2535 (xor:DI
2536 (match_operand:DI 3 "register_operand" "w,w,0,r")
2537 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2539 (match_dup:DI 2)
2540 ))]
2541 "TARGET_SIMD"
2542 "@
2543 bsl\\t%0.8b, %3.8b, %2.8b
2544 bit\\t%0.8b, %3.8b, %1.8b
2545 bif\\t%0.8b, %2.8b, %1.8b
2546 #"
2547 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2548 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2549 {
2550 /* Split back to individual operations. If we're before reload, and
2551 able to create a temporary register, do so. If we're after reload,
2552 we've got an early-clobber destination register, so use that.
2553 Otherwise, we can't create pseudos and we can't yet guarantee that
2554 operands[0] is safe to write, so FAIL to split. */
2555
2556 rtx scratch;
2557 if (reload_completed)
2558 scratch = operands[0];
2559 else if (can_create_pseudo_p ())
2560 scratch = gen_reg_rtx (DImode);
2561 else
2562 FAIL;
2563
2564 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2567 DONE;
2568 }
2569 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570 (set_attr "length" "4,4,4,12")]
2571 )
2572
2573 (define_expand "aarch64_simd_bsl<mode>"
2574 [(match_operand:VALLDIF 0 "register_operand")
2575 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2576 (match_operand:VALLDIF 2 "register_operand")
2577 (match_operand:VALLDIF 3 "register_operand")]
2578 "TARGET_SIMD"
2579 {
2580 /* We can't alias operands together if they have different modes. */
2581 rtx tmp = operands[0];
2582 if (FLOAT_MODE_P (<MODE>mode))
2583 {
2584 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2585 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2586 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2587 }
2588 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2589 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2590 operands[1],
2591 operands[2],
2592 operands[3]));
2593 if (tmp != operands[0])
2594 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2595
2596 DONE;
2597 })
2598
2599 (define_expand "vcond_mask_<mode><v_int_equiv>"
2600 [(match_operand:VALLDI 0 "register_operand")
2601 (match_operand:VALLDI 1 "nonmemory_operand")
2602 (match_operand:VALLDI 2 "nonmemory_operand")
2603 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2604 "TARGET_SIMD"
2605 {
2606 /* If we have (a = (P) ? -1 : 0);
2607 Then we can simply move the generated mask (result must be int). */
2608 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2609 && operands[2] == CONST0_RTX (<MODE>mode))
2610 emit_move_insn (operands[0], operands[3]);
2611 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2612 else if (operands[1] == CONST0_RTX (<MODE>mode)
2613 && operands[2] == CONSTM1_RTX (<MODE>mode))
2614 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2615 else
2616 {
2617 if (!REG_P (operands[1]))
2618 operands[1] = force_reg (<MODE>mode, operands[1]);
2619 if (!REG_P (operands[2]))
2620 operands[2] = force_reg (<MODE>mode, operands[2]);
2621 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2622 operands[1], operands[2]));
2623 }
2624
2625 DONE;
2626 })
2627
2628 ;; Patterns comparing two vectors to produce a mask.
2629
2630 (define_expand "vec_cmp<mode><mode>"
2631 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2632 (match_operator 1 "comparison_operator"
2633 [(match_operand:VSDQ_I_DI 2 "register_operand")
2634 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2635 "TARGET_SIMD"
2636 {
2637 rtx mask = operands[0];
2638 enum rtx_code code = GET_CODE (operands[1]);
2639
2640 switch (code)
2641 {
2642 case NE:
2643 case LE:
2644 case LT:
2645 case GE:
2646 case GT:
2647 case EQ:
2648 if (operands[3] == CONST0_RTX (<MODE>mode))
2649 break;
2650
2651 /* Fall through. */
2652 default:
2653 if (!REG_P (operands[3]))
2654 operands[3] = force_reg (<MODE>mode, operands[3]);
2655
2656 break;
2657 }
2658
2659 switch (code)
2660 {
2661 case LT:
2662 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2663 break;
2664
2665 case GE:
2666 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2667 break;
2668
2669 case LE:
2670 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2671 break;
2672
2673 case GT:
2674 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2675 break;
2676
2677 case LTU:
2678 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2679 break;
2680
2681 case GEU:
2682 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2683 break;
2684
2685 case LEU:
2686 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2687 break;
2688
2689 case GTU:
2690 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2691 break;
2692
2693 case NE:
2694 /* Handle NE as !EQ. */
2695 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2696 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2697 break;
2698
2699 case EQ:
2700 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2701 break;
2702
2703 default:
2704 gcc_unreachable ();
2705 }
2706
2707 DONE;
2708 })
2709
2710 (define_expand "vec_cmp<mode><v_int_equiv>"
2711 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2712 (match_operator 1 "comparison_operator"
2713 [(match_operand:VDQF 2 "register_operand")
2714 (match_operand:VDQF 3 "nonmemory_operand")]))]
2715 "TARGET_SIMD"
2716 {
2717 int use_zero_form = 0;
2718 enum rtx_code code = GET_CODE (operands[1]);
2719 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2720
2721 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2722
2723 switch (code)
2724 {
2725 case LE:
2726 case LT:
2727 case GE:
2728 case GT:
2729 case EQ:
2730 if (operands[3] == CONST0_RTX (<MODE>mode))
2731 {
2732 use_zero_form = 1;
2733 break;
2734 }
2735 /* Fall through. */
2736 default:
2737 if (!REG_P (operands[3]))
2738 operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740 break;
2741 }
2742
2743 switch (code)
2744 {
2745 case LT:
2746 if (use_zero_form)
2747 {
2748 comparison = gen_aarch64_cmlt<mode>;
2749 break;
2750 }
2751 /* Fall through. */
2752 case UNLT:
2753 std::swap (operands[2], operands[3]);
2754 /* Fall through. */
2755 case UNGT:
2756 case GT:
2757 comparison = gen_aarch64_cmgt<mode>;
2758 break;
2759 case LE:
2760 if (use_zero_form)
2761 {
2762 comparison = gen_aarch64_cmle<mode>;
2763 break;
2764 }
2765 /* Fall through. */
2766 case UNLE:
2767 std::swap (operands[2], operands[3]);
2768 /* Fall through. */
2769 case UNGE:
2770 case GE:
2771 comparison = gen_aarch64_cmge<mode>;
2772 break;
2773 case NE:
2774 case EQ:
2775 comparison = gen_aarch64_cmeq<mode>;
2776 break;
2777 case UNEQ:
2778 case ORDERED:
2779 case UNORDERED:
2780 case LTGT:
2781 break;
2782 default:
2783 gcc_unreachable ();
2784 }
2785
2786 switch (code)
2787 {
2788 case UNGE:
2789 case UNGT:
2790 case UNLE:
2791 case UNLT:
2792 {
2793 /* All of the above must not raise any FP exceptions. Thus we first
2794 check each operand for NaNs and force any elements containing NaN to
2795 zero before using them in the compare.
2796 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2797 (cm<cc> (isnan (a) ? 0.0 : a,
2798 isnan (b) ? 0.0 : b))
2799 We use the following transformations for doing the comparisions:
2800 a UNGE b -> a GE b
2801 a UNGT b -> a GT b
2802 a UNLE b -> b GE a
2803 a UNLT b -> b GT a. */
2804
2805 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2806 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812 lowpart_subreg (<V_INT_EQUIV>mode,
2813 operands[2],
2814 <MODE>mode)));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2817 operands[3],
2818 <MODE>mode)));
2819 gcc_assert (comparison != NULL);
2820 emit_insn (comparison (operands[0],
2821 lowpart_subreg (<MODE>mode,
2822 tmp0, <V_INT_EQUIV>mode),
2823 lowpart_subreg (<MODE>mode,
2824 tmp1, <V_INT_EQUIV>mode)));
2825 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2826 }
2827 break;
2828
2829 case LT:
2830 case LE:
2831 case GT:
2832 case GE:
2833 case EQ:
2834 case NE:
2835 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2836 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2837 a GE b -> a GE b
2838 a GT b -> a GT b
2839 a LE b -> b GE a
2840 a LT b -> b GT a
2841 a EQ b -> a EQ b
2842 a NE b -> ~(a EQ b) */
2843 gcc_assert (comparison != NULL);
2844 emit_insn (comparison (operands[0], operands[2], operands[3]));
2845 if (code == NE)
2846 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2847 break;
2848
2849 case LTGT:
2850 /* LTGT is not guranteed to not generate a FP exception. So let's
2851 go the faster way : ((a > b) || (b > a)). */
2852 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2853 operands[2], operands[3]));
2854 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2855 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2856 break;
2857
2858 case ORDERED:
2859 case UNORDERED:
2860 case UNEQ:
2861 /* cmeq (a, a) & cmeq (b, b). */
2862 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2863 operands[2], operands[2]));
2864 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2865 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2866
2867 if (code == UNORDERED)
2868 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2869 else if (code == UNEQ)
2870 {
2871 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2872 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2873 }
2874 break;
2875
2876 default:
2877 gcc_unreachable ();
2878 }
2879
2880 DONE;
2881 })
2882
2883 (define_expand "vec_cmpu<mode><mode>"
2884 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2885 (match_operator 1 "comparison_operator"
2886 [(match_operand:VSDQ_I_DI 2 "register_operand")
2887 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2888 "TARGET_SIMD"
2889 {
2890 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2891 operands[2], operands[3]));
2892 DONE;
2893 })
2894
2895 (define_expand "vcond<mode><mode>"
2896 [(set (match_operand:VALLDI 0 "register_operand")
2897 (if_then_else:VALLDI
2898 (match_operator 3 "comparison_operator"
2899 [(match_operand:VALLDI 4 "register_operand")
2900 (match_operand:VALLDI 5 "nonmemory_operand")])
2901 (match_operand:VALLDI 1 "nonmemory_operand")
2902 (match_operand:VALLDI 2 "nonmemory_operand")))]
2903 "TARGET_SIMD"
2904 {
2905 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2906 enum rtx_code code = GET_CODE (operands[3]);
2907
2908 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2909 it as well as switch operands 1/2 in order to avoid the additional
2910 NOT instruction. */
2911 if (code == NE)
2912 {
2913 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2914 operands[4], operands[5]);
2915 std::swap (operands[1], operands[2]);
2916 }
2917 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2918 operands[4], operands[5]));
2919 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2920 operands[2], mask));
2921
2922 DONE;
2923 })
2924
2925 (define_expand "vcond<v_cmp_mixed><mode>"
2926 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2927 (if_then_else:<V_cmp_mixed>
2928 (match_operator 3 "comparison_operator"
2929 [(match_operand:VDQF_COND 4 "register_operand")
2930 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2931 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2932 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2933 "TARGET_SIMD"
2934 {
2935 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2936 enum rtx_code code = GET_CODE (operands[3]);
2937
2938 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2939 it as well as switch operands 1/2 in order to avoid the additional
2940 NOT instruction. */
2941 if (code == NE)
2942 {
2943 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2944 operands[4], operands[5]);
2945 std::swap (operands[1], operands[2]);
2946 }
2947 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2948 operands[4], operands[5]));
2949 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2950 operands[0], operands[1],
2951 operands[2], mask));
2952
2953 DONE;
2954 })
2955
2956 (define_expand "vcondu<mode><mode>"
2957 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2958 (if_then_else:VSDQ_I_DI
2959 (match_operator 3 "comparison_operator"
2960 [(match_operand:VSDQ_I_DI 4 "register_operand")
2961 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2962 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2963 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2964 "TARGET_SIMD"
2965 {
2966 rtx mask = gen_reg_rtx (<MODE>mode);
2967 enum rtx_code code = GET_CODE (operands[3]);
2968
2969 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2970 it as well as switch operands 1/2 in order to avoid the additional
2971 NOT instruction. */
2972 if (code == NE)
2973 {
2974 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2975 operands[4], operands[5]);
2976 std::swap (operands[1], operands[2]);
2977 }
2978 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2979 operands[4], operands[5]));
2980 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2981 operands[2], mask));
2982 DONE;
2983 })
2984
2985 (define_expand "vcondu<mode><v_cmp_mixed>"
2986 [(set (match_operand:VDQF 0 "register_operand")
2987 (if_then_else:VDQF
2988 (match_operator 3 "comparison_operator"
2989 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2990 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2991 (match_operand:VDQF 1 "nonmemory_operand")
2992 (match_operand:VDQF 2 "nonmemory_operand")))]
2993 "TARGET_SIMD"
2994 {
2995 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2996 enum rtx_code code = GET_CODE (operands[3]);
2997
2998 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2999 it as well as switch operands 1/2 in order to avoid the additional
3000 NOT instruction. */
3001 if (code == NE)
3002 {
3003 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3004 operands[4], operands[5]);
3005 std::swap (operands[1], operands[2]);
3006 }
3007 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3008 mask, operands[3],
3009 operands[4], operands[5]));
3010 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3011 operands[2], mask));
3012 DONE;
3013 })
3014
3015 ;; Patterns for AArch64 SIMD Intrinsics.
3016
3017 ;; Lane extraction with sign extension to general purpose register.
3018 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3019 [(set (match_operand:GPI 0 "register_operand" "=r")
3020 (sign_extend:GPI
3021 (vec_select:<VEL>
3022 (match_operand:VDQQH 1 "register_operand" "w")
3023 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3024 "TARGET_SIMD"
3025 {
3026 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3027 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3028 }
3029 [(set_attr "type" "neon_to_gp<q>")]
3030 )
3031
3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3033 [(set (match_operand:GPI 0 "register_operand" "=r")
3034 (zero_extend:GPI
3035 (vec_select:<VEL>
3036 (match_operand:VDQQH 1 "register_operand" "w")
3037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3038 "TARGET_SIMD"
3039 {
3040 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041 INTVAL (operands[2]));
3042 return "umov\\t%w0, %1.<Vetype>[%2]";
3043 }
3044 [(set_attr "type" "neon_to_gp<q>")]
3045 )
3046
3047 ;; Lane extraction of a value, neither sign nor zero extension
3048 ;; is guaranteed so upper bits should be considered undefined.
3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3050 (define_insn "aarch64_get_lane<mode>"
3051 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3052 (vec_select:<VEL>
3053 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3054 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3055 "TARGET_SIMD"
3056 {
3057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3058 switch (which_alternative)
3059 {
3060 case 0:
3061 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3062 case 1:
3063 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3064 case 2:
3065 return "st1\\t{%1.<Vetype>}[%2], %0";
3066 default:
3067 gcc_unreachable ();
3068 }
3069 }
3070 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3071 )
3072
3073 (define_insn "load_pair_lanes<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3075 (vec_concat:<VDBL>
3076 (match_operand:VDC 1 "memory_operand" "Utq")
3077 (match_operand:VDC 2 "memory_operand" "m")))]
3078 "TARGET_SIMD && !STRICT_ALIGNMENT
3079 && rtx_equal_p (XEXP (operands[2], 0),
3080 plus_constant (Pmode,
3081 XEXP (operands[1], 0),
3082 GET_MODE_SIZE (<MODE>mode)))"
3083 "ldr\\t%q0, %1"
3084 [(set_attr "type" "neon_load1_1reg_q")]
3085 )
3086
3087 (define_insn "store_pair_lanes<mode>"
3088 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3089 (vec_concat:<VDBL>
3090 (match_operand:VDC 1 "register_operand" "w, r")
3091 (match_operand:VDC 2 "register_operand" "w, r")))]
3092 "TARGET_SIMD"
3093 "@
3094 stp\\t%d1, %d2, %y0
3095 stp\\t%x1, %x2, %y0"
3096 [(set_attr "type" "neon_stp, store_16")]
3097 )
3098
3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3100 ;; dest vector.
3101
3102 (define_insn "*aarch64_combinez<mode>"
3103 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3104 (vec_concat:<VDBL>
3105 (match_operand:VDC 1 "general_operand" "w,?r,m")
3106 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3107 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3108 "@
3109 mov\\t%0.8b, %1.8b
3110 fmov\t%d0, %1
3111 ldr\\t%d0, %1"
3112 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3113 (set_attr "arch" "simd,fp,simd")]
3114 )
3115
3116 (define_insn "*aarch64_combinez_be<mode>"
3117 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3118 (vec_concat:<VDBL>
3119 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3120 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3121 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3122 "@
3123 mov\\t%0.8b, %1.8b
3124 fmov\t%d0, %1
3125 ldr\\t%d0, %1"
3126 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3127 (set_attr "arch" "simd,fp,simd")]
3128 )
3129
3130 (define_expand "aarch64_combine<mode>"
3131 [(match_operand:<VDBL> 0 "register_operand")
3132 (match_operand:VDC 1 "register_operand")
3133 (match_operand:VDC 2 "register_operand")]
3134 "TARGET_SIMD"
3135 {
3136 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3137
3138 DONE;
3139 }
3140 )
3141
3142 (define_expand "@aarch64_simd_combine<mode>"
3143 [(match_operand:<VDBL> 0 "register_operand")
3144 (match_operand:VDC 1 "register_operand")
3145 (match_operand:VDC 2 "register_operand")]
3146 "TARGET_SIMD"
3147 {
3148 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3149 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3150 DONE;
3151 }
3152 [(set_attr "type" "multiple")]
3153 )
3154
3155 ;; <su><addsub>l<q>.
3156
3157 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3158 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3160 (match_operand:VQW 1 "register_operand" "w")
3161 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3162 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3163 (match_operand:VQW 2 "register_operand" "w")
3164 (match_dup 3)))))]
3165 "TARGET_SIMD"
3166 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3167 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3168 )
3169
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3173 (match_operand:VQW 1 "register_operand" "w")
3174 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3175 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3176 (match_operand:VQW 2 "register_operand" "w")
3177 (match_dup 3)))))]
3178 "TARGET_SIMD"
3179 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3180 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3181 )
3182
3183
3184 (define_expand "aarch64_saddl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3188 "TARGET_SIMD"
3189 {
3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3192 operands[2], p));
3193 DONE;
3194 })
3195
3196 (define_expand "aarch64_uaddl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3200 "TARGET_SIMD"
3201 {
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3204 operands[2], p));
3205 DONE;
3206 })
3207
3208 (define_expand "aarch64_ssubl2<mode>"
3209 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (match_operand:VQW 1 "register_operand" "w")
3211 (match_operand:VQW 2 "register_operand" "w")]
3212 "TARGET_SIMD"
3213 {
3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3215 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3216 operands[2], p));
3217 DONE;
3218 })
3219
3220 (define_expand "aarch64_usubl2<mode>"
3221 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3222 (match_operand:VQW 1 "register_operand" "w")
3223 (match_operand:VQW 2 "register_operand" "w")]
3224 "TARGET_SIMD"
3225 {
3226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3227 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3228 operands[2], p));
3229 DONE;
3230 })
3231
3232 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3234 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3235 (match_operand:VD_BHSI 1 "register_operand" "w"))
3236 (ANY_EXTEND:<VWIDE>
3237 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3238 "TARGET_SIMD"
3239 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3240 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3241 )
3242
3243 ;; <su><addsub>w<q>.
3244
3245 (define_expand "widen_ssum<mode>3"
3246 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3247 (plus:<VDBLW> (sign_extend:<VDBLW>
3248 (match_operand:VQW 1 "register_operand" ""))
3249 (match_operand:<VDBLW> 2 "register_operand" "")))]
3250 "TARGET_SIMD"
3251 {
3252 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3253 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3254
3255 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3256 operands[1], p));
3257 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3258 DONE;
3259 }
3260 )
3261
3262 (define_expand "widen_ssum<mode>3"
3263 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3264 (plus:<VWIDE> (sign_extend:<VWIDE>
3265 (match_operand:VD_BHSI 1 "register_operand" ""))
3266 (match_operand:<VWIDE> 2 "register_operand" "")))]
3267 "TARGET_SIMD"
3268 {
3269 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3270 DONE;
3271 })
3272
3273 (define_expand "widen_usum<mode>3"
3274 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3275 (plus:<VDBLW> (zero_extend:<VDBLW>
3276 (match_operand:VQW 1 "register_operand" ""))
3277 (match_operand:<VDBLW> 2 "register_operand" "")))]
3278 "TARGET_SIMD"
3279 {
3280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3281 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3282
3283 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3284 operands[1], p));
3285 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3286 DONE;
3287 }
3288 )
3289
3290 (define_expand "widen_usum<mode>3"
3291 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3292 (plus:<VWIDE> (zero_extend:<VWIDE>
3293 (match_operand:VD_BHSI 1 "register_operand" ""))
3294 (match_operand:<VWIDE> 2 "register_operand" "")))]
3295 "TARGET_SIMD"
3296 {
3297 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3298 DONE;
3299 })
3300
3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3302 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3303 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3304 (ANY_EXTEND:<VWIDE>
3305 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3306 "TARGET_SIMD"
3307 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_sub_widen")]
3309 )
3310
3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3312 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3313 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3314 (ANY_EXTEND:<VWIDE>
3315 (vec_select:<VHALF>
3316 (match_operand:VQW 2 "register_operand" "w")
3317 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3318 "TARGET_SIMD"
3319 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3320 [(set_attr "type" "neon_sub_widen")]
3321 )
3322
3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3325 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3326 (ANY_EXTEND:<VWIDE>
3327 (vec_select:<VHALF>
3328 (match_operand:VQW 2 "register_operand" "w")
3329 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3330 "TARGET_SIMD"
3331 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3332 [(set_attr "type" "neon_sub_widen")]
3333 )
3334
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3337 (plus:<VWIDE>
3338 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3340 "TARGET_SIMD"
3341 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342 [(set_attr "type" "neon_add_widen")]
3343 )
3344
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3347 (plus:<VWIDE>
3348 (ANY_EXTEND:<VWIDE>
3349 (vec_select:<VHALF>
3350 (match_operand:VQW 2 "register_operand" "w")
3351 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3353 "TARGET_SIMD"
3354 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355 [(set_attr "type" "neon_add_widen")]
3356 )
3357
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3360 (plus:<VWIDE>
3361 (ANY_EXTEND:<VWIDE>
3362 (vec_select:<VHALF>
3363 (match_operand:VQW 2 "register_operand" "w")
3364 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3366 "TARGET_SIMD"
3367 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368 [(set_attr "type" "neon_add_widen")]
3369 )
3370
3371 (define_expand "aarch64_saddw2<mode>"
3372 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373 (match_operand:<VWIDE> 1 "register_operand" "w")
3374 (match_operand:VQW 2 "register_operand" "w")]
3375 "TARGET_SIMD"
3376 {
3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3378 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3379 operands[2], p));
3380 DONE;
3381 })
3382
3383 (define_expand "aarch64_uaddw2<mode>"
3384 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3385 (match_operand:<VWIDE> 1 "register_operand" "w")
3386 (match_operand:VQW 2 "register_operand" "w")]
3387 "TARGET_SIMD"
3388 {
3389 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3390 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3391 operands[2], p));
3392 DONE;
3393 })
3394
3395
3396 (define_expand "aarch64_ssubw2<mode>"
3397 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3398 (match_operand:<VWIDE> 1 "register_operand" "w")
3399 (match_operand:VQW 2 "register_operand" "w")]
3400 "TARGET_SIMD"
3401 {
3402 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3403 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3404 operands[2], p));
3405 DONE;
3406 })
3407
3408 (define_expand "aarch64_usubw2<mode>"
3409 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3410 (match_operand:<VWIDE> 1 "register_operand" "w")
3411 (match_operand:VQW 2 "register_operand" "w")]
3412 "TARGET_SIMD"
3413 {
3414 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3415 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3416 operands[2], p));
3417 DONE;
3418 })
3419
3420 ;; <su><r>h<addsub>.
3421
3422 (define_expand "<u>avg<mode>3_floor"
3423 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425 (match_operand:VDQ_BHSI 2 "register_operand")]
3426 HADD))]
3427 "TARGET_SIMD"
3428 )
3429
3430 (define_expand "<u>avg<mode>3_ceil"
3431 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433 (match_operand:VDQ_BHSI 2 "register_operand")]
3434 RHADD))]
3435 "TARGET_SIMD"
3436 )
3437
3438 (define_insn "aarch64_<sur>h<addsub><mode>"
3439 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3440 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3441 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3442 HADDSUB))]
3443 "TARGET_SIMD"
3444 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3445 [(set_attr "type" "neon_<addsub>_halve<q>")]
3446 )
3447
3448 ;; <r><addsub>hn<q>.
3449
3450 (define_insn "aarch64_<sur><addsub>hn<mode>"
3451 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3452 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3453 (match_operand:VQN 2 "register_operand" "w")]
3454 ADDSUBHN))]
3455 "TARGET_SIMD"
3456 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3457 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3458 )
3459
3460 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3461 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3462 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3463 (match_operand:VQN 2 "register_operand" "w")
3464 (match_operand:VQN 3 "register_operand" "w")]
3465 ADDSUBHN2))]
3466 "TARGET_SIMD"
3467 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3468 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3469 )
3470
3471 ;; pmul.
3472
3473 (define_insn "aarch64_pmul<mode>"
3474 [(set (match_operand:VB 0 "register_operand" "=w")
3475 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3476 (match_operand:VB 2 "register_operand" "w")]
3477 UNSPEC_PMUL))]
3478 "TARGET_SIMD"
3479 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3480 [(set_attr "type" "neon_mul_<Vetype><q>")]
3481 )
3482
3483 ;; fmulx.
3484
3485 (define_insn "aarch64_fmulx<mode>"
3486 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3487 (unspec:VHSDF_HSDF
3488 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3489 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3490 UNSPEC_FMULX))]
3491 "TARGET_SIMD"
3492 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3493 [(set_attr "type" "neon_fp_mul_<stype>")]
3494 )
3495
3496 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3497
3498 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3499 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3500 (unspec:VDQSF
3501 [(match_operand:VDQSF 1 "register_operand" "w")
3502 (vec_duplicate:VDQSF
3503 (vec_select:<VEL>
3504 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3505 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3506 UNSPEC_FMULX))]
3507 "TARGET_SIMD"
3508 {
3509 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3510 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3511 }
3512 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3513 )
3514
3515 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3516
3517 (define_insn "*aarch64_mulx_elt<mode>"
3518 [(set (match_operand:VDQF 0 "register_operand" "=w")
3519 (unspec:VDQF
3520 [(match_operand:VDQF 1 "register_operand" "w")
3521 (vec_duplicate:VDQF
3522 (vec_select:<VEL>
3523 (match_operand:VDQF 2 "register_operand" "w")
3524 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3525 UNSPEC_FMULX))]
3526 "TARGET_SIMD"
3527 {
3528 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3529 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3530 }
3531 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3532 )
3533
3534 ;; vmulxq_lane
3535
3536 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3537 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3538 (unspec:VHSDF
3539 [(match_operand:VHSDF 1 "register_operand" "w")
3540 (vec_duplicate:VHSDF
3541 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3542 UNSPEC_FMULX))]
3543 "TARGET_SIMD"
3544 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3545 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3546 )
3547
3548 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3549 ;; vmulxd_lane_f64 == vmulx_lane_f64
3550 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3551
3552 (define_insn "*aarch64_vgetfmulx<mode>"
3553 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3554 (unspec:<VEL>
3555 [(match_operand:<VEL> 1 "register_operand" "w")
3556 (vec_select:<VEL>
3557 (match_operand:VDQF 2 "register_operand" "w")
3558 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3559 UNSPEC_FMULX))]
3560 "TARGET_SIMD"
3561 {
3562 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3563 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3564 }
3565 [(set_attr "type" "fmul<Vetype>")]
3566 )
3567 ;; <su>q<addsub>
3568
3569 (define_insn "aarch64_<su_optab><optab><mode>"
3570 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3571 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3572 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3573 "TARGET_SIMD"
3574 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3575 [(set_attr "type" "neon_<optab><q>")]
3576 )
3577
3578 ;; suqadd and usqadd
3579
3580 (define_insn "aarch64_<sur>qadd<mode>"
3581 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3582 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3583 (match_operand:VSDQ_I 2 "register_operand" "w")]
3584 USSUQADD))]
3585 "TARGET_SIMD"
3586 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3587 [(set_attr "type" "neon_qadd<q>")]
3588 )
3589
3590 ;; sqmovun
3591
3592 (define_insn "aarch64_sqmovun<mode>"
3593 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3594 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3595 UNSPEC_SQXTUN))]
3596 "TARGET_SIMD"
3597 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3599 )
3600
3601 ;; sqmovn and uqmovn
3602
3603 (define_insn "aarch64_<sur>qmovn<mode>"
3604 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3605 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3606 SUQMOVN))]
3607 "TARGET_SIMD"
3608 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3609 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3610 )
3611
3612 ;; <su>q<absneg>
3613
3614 (define_insn "aarch64_s<optab><mode>"
3615 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3616 (UNQOPS:VSDQ_I
3617 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3618 "TARGET_SIMD"
3619 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3620 [(set_attr "type" "neon_<optab><q>")]
3621 )
3622
3623 ;; sq<r>dmulh.
3624
3625 (define_insn "aarch64_sq<r>dmulh<mode>"
3626 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3627 (unspec:VSDQ_HSI
3628 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3630 VQDMULH))]
3631 "TARGET_SIMD"
3632 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3633 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3634 )
3635
3636 ;; sq<r>dmulh_lane
3637
3638 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640 (unspec:VDQHS
3641 [(match_operand:VDQHS 1 "register_operand" "w")
3642 (vec_select:<VEL>
3643 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3644 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3645 VQDMULH))]
3646 "TARGET_SIMD"
3647 "*
3648 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3649 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3650 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3651 )
3652
3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3654 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3655 (unspec:VDQHS
3656 [(match_operand:VDQHS 1 "register_operand" "w")
3657 (vec_select:<VEL>
3658 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3659 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3660 VQDMULH))]
3661 "TARGET_SIMD"
3662 "*
3663 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3666 )
3667
3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3669 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3670 (unspec:SD_HSI
3671 [(match_operand:SD_HSI 1 "register_operand" "w")
3672 (vec_select:<VEL>
3673 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3675 VQDMULH))]
3676 "TARGET_SIMD"
3677 "*
3678 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3679 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3680 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3681 )
3682
3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3684 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3685 (unspec:SD_HSI
3686 [(match_operand:SD_HSI 1 "register_operand" "w")
3687 (vec_select:<VEL>
3688 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3689 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3690 VQDMULH))]
3691 "TARGET_SIMD"
3692 "*
3693 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3694 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3695 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3696 )
3697
3698 ;; sqrdml[as]h.
3699
3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3701 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3702 (unspec:VSDQ_HSI
3703 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3704 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3705 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3706 SQRDMLH_AS))]
3707 "TARGET_SIMD_RDMA"
3708 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3710 )
3711
3712 ;; sqrdml[as]h_lane.
3713
3714 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3715 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3716 (unspec:VDQHS
3717 [(match_operand:VDQHS 1 "register_operand" "0")
3718 (match_operand:VDQHS 2 "register_operand" "w")
3719 (vec_select:<VEL>
3720 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3721 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3722 SQRDMLH_AS))]
3723 "TARGET_SIMD_RDMA"
3724 {
3725 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3726 return
3727 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3728 }
3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3730 )
3731
3732 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3733 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3734 (unspec:SD_HSI
3735 [(match_operand:SD_HSI 1 "register_operand" "0")
3736 (match_operand:SD_HSI 2 "register_operand" "w")
3737 (vec_select:<VEL>
3738 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3739 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3740 SQRDMLH_AS))]
3741 "TARGET_SIMD_RDMA"
3742 {
3743 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3744 return
3745 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3746 }
3747 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3748 )
3749
3750 ;; sqrdml[as]h_laneq.
3751
3752 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3753 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3754 (unspec:VDQHS
3755 [(match_operand:VDQHS 1 "register_operand" "0")
3756 (match_operand:VDQHS 2 "register_operand" "w")
3757 (vec_select:<VEL>
3758 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3759 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3760 SQRDMLH_AS))]
3761 "TARGET_SIMD_RDMA"
3762 {
3763 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3764 return
3765 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3766 }
3767 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3768 )
3769
3770 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3771 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3772 (unspec:SD_HSI
3773 [(match_operand:SD_HSI 1 "register_operand" "0")
3774 (match_operand:SD_HSI 2 "register_operand" "w")
3775 (vec_select:<VEL>
3776 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3777 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3778 SQRDMLH_AS))]
3779 "TARGET_SIMD_RDMA"
3780 {
3781 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3782 return
3783 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3784 }
3785 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3786 )
3787
3788 ;; vqdml[sa]l
3789
3790 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3791 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792 (SBINQOPS:<VWIDE>
3793 (match_operand:<VWIDE> 1 "register_operand" "0")
3794 (ss_ashift:<VWIDE>
3795 (mult:<VWIDE>
3796 (sign_extend:<VWIDE>
3797 (match_operand:VSD_HSI 2 "register_operand" "w"))
3798 (sign_extend:<VWIDE>
3799 (match_operand:VSD_HSI 3 "register_operand" "w")))
3800 (const_int 1))))]
3801 "TARGET_SIMD"
3802 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3803 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3804 )
3805
3806 ;; vqdml[sa]l_lane
3807
3808 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3810 (SBINQOPS:<VWIDE>
3811 (match_operand:<VWIDE> 1 "register_operand" "0")
3812 (ss_ashift:<VWIDE>
3813 (mult:<VWIDE>
3814 (sign_extend:<VWIDE>
3815 (match_operand:VD_HSI 2 "register_operand" "w"))
3816 (sign_extend:<VWIDE>
3817 (vec_duplicate:VD_HSI
3818 (vec_select:<VEL>
3819 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821 ))
3822 (const_int 1))))]
3823 "TARGET_SIMD"
3824 {
3825 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3826 return
3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828 }
3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3830 )
3831
3832 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834 (SBINQOPS:<VWIDE>
3835 (match_operand:<VWIDE> 1 "register_operand" "0")
3836 (ss_ashift:<VWIDE>
3837 (mult:<VWIDE>
3838 (sign_extend:<VWIDE>
3839 (match_operand:VD_HSI 2 "register_operand" "w"))
3840 (sign_extend:<VWIDE>
3841 (vec_duplicate:VD_HSI
3842 (vec_select:<VEL>
3843 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3844 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3845 ))
3846 (const_int 1))))]
3847 "TARGET_SIMD"
3848 {
3849 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3850 return
3851 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3852 }
3853 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3854 )
3855
3856 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3857 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3858 (SBINQOPS:<VWIDE>
3859 (match_operand:<VWIDE> 1 "register_operand" "0")
3860 (ss_ashift:<VWIDE>
3861 (mult:<VWIDE>
3862 (sign_extend:<VWIDE>
3863 (match_operand:SD_HSI 2 "register_operand" "w"))
3864 (sign_extend:<VWIDE>
3865 (vec_select:<VEL>
3866 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3867 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3868 )
3869 (const_int 1))))]
3870 "TARGET_SIMD"
3871 {
3872 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3873 return
3874 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3875 }
3876 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3877 )
3878
3879 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3880 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3881 (SBINQOPS:<VWIDE>
3882 (match_operand:<VWIDE> 1 "register_operand" "0")
3883 (ss_ashift:<VWIDE>
3884 (mult:<VWIDE>
3885 (sign_extend:<VWIDE>
3886 (match_operand:SD_HSI 2 "register_operand" "w"))
3887 (sign_extend:<VWIDE>
3888 (vec_select:<VEL>
3889 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3890 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3891 )
3892 (const_int 1))))]
3893 "TARGET_SIMD"
3894 {
3895 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3896 return
3897 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3898 }
3899 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3900 )
3901
3902 ;; vqdml[sa]l_n
3903
3904 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3905 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3906 (SBINQOPS:<VWIDE>
3907 (match_operand:<VWIDE> 1 "register_operand" "0")
3908 (ss_ashift:<VWIDE>
3909 (mult:<VWIDE>
3910 (sign_extend:<VWIDE>
3911 (match_operand:VD_HSI 2 "register_operand" "w"))
3912 (sign_extend:<VWIDE>
3913 (vec_duplicate:VD_HSI
3914 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3915 (const_int 1))))]
3916 "TARGET_SIMD"
3917 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3919 )
3920
3921 ;; sqdml[as]l2
3922
3923 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3925 (SBINQOPS:<VWIDE>
3926 (match_operand:<VWIDE> 1 "register_operand" "0")
3927 (ss_ashift:<VWIDE>
3928 (mult:<VWIDE>
3929 (sign_extend:<VWIDE>
3930 (vec_select:<VHALF>
3931 (match_operand:VQ_HSI 2 "register_operand" "w")
3932 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933 (sign_extend:<VWIDE>
3934 (vec_select:<VHALF>
3935 (match_operand:VQ_HSI 3 "register_operand" "w")
3936 (match_dup 4))))
3937 (const_int 1))))]
3938 "TARGET_SIMD"
3939 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3940 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3941 )
3942
3943 (define_expand "aarch64_sqdmlal2<mode>"
3944 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3945 (match_operand:<VWIDE> 1 "register_operand" "w")
3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3947 (match_operand:VQ_HSI 3 "register_operand" "w")]
3948 "TARGET_SIMD"
3949 {
3950 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3951 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3952 operands[2], operands[3], p));
3953 DONE;
3954 })
3955
3956 (define_expand "aarch64_sqdmlsl2<mode>"
3957 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3958 (match_operand:<VWIDE> 1 "register_operand" "w")
3959 (match_operand:VQ_HSI 2 "register_operand" "w")
3960 (match_operand:VQ_HSI 3 "register_operand" "w")]
3961 "TARGET_SIMD"
3962 {
3963 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3964 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3965 operands[2], operands[3], p));
3966 DONE;
3967 })
3968
3969 ;; vqdml[sa]l2_lane
3970
3971 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3972 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3973 (SBINQOPS:<VWIDE>
3974 (match_operand:<VWIDE> 1 "register_operand" "0")
3975 (ss_ashift:<VWIDE>
3976 (mult:<VWIDE>
3977 (sign_extend:<VWIDE>
3978 (vec_select:<VHALF>
3979 (match_operand:VQ_HSI 2 "register_operand" "w")
3980 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3981 (sign_extend:<VWIDE>
3982 (vec_duplicate:<VHALF>
3983 (vec_select:<VEL>
3984 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3986 ))))
3987 (const_int 1))))]
3988 "TARGET_SIMD"
3989 {
3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991 return
3992 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993 }
3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3995 )
3996
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3998 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999 (SBINQOPS:<VWIDE>
4000 (match_operand:<VWIDE> 1 "register_operand" "0")
4001 (ss_ashift:<VWIDE>
4002 (mult:<VWIDE>
4003 (sign_extend:<VWIDE>
4004 (vec_select:<VHALF>
4005 (match_operand:VQ_HSI 2 "register_operand" "w")
4006 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4007 (sign_extend:<VWIDE>
4008 (vec_duplicate:<VHALF>
4009 (vec_select:<VEL>
4010 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4011 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4012 ))))
4013 (const_int 1))))]
4014 "TARGET_SIMD"
4015 {
4016 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4017 return
4018 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4019 }
4020 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4021 )
4022
4023 (define_expand "aarch64_sqdmlal2_lane<mode>"
4024 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4025 (match_operand:<VWIDE> 1 "register_operand" "w")
4026 (match_operand:VQ_HSI 2 "register_operand" "w")
4027 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4028 (match_operand:SI 4 "immediate_operand" "i")]
4029 "TARGET_SIMD"
4030 {
4031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4032 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4033 operands[2], operands[3],
4034 operands[4], p));
4035 DONE;
4036 })
4037
4038 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4039 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4040 (match_operand:<VWIDE> 1 "register_operand" "w")
4041 (match_operand:VQ_HSI 2 "register_operand" "w")
4042 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4043 (match_operand:SI 4 "immediate_operand" "i")]
4044 "TARGET_SIMD"
4045 {
4046 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4047 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4048 operands[2], operands[3],
4049 operands[4], p));
4050 DONE;
4051 })
4052
4053 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4054 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (match_operand:<VWIDE> 1 "register_operand" "w")
4056 (match_operand:VQ_HSI 2 "register_operand" "w")
4057 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4058 (match_operand:SI 4 "immediate_operand" "i")]
4059 "TARGET_SIMD"
4060 {
4061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4062 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4063 operands[2], operands[3],
4064 operands[4], p));
4065 DONE;
4066 })
4067
4068 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4069 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4070 (match_operand:<VWIDE> 1 "register_operand" "w")
4071 (match_operand:VQ_HSI 2 "register_operand" "w")
4072 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4073 (match_operand:SI 4 "immediate_operand" "i")]
4074 "TARGET_SIMD"
4075 {
4076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4077 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4078 operands[2], operands[3],
4079 operands[4], p));
4080 DONE;
4081 })
4082
4083 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4085 (SBINQOPS:<VWIDE>
4086 (match_operand:<VWIDE> 1 "register_operand" "0")
4087 (ss_ashift:<VWIDE>
4088 (mult:<VWIDE>
4089 (sign_extend:<VWIDE>
4090 (vec_select:<VHALF>
4091 (match_operand:VQ_HSI 2 "register_operand" "w")
4092 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4093 (sign_extend:<VWIDE>
4094 (vec_duplicate:<VHALF>
4095 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4096 (const_int 1))))]
4097 "TARGET_SIMD"
4098 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4099 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4100 )
4101
4102 (define_expand "aarch64_sqdmlal2_n<mode>"
4103 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4104 (match_operand:<VWIDE> 1 "register_operand" "w")
4105 (match_operand:VQ_HSI 2 "register_operand" "w")
4106 (match_operand:<VEL> 3 "register_operand" "w")]
4107 "TARGET_SIMD"
4108 {
4109 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4110 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4111 operands[2], operands[3],
4112 p));
4113 DONE;
4114 })
4115
4116 (define_expand "aarch64_sqdmlsl2_n<mode>"
4117 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (match_operand:<VWIDE> 1 "register_operand" "w")
4119 (match_operand:VQ_HSI 2 "register_operand" "w")
4120 (match_operand:<VEL> 3 "register_operand" "w")]
4121 "TARGET_SIMD"
4122 {
4123 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4124 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4125 operands[2], operands[3],
4126 p));
4127 DONE;
4128 })
4129
4130 ;; vqdmull
4131
4132 (define_insn "aarch64_sqdmull<mode>"
4133 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4134 (ss_ashift:<VWIDE>
4135 (mult:<VWIDE>
4136 (sign_extend:<VWIDE>
4137 (match_operand:VSD_HSI 1 "register_operand" "w"))
4138 (sign_extend:<VWIDE>
4139 (match_operand:VSD_HSI 2 "register_operand" "w")))
4140 (const_int 1)))]
4141 "TARGET_SIMD"
4142 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4143 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4144 )
4145
4146 ;; vqdmull_lane
4147
4148 (define_insn "aarch64_sqdmull_lane<mode>"
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150 (ss_ashift:<VWIDE>
4151 (mult:<VWIDE>
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4156 (vec_select:<VEL>
4157 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4158 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4159 ))
4160 (const_int 1)))]
4161 "TARGET_SIMD"
4162 {
4163 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4164 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4165 }
4166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4167 )
4168
4169 (define_insn "aarch64_sqdmull_laneq<mode>"
4170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4171 (ss_ashift:<VWIDE>
4172 (mult:<VWIDE>
4173 (sign_extend:<VWIDE>
4174 (match_operand:VD_HSI 1 "register_operand" "w"))
4175 (sign_extend:<VWIDE>
4176 (vec_duplicate:VD_HSI
4177 (vec_select:<VEL>
4178 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4179 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4180 ))
4181 (const_int 1)))]
4182 "TARGET_SIMD"
4183 {
4184 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4185 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4186 }
4187 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4188 )
4189
4190 (define_insn "aarch64_sqdmull_lane<mode>"
4191 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4192 (ss_ashift:<VWIDE>
4193 (mult:<VWIDE>
4194 (sign_extend:<VWIDE>
4195 (match_operand:SD_HSI 1 "register_operand" "w"))
4196 (sign_extend:<VWIDE>
4197 (vec_select:<VEL>
4198 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4199 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4200 ))
4201 (const_int 1)))]
4202 "TARGET_SIMD"
4203 {
4204 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4205 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4206 }
4207 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4208 )
4209
4210 (define_insn "aarch64_sqdmull_laneq<mode>"
4211 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4212 (ss_ashift:<VWIDE>
4213 (mult:<VWIDE>
4214 (sign_extend:<VWIDE>
4215 (match_operand:SD_HSI 1 "register_operand" "w"))
4216 (sign_extend:<VWIDE>
4217 (vec_select:<VEL>
4218 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4219 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4220 ))
4221 (const_int 1)))]
4222 "TARGET_SIMD"
4223 {
4224 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4225 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4226 }
4227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4228 )
4229
4230 ;; vqdmull_n
4231
4232 (define_insn "aarch64_sqdmull_n<mode>"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234 (ss_ashift:<VWIDE>
4235 (mult:<VWIDE>
4236 (sign_extend:<VWIDE>
4237 (match_operand:VD_HSI 1 "register_operand" "w"))
4238 (sign_extend:<VWIDE>
4239 (vec_duplicate:VD_HSI
4240 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4241 )
4242 (const_int 1)))]
4243 "TARGET_SIMD"
4244 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4245 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4246 )
4247
4248 ;; vqdmull2
4249
4250
4251
4252 (define_insn "aarch64_sqdmull2<mode>_internal"
4253 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4254 (ss_ashift:<VWIDE>
4255 (mult:<VWIDE>
4256 (sign_extend:<VWIDE>
4257 (vec_select:<VHALF>
4258 (match_operand:VQ_HSI 1 "register_operand" "w")
4259 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4260 (sign_extend:<VWIDE>
4261 (vec_select:<VHALF>
4262 (match_operand:VQ_HSI 2 "register_operand" "w")
4263 (match_dup 3)))
4264 )
4265 (const_int 1)))]
4266 "TARGET_SIMD"
4267 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4268 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4269 )
4270
4271 (define_expand "aarch64_sqdmull2<mode>"
4272 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4273 (match_operand:VQ_HSI 1 "register_operand" "w")
4274 (match_operand:VQ_HSI 2 "register_operand" "w")]
4275 "TARGET_SIMD"
4276 {
4277 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4278 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4279 operands[2], p));
4280 DONE;
4281 })
4282
4283 ;; vqdmull2_lane
4284
4285 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (ss_ashift:<VWIDE>
4288 (mult:<VWIDE>
4289 (sign_extend:<VWIDE>
4290 (vec_select:<VHALF>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4295 (vec_select:<VEL>
4296 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298 ))
4299 (const_int 1)))]
4300 "TARGET_SIMD"
4301 {
4302 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4303 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304 }
4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4306 )
4307
4308 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310 (ss_ashift:<VWIDE>
4311 (mult:<VWIDE>
4312 (sign_extend:<VWIDE>
4313 (vec_select:<VHALF>
4314 (match_operand:VQ_HSI 1 "register_operand" "w")
4315 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4316 (sign_extend:<VWIDE>
4317 (vec_duplicate:<VHALF>
4318 (vec_select:<VEL>
4319 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4321 ))
4322 (const_int 1)))]
4323 "TARGET_SIMD"
4324 {
4325 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4326 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4327 }
4328 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4329 )
4330
4331 (define_expand "aarch64_sqdmull2_lane<mode>"
4332 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4333 (match_operand:VQ_HSI 1 "register_operand" "w")
4334 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4335 (match_operand:SI 3 "immediate_operand" "i")]
4336 "TARGET_SIMD"
4337 {
4338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4339 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4340 operands[2], operands[3],
4341 p));
4342 DONE;
4343 })
4344
4345 (define_expand "aarch64_sqdmull2_laneq<mode>"
4346 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4347 (match_operand:VQ_HSI 1 "register_operand" "w")
4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349 (match_operand:SI 3 "immediate_operand" "i")]
4350 "TARGET_SIMD"
4351 {
4352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4353 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4354 operands[2], operands[3],
4355 p));
4356 DONE;
4357 })
4358
4359 ;; vqdmull2_n
4360
4361 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4363 (ss_ashift:<VWIDE>
4364 (mult:<VWIDE>
4365 (sign_extend:<VWIDE>
4366 (vec_select:<VHALF>
4367 (match_operand:VQ_HSI 1 "register_operand" "w")
4368 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4369 (sign_extend:<VWIDE>
4370 (vec_duplicate:<VHALF>
4371 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4372 )
4373 (const_int 1)))]
4374 "TARGET_SIMD"
4375 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4376 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4377 )
4378
4379 (define_expand "aarch64_sqdmull2_n<mode>"
4380 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4381 (match_operand:VQ_HSI 1 "register_operand" "w")
4382 (match_operand:<VEL> 2 "register_operand" "w")]
4383 "TARGET_SIMD"
4384 {
4385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4386 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4387 operands[2], p));
4388 DONE;
4389 })
4390
4391 ;; vshl
4392
4393 (define_insn "aarch64_<sur>shl<mode>"
4394 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4395 (unspec:VSDQ_I_DI
4396 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4397 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4398 VSHL))]
4399 "TARGET_SIMD"
4400 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4401 [(set_attr "type" "neon_shift_reg<q>")]
4402 )
4403
4404
4405 ;; vqshl
4406
4407 (define_insn "aarch64_<sur>q<r>shl<mode>"
4408 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4409 (unspec:VSDQ_I
4410 [(match_operand:VSDQ_I 1 "register_operand" "w")
4411 (match_operand:VSDQ_I 2 "register_operand" "w")]
4412 VQSHL))]
4413 "TARGET_SIMD"
4414 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4415 [(set_attr "type" "neon_sat_shift_reg<q>")]
4416 )
4417
4418 ;; vshll_n
4419
4420 (define_insn "aarch64_<sur>shll_n<mode>"
4421 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4422 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4423 (match_operand:SI 2
4424 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4425 VSHLL))]
4426 "TARGET_SIMD"
4427 {
4428 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4429 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4430 else
4431 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4432 }
4433 [(set_attr "type" "neon_shift_imm_long")]
4434 )
4435
4436 ;; vshll_high_n
4437
4438 (define_insn "aarch64_<sur>shll2_n<mode>"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4441 (match_operand:SI 2 "immediate_operand" "i")]
4442 VSHLL))]
4443 "TARGET_SIMD"
4444 {
4445 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4446 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4447 else
4448 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4449 }
4450 [(set_attr "type" "neon_shift_imm_long")]
4451 )
4452
4453 ;; vrshr_n
4454
4455 (define_insn "aarch64_<sur>shr_n<mode>"
4456 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4457 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4458 (match_operand:SI 2
4459 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4460 VRSHR_N))]
4461 "TARGET_SIMD"
4462 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4463 [(set_attr "type" "neon_sat_shift_imm<q>")]
4464 )
4465
4466 ;; v(r)sra_n
4467
4468 (define_insn "aarch64_<sur>sra_n<mode>"
4469 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4470 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4471 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4472 (match_operand:SI 3
4473 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4474 VSRA))]
4475 "TARGET_SIMD"
4476 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4477 [(set_attr "type" "neon_shift_acc<q>")]
4478 )
4479
4480 ;; vs<lr>i_n
4481
4482 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4483 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4484 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4485 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4486 (match_operand:SI 3
4487 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4488 VSLRI))]
4489 "TARGET_SIMD"
4490 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4491 [(set_attr "type" "neon_shift_imm<q>")]
4492 )
4493
4494 ;; vqshl(u)
4495
4496 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4499 (match_operand:SI 2
4500 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4501 VQSHL_N))]
4502 "TARGET_SIMD"
4503 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4504 [(set_attr "type" "neon_sat_shift_imm<q>")]
4505 )
4506
4507
4508 ;; vq(r)shr(u)n_n
4509
4510 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4511 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4512 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4513 (match_operand:SI 2
4514 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4515 VQSHRN_N))]
4516 "TARGET_SIMD"
4517 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4518 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4519 )
4520
4521
4522 ;; cm(eq|ge|gt|lt|le)
4523 ;; Note, we have constraints for Dz and Z as different expanders
4524 ;; have different ideas of what should be passed to this pattern.
4525
4526 (define_insn "aarch64_cm<optab><mode>"
4527 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4528 (neg:<V_INT_EQUIV>
4529 (COMPARISONS:<V_INT_EQUIV>
4530 (match_operand:VDQ_I 1 "register_operand" "w,w")
4531 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4532 )))]
4533 "TARGET_SIMD"
4534 "@
4535 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4536 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4537 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4538 )
4539
4540 (define_insn_and_split "aarch64_cm<optab>di"
4541 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4542 (neg:DI
4543 (COMPARISONS:DI
4544 (match_operand:DI 1 "register_operand" "w,w,r")
4545 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4546 )))
4547 (clobber (reg:CC CC_REGNUM))]
4548 "TARGET_SIMD"
4549 "#"
4550 "&& reload_completed"
4551 [(set (match_operand:DI 0 "register_operand")
4552 (neg:DI
4553 (COMPARISONS:DI
4554 (match_operand:DI 1 "register_operand")
4555 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4556 )))]
4557 {
4558 /* If we are in the general purpose register file,
4559 we split to a sequence of comparison and store. */
4560 if (GP_REGNUM_P (REGNO (operands[0]))
4561 && GP_REGNUM_P (REGNO (operands[1])))
4562 {
4563 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4564 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4565 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4566 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4567 DONE;
4568 }
4569 /* Otherwise, we expand to a similar pattern which does not
4570 clobber CC_REGNUM. */
4571 }
4572 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4573 )
4574
4575 (define_insn "*aarch64_cm<optab>di"
4576 [(set (match_operand:DI 0 "register_operand" "=w,w")
4577 (neg:DI
4578 (COMPARISONS:DI
4579 (match_operand:DI 1 "register_operand" "w,w")
4580 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4581 )))]
4582 "TARGET_SIMD && reload_completed"
4583 "@
4584 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4585 cm<optab>\t%d0, %d1, #0"
4586 [(set_attr "type" "neon_compare, neon_compare_zero")]
4587 )
4588
4589 ;; cm(hs|hi)
4590
4591 (define_insn "aarch64_cm<optab><mode>"
4592 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4593 (neg:<V_INT_EQUIV>
4594 (UCOMPARISONS:<V_INT_EQUIV>
4595 (match_operand:VDQ_I 1 "register_operand" "w")
4596 (match_operand:VDQ_I 2 "register_operand" "w")
4597 )))]
4598 "TARGET_SIMD"
4599 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4600 [(set_attr "type" "neon_compare<q>")]
4601 )
4602
4603 (define_insn_and_split "aarch64_cm<optab>di"
4604 [(set (match_operand:DI 0 "register_operand" "=w,r")
4605 (neg:DI
4606 (UCOMPARISONS:DI
4607 (match_operand:DI 1 "register_operand" "w,r")
4608 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4609 )))
4610 (clobber (reg:CC CC_REGNUM))]
4611 "TARGET_SIMD"
4612 "#"
4613 "&& reload_completed"
4614 [(set (match_operand:DI 0 "register_operand")
4615 (neg:DI
4616 (UCOMPARISONS:DI
4617 (match_operand:DI 1 "register_operand")
4618 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4619 )))]
4620 {
4621 /* If we are in the general purpose register file,
4622 we split to a sequence of comparison and store. */
4623 if (GP_REGNUM_P (REGNO (operands[0]))
4624 && GP_REGNUM_P (REGNO (operands[1])))
4625 {
4626 machine_mode mode = CCmode;
4627 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4628 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4629 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4630 DONE;
4631 }
4632 /* Otherwise, we expand to a similar pattern which does not
4633 clobber CC_REGNUM. */
4634 }
4635 [(set_attr "type" "neon_compare,multiple")]
4636 )
4637
4638 (define_insn "*aarch64_cm<optab>di"
4639 [(set (match_operand:DI 0 "register_operand" "=w")
4640 (neg:DI
4641 (UCOMPARISONS:DI
4642 (match_operand:DI 1 "register_operand" "w")
4643 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4644 )))]
4645 "TARGET_SIMD && reload_completed"
4646 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4647 [(set_attr "type" "neon_compare")]
4648 )
4649
4650 ;; cmtst
4651
4652 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4653 ;; we don't have any insns using ne, and aarch64_vcond outputs
4654 ;; not (neg (eq (and x y) 0))
4655 ;; which is rewritten by simplify_rtx as
4656 ;; plus (eq (and x y) 0) -1.
4657
4658 (define_insn "aarch64_cmtst<mode>"
4659 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4660 (plus:<V_INT_EQUIV>
4661 (eq:<V_INT_EQUIV>
4662 (and:VDQ_I
4663 (match_operand:VDQ_I 1 "register_operand" "w")
4664 (match_operand:VDQ_I 2 "register_operand" "w"))
4665 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4666 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4667 ]
4668 "TARGET_SIMD"
4669 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4670 [(set_attr "type" "neon_tst<q>")]
4671 )
4672
4673 (define_insn_and_split "aarch64_cmtstdi"
4674 [(set (match_operand:DI 0 "register_operand" "=w,r")
4675 (neg:DI
4676 (ne:DI
4677 (and:DI
4678 (match_operand:DI 1 "register_operand" "w,r")
4679 (match_operand:DI 2 "register_operand" "w,r"))
4680 (const_int 0))))
4681 (clobber (reg:CC CC_REGNUM))]
4682 "TARGET_SIMD"
4683 "#"
4684 "&& reload_completed"
4685 [(set (match_operand:DI 0 "register_operand")
4686 (neg:DI
4687 (ne:DI
4688 (and:DI
4689 (match_operand:DI 1 "register_operand")
4690 (match_operand:DI 2 "register_operand"))
4691 (const_int 0))))]
4692 {
4693 /* If we are in the general purpose register file,
4694 we split to a sequence of comparison and store. */
4695 if (GP_REGNUM_P (REGNO (operands[0]))
4696 && GP_REGNUM_P (REGNO (operands[1])))
4697 {
4698 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4699 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4700 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4701 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4702 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4703 DONE;
4704 }
4705 /* Otherwise, we expand to a similar pattern which does not
4706 clobber CC_REGNUM. */
4707 }
4708 [(set_attr "type" "neon_tst,multiple")]
4709 )
4710
4711 (define_insn "*aarch64_cmtstdi"
4712 [(set (match_operand:DI 0 "register_operand" "=w")
4713 (neg:DI
4714 (ne:DI
4715 (and:DI
4716 (match_operand:DI 1 "register_operand" "w")
4717 (match_operand:DI 2 "register_operand" "w"))
4718 (const_int 0))))]
4719 "TARGET_SIMD"
4720 "cmtst\t%d0, %d1, %d2"
4721 [(set_attr "type" "neon_tst")]
4722 )
4723
4724 ;; fcm(eq|ge|gt|le|lt)
4725
4726 (define_insn "aarch64_cm<optab><mode>"
4727 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4728 (neg:<V_INT_EQUIV>
4729 (COMPARISONS:<V_INT_EQUIV>
4730 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4731 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4732 )))]
4733 "TARGET_SIMD"
4734 "@
4735 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4736 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4737 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4738 )
4739
4740 ;; fac(ge|gt)
4741 ;; Note we can also handle what would be fac(le|lt) by
4742 ;; generating fac(ge|gt).
4743
4744 (define_insn "aarch64_fac<optab><mode>"
4745 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4746 (neg:<V_INT_EQUIV>
4747 (FAC_COMPARISONS:<V_INT_EQUIV>
4748 (abs:VHSDF_HSDF
4749 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4750 (abs:VHSDF_HSDF
4751 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4752 )))]
4753 "TARGET_SIMD"
4754 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4755 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4756 )
4757
4758 ;; addp
4759
4760 (define_insn "aarch64_addp<mode>"
4761 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4762 (unspec:VD_BHSI
4763 [(match_operand:VD_BHSI 1 "register_operand" "w")
4764 (match_operand:VD_BHSI 2 "register_operand" "w")]
4765 UNSPEC_ADDP))]
4766 "TARGET_SIMD"
4767 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4768 [(set_attr "type" "neon_reduc_add<q>")]
4769 )
4770
4771 (define_insn "aarch64_addpdi"
4772 [(set (match_operand:DI 0 "register_operand" "=w")
4773 (unspec:DI
4774 [(match_operand:V2DI 1 "register_operand" "w")]
4775 UNSPEC_ADDP))]
4776 "TARGET_SIMD"
4777 "addp\t%d0, %1.2d"
4778 [(set_attr "type" "neon_reduc_add")]
4779 )
4780
4781 ;; sqrt
4782
4783 (define_expand "sqrt<mode>2"
4784 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4785 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4786 "TARGET_SIMD"
4787 {
4788 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4789 DONE;
4790 })
4791
4792 (define_insn "*sqrt<mode>2"
4793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4794 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4795 "TARGET_SIMD"
4796 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4797 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4798 )
4799
4800 ;; Patterns for vector struct loads and stores.
4801
4802 (define_insn "aarch64_simd_ld2<mode>"
4803 [(set (match_operand:OI 0 "register_operand" "=w")
4804 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4805 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4806 UNSPEC_LD2))]
4807 "TARGET_SIMD"
4808 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4809 [(set_attr "type" "neon_load2_2reg<q>")]
4810 )
4811
4812 (define_insn "aarch64_simd_ld2r<mode>"
4813 [(set (match_operand:OI 0 "register_operand" "=w")
4814 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4815 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4816 UNSPEC_LD2_DUP))]
4817 "TARGET_SIMD"
4818 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4819 [(set_attr "type" "neon_load2_all_lanes<q>")]
4820 )
4821
4822 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4823 [(set (match_operand:OI 0 "register_operand" "=w")
4824 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4825 (match_operand:OI 2 "register_operand" "0")
4826 (match_operand:SI 3 "immediate_operand" "i")
4827 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4828 UNSPEC_LD2_LANE))]
4829 "TARGET_SIMD"
4830 {
4831 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4832 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4833 }
4834 [(set_attr "type" "neon_load2_one_lane")]
4835 )
4836
4837 (define_expand "vec_load_lanesoi<mode>"
4838 [(set (match_operand:OI 0 "register_operand" "=w")
4839 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4840 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841 UNSPEC_LD2))]
4842 "TARGET_SIMD"
4843 {
4844 if (BYTES_BIG_ENDIAN)
4845 {
4846 rtx tmp = gen_reg_rtx (OImode);
4847 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4848 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4849 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4850 }
4851 else
4852 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4853 DONE;
4854 })
4855
4856 (define_insn "aarch64_simd_st2<mode>"
4857 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4858 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4859 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4860 UNSPEC_ST2))]
4861 "TARGET_SIMD"
4862 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4863 [(set_attr "type" "neon_store2_2reg<q>")]
4864 )
4865
4866 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4867 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4868 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4869 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4870 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4871 (match_operand:SI 2 "immediate_operand" "i")]
4872 UNSPEC_ST2_LANE))]
4873 "TARGET_SIMD"
4874 {
4875 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4876 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4877 }
4878 [(set_attr "type" "neon_store2_one_lane<q>")]
4879 )
4880
4881 (define_expand "vec_store_lanesoi<mode>"
4882 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4884 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4885 UNSPEC_ST2))]
4886 "TARGET_SIMD"
4887 {
4888 if (BYTES_BIG_ENDIAN)
4889 {
4890 rtx tmp = gen_reg_rtx (OImode);
4891 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4892 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4893 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4894 }
4895 else
4896 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4897 DONE;
4898 })
4899
4900 (define_insn "aarch64_simd_ld3<mode>"
4901 [(set (match_operand:CI 0 "register_operand" "=w")
4902 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4903 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4904 UNSPEC_LD3))]
4905 "TARGET_SIMD"
4906 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4907 [(set_attr "type" "neon_load3_3reg<q>")]
4908 )
4909
4910 (define_insn "aarch64_simd_ld3r<mode>"
4911 [(set (match_operand:CI 0 "register_operand" "=w")
4912 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4913 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4914 UNSPEC_LD3_DUP))]
4915 "TARGET_SIMD"
4916 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4917 [(set_attr "type" "neon_load3_all_lanes<q>")]
4918 )
4919
4920 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4921 [(set (match_operand:CI 0 "register_operand" "=w")
4922 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4923 (match_operand:CI 2 "register_operand" "0")
4924 (match_operand:SI 3 "immediate_operand" "i")
4925 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 UNSPEC_LD3_LANE))]
4927 "TARGET_SIMD"
4928 {
4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4930 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4931 }
4932 [(set_attr "type" "neon_load3_one_lane")]
4933 )
4934
4935 (define_expand "vec_load_lanesci<mode>"
4936 [(set (match_operand:CI 0 "register_operand" "=w")
4937 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4938 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4939 UNSPEC_LD3))]
4940 "TARGET_SIMD"
4941 {
4942 if (BYTES_BIG_ENDIAN)
4943 {
4944 rtx tmp = gen_reg_rtx (CImode);
4945 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4946 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4947 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4948 }
4949 else
4950 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4951 DONE;
4952 })
4953
4954 (define_insn "aarch64_simd_st3<mode>"
4955 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4956 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4957 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958 UNSPEC_ST3))]
4959 "TARGET_SIMD"
4960 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4961 [(set_attr "type" "neon_store3_3reg<q>")]
4962 )
4963
4964 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4965 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4966 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4967 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4969 (match_operand:SI 2 "immediate_operand" "i")]
4970 UNSPEC_ST3_LANE))]
4971 "TARGET_SIMD"
4972 {
4973 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4974 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4975 }
4976 [(set_attr "type" "neon_store3_one_lane<q>")]
4977 )
4978
4979 (define_expand "vec_store_lanesci<mode>"
4980 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4982 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4983 UNSPEC_ST3))]
4984 "TARGET_SIMD"
4985 {
4986 if (BYTES_BIG_ENDIAN)
4987 {
4988 rtx tmp = gen_reg_rtx (CImode);
4989 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4990 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4991 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4992 }
4993 else
4994 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4995 DONE;
4996 })
4997
4998 (define_insn "aarch64_simd_ld4<mode>"
4999 [(set (match_operand:XI 0 "register_operand" "=w")
5000 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5001 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002 UNSPEC_LD4))]
5003 "TARGET_SIMD"
5004 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5005 [(set_attr "type" "neon_load4_4reg<q>")]
5006 )
5007
5008 (define_insn "aarch64_simd_ld4r<mode>"
5009 [(set (match_operand:XI 0 "register_operand" "=w")
5010 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5011 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5012 UNSPEC_LD4_DUP))]
5013 "TARGET_SIMD"
5014 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5015 [(set_attr "type" "neon_load4_all_lanes<q>")]
5016 )
5017
5018 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5019 [(set (match_operand:XI 0 "register_operand" "=w")
5020 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5021 (match_operand:XI 2 "register_operand" "0")
5022 (match_operand:SI 3 "immediate_operand" "i")
5023 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5024 UNSPEC_LD4_LANE))]
5025 "TARGET_SIMD"
5026 {
5027 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5028 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5029 }
5030 [(set_attr "type" "neon_load4_one_lane")]
5031 )
5032
5033 (define_expand "vec_load_lanesxi<mode>"
5034 [(set (match_operand:XI 0 "register_operand" "=w")
5035 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5036 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037 UNSPEC_LD4))]
5038 "TARGET_SIMD"
5039 {
5040 if (BYTES_BIG_ENDIAN)
5041 {
5042 rtx tmp = gen_reg_rtx (XImode);
5043 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5044 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5045 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5046 }
5047 else
5048 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5049 DONE;
5050 })
5051
5052 (define_insn "aarch64_simd_st4<mode>"
5053 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5054 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056 UNSPEC_ST4))]
5057 "TARGET_SIMD"
5058 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5059 [(set_attr "type" "neon_store4_4reg<q>")]
5060 )
5061
5062 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5063 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5064 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5065 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5066 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5067 (match_operand:SI 2 "immediate_operand" "i")]
5068 UNSPEC_ST4_LANE))]
5069 "TARGET_SIMD"
5070 {
5071 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5072 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5073 }
5074 [(set_attr "type" "neon_store4_one_lane<q>")]
5075 )
5076
5077 (define_expand "vec_store_lanesxi<mode>"
5078 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5079 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5080 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081 UNSPEC_ST4))]
5082 "TARGET_SIMD"
5083 {
5084 if (BYTES_BIG_ENDIAN)
5085 {
5086 rtx tmp = gen_reg_rtx (XImode);
5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5089 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5090 }
5091 else
5092 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5093 DONE;
5094 })
5095
5096 (define_insn_and_split "aarch64_rev_reglist<mode>"
5097 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5098 (unspec:VSTRUCT
5099 [(match_operand:VSTRUCT 1 "register_operand" "w")
5100 (match_operand:V16QI 2 "register_operand" "w")]
5101 UNSPEC_REV_REGLIST))]
5102 "TARGET_SIMD"
5103 "#"
5104 "&& reload_completed"
5105 [(const_int 0)]
5106 {
5107 int i;
5108 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5109 for (i = 0; i < nregs; i++)
5110 {
5111 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5112 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5113 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5114 }
5115 DONE;
5116 }
5117 [(set_attr "type" "neon_tbl1_q")
5118 (set_attr "length" "<insn_count>")]
5119 )
5120
5121 ;; Reload patterns for AdvSIMD register list operands.
5122
5123 (define_expand "mov<mode>"
5124 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5125 (match_operand:VSTRUCT 1 "general_operand" ""))]
5126 "TARGET_SIMD"
5127 {
5128 if (can_create_pseudo_p ())
5129 {
5130 if (GET_CODE (operands[0]) != REG)
5131 operands[1] = force_reg (<MODE>mode, operands[1]);
5132 }
5133 })
5134
5135
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137 [(match_operand:CI 0 "register_operand" "=w")
5138 (match_operand:DI 1 "register_operand" "r")
5139 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140 "TARGET_SIMD"
5141 {
5142 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5144 DONE;
5145 })
5146
5147 (define_insn "aarch64_ld1_x3_<mode>"
5148 [(set (match_operand:CI 0 "register_operand" "=w")
5149 (unspec:CI
5150 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5152 "TARGET_SIMD"
5153 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154 [(set_attr "type" "neon_load1_3reg<q>")]
5155 )
5156
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5158 [(match_operand:DI 0 "register_operand" "")
5159 (match_operand:OI 1 "register_operand" "")
5160 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161 "TARGET_SIMD"
5162 {
5163 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5165 DONE;
5166 })
5167
5168 (define_insn "aarch64_st1_x2_<mode>"
5169 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5170 (unspec:OI
5171 [(match_operand:OI 1 "register_operand" "w")
5172 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5173 "TARGET_SIMD"
5174 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175 [(set_attr "type" "neon_store1_2reg<q>")]
5176 )
5177
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5179 [(match_operand:DI 0 "register_operand" "")
5180 (match_operand:CI 1 "register_operand" "")
5181 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182 "TARGET_SIMD"
5183 {
5184 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5186 DONE;
5187 })
5188
5189 (define_insn "aarch64_st1_x3_<mode>"
5190 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5191 (unspec:CI
5192 [(match_operand:CI 1 "register_operand" "w")
5193 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5194 "TARGET_SIMD"
5195 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196 [(set_attr "type" "neon_store1_3reg<q>")]
5197 )
5198
5199 (define_insn "*aarch64_mov<mode>"
5200 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5201 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5202 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5203 && (register_operand (operands[0], <MODE>mode)
5204 || register_operand (operands[1], <MODE>mode))"
5205 "@
5206 #
5207 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5208 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5209 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5210 neon_load<nregs>_<nregs>reg_q")
5211 (set_attr "length" "<insn_count>,4,4")]
5212 )
5213
5214 (define_insn "aarch64_be_ld1<mode>"
5215 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5216 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5217 "aarch64_simd_struct_operand" "Utv")]
5218 UNSPEC_LD1))]
5219 "TARGET_SIMD"
5220 "ld1\\t{%0<Vmtype>}, %1"
5221 [(set_attr "type" "neon_load1_1reg<q>")]
5222 )
5223
5224 (define_insn "aarch64_be_st1<mode>"
5225 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5226 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5227 UNSPEC_ST1))]
5228 "TARGET_SIMD"
5229 "st1\\t{%1<Vmtype>}, %0"
5230 [(set_attr "type" "neon_store1_1reg<q>")]
5231 )
5232
5233 (define_insn "*aarch64_be_movoi"
5234 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5235 (match_operand:OI 1 "general_operand" " w,w,m"))]
5236 "TARGET_SIMD && BYTES_BIG_ENDIAN
5237 && (register_operand (operands[0], OImode)
5238 || register_operand (operands[1], OImode))"
5239 "@
5240 #
5241 stp\\t%q1, %R1, %0
5242 ldp\\t%q0, %R0, %1"
5243 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5244 (set_attr "length" "8,4,4")]
5245 )
5246
5247 (define_insn "*aarch64_be_movci"
5248 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5249 (match_operand:CI 1 "general_operand" " w,w,o"))]
5250 "TARGET_SIMD && BYTES_BIG_ENDIAN
5251 && (register_operand (operands[0], CImode)
5252 || register_operand (operands[1], CImode))"
5253 "#"
5254 [(set_attr "type" "multiple")
5255 (set_attr "length" "12,4,4")]
5256 )
5257
5258 (define_insn "*aarch64_be_movxi"
5259 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5260 (match_operand:XI 1 "general_operand" " w,w,o"))]
5261 "TARGET_SIMD && BYTES_BIG_ENDIAN
5262 && (register_operand (operands[0], XImode)
5263 || register_operand (operands[1], XImode))"
5264 "#"
5265 [(set_attr "type" "multiple")
5266 (set_attr "length" "16,4,4")]
5267 )
5268
5269 (define_split
5270 [(set (match_operand:OI 0 "register_operand")
5271 (match_operand:OI 1 "register_operand"))]
5272 "TARGET_SIMD && reload_completed"
5273 [(const_int 0)]
5274 {
5275 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5276 DONE;
5277 })
5278
5279 (define_split
5280 [(set (match_operand:CI 0 "nonimmediate_operand")
5281 (match_operand:CI 1 "general_operand"))]
5282 "TARGET_SIMD && reload_completed"
5283 [(const_int 0)]
5284 {
5285 if (register_operand (operands[0], CImode)
5286 && register_operand (operands[1], CImode))
5287 {
5288 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5289 DONE;
5290 }
5291 else if (BYTES_BIG_ENDIAN)
5292 {
5293 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5294 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5295 emit_move_insn (gen_lowpart (V16QImode,
5296 simplify_gen_subreg (TImode, operands[0],
5297 CImode, 32)),
5298 gen_lowpart (V16QImode,
5299 simplify_gen_subreg (TImode, operands[1],
5300 CImode, 32)));
5301 DONE;
5302 }
5303 else
5304 FAIL;
5305 })
5306
5307 (define_split
5308 [(set (match_operand:XI 0 "nonimmediate_operand")
5309 (match_operand:XI 1 "general_operand"))]
5310 "TARGET_SIMD && reload_completed"
5311 [(const_int 0)]
5312 {
5313 if (register_operand (operands[0], XImode)
5314 && register_operand (operands[1], XImode))
5315 {
5316 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5317 DONE;
5318 }
5319 else if (BYTES_BIG_ENDIAN)
5320 {
5321 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5322 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5323 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5324 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5325 DONE;
5326 }
5327 else
5328 FAIL;
5329 })
5330
5331 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5332 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5333 (match_operand:DI 1 "register_operand" "w")
5334 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335 "TARGET_SIMD"
5336 {
5337 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5338 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5339 * <VSTRUCT:nregs>);
5340
5341 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5342 mem));
5343 DONE;
5344 })
5345
5346 (define_insn "aarch64_ld2<mode>_dreg"
5347 [(set (match_operand:OI 0 "register_operand" "=w")
5348 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5349 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5350 UNSPEC_LD2_DREG))]
5351 "TARGET_SIMD"
5352 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5353 [(set_attr "type" "neon_load2_2reg<q>")]
5354 )
5355
5356 (define_insn "aarch64_ld2<mode>_dreg"
5357 [(set (match_operand:OI 0 "register_operand" "=w")
5358 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5359 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5360 UNSPEC_LD2_DREG))]
5361 "TARGET_SIMD"
5362 "ld1\\t{%S0.1d - %T0.1d}, %1"
5363 [(set_attr "type" "neon_load1_2reg<q>")]
5364 )
5365
5366 (define_insn "aarch64_ld3<mode>_dreg"
5367 [(set (match_operand:CI 0 "register_operand" "=w")
5368 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5369 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5370 UNSPEC_LD3_DREG))]
5371 "TARGET_SIMD"
5372 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5373 [(set_attr "type" "neon_load3_3reg<q>")]
5374 )
5375
5376 (define_insn "aarch64_ld3<mode>_dreg"
5377 [(set (match_operand:CI 0 "register_operand" "=w")
5378 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5379 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380 UNSPEC_LD3_DREG))]
5381 "TARGET_SIMD"
5382 "ld1\\t{%S0.1d - %U0.1d}, %1"
5383 [(set_attr "type" "neon_load1_3reg<q>")]
5384 )
5385
5386 (define_insn "aarch64_ld4<mode>_dreg"
5387 [(set (match_operand:XI 0 "register_operand" "=w")
5388 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5389 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5390 UNSPEC_LD4_DREG))]
5391 "TARGET_SIMD"
5392 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5393 [(set_attr "type" "neon_load4_4reg<q>")]
5394 )
5395
5396 (define_insn "aarch64_ld4<mode>_dreg"
5397 [(set (match_operand:XI 0 "register_operand" "=w")
5398 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5399 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5400 UNSPEC_LD4_DREG))]
5401 "TARGET_SIMD"
5402 "ld1\\t{%S0.1d - %V0.1d}, %1"
5403 [(set_attr "type" "neon_load1_4reg<q>")]
5404 )
5405
5406 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5407 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5408 (match_operand:DI 1 "register_operand" "r")
5409 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5410 "TARGET_SIMD"
5411 {
5412 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5413 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5414
5415 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5416 DONE;
5417 })
5418
5419 (define_expand "aarch64_ld1<VALL_F16:mode>"
5420 [(match_operand:VALL_F16 0 "register_operand")
5421 (match_operand:DI 1 "register_operand")]
5422 "TARGET_SIMD"
5423 {
5424 machine_mode mode = <VALL_F16:MODE>mode;
5425 rtx mem = gen_rtx_MEM (mode, operands[1]);
5426
5427 if (BYTES_BIG_ENDIAN)
5428 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5429 else
5430 emit_move_insn (operands[0], mem);
5431 DONE;
5432 })
5433
5434 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5435 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5436 (match_operand:DI 1 "register_operand" "r")
5437 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5438 "TARGET_SIMD"
5439 {
5440 machine_mode mode = <VSTRUCT:MODE>mode;
5441 rtx mem = gen_rtx_MEM (mode, operands[1]);
5442
5443 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5444 DONE;
5445 })
5446
5447 (define_expand "aarch64_ld1x2<VQ:mode>"
5448 [(match_operand:OI 0 "register_operand" "=w")
5449 (match_operand:DI 1 "register_operand" "r")
5450 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451 "TARGET_SIMD"
5452 {
5453 machine_mode mode = OImode;
5454 rtx mem = gen_rtx_MEM (mode, operands[1]);
5455
5456 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5457 DONE;
5458 })
5459
5460 (define_expand "aarch64_ld1x2<VDC:mode>"
5461 [(match_operand:OI 0 "register_operand" "=w")
5462 (match_operand:DI 1 "register_operand" "r")
5463 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464 "TARGET_SIMD"
5465 {
5466 machine_mode mode = OImode;
5467 rtx mem = gen_rtx_MEM (mode, operands[1]);
5468
5469 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5470 DONE;
5471 })
5472
5473
5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5475 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5476 (match_operand:DI 1 "register_operand" "w")
5477 (match_operand:VSTRUCT 2 "register_operand" "0")
5478 (match_operand:SI 3 "immediate_operand" "i")
5479 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 "TARGET_SIMD"
5481 {
5482 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5483 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5484 * <VSTRUCT:nregs>);
5485
5486 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5487 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5488 operands[0], mem, operands[2], operands[3]));
5489 DONE;
5490 })
5491
5492 ;; Expanders for builtins to extract vector registers from large
5493 ;; opaque integer modes.
5494
5495 ;; D-register list.
5496
5497 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5498 [(match_operand:VDC 0 "register_operand" "=w")
5499 (match_operand:VSTRUCT 1 "register_operand" "w")
5500 (match_operand:SI 2 "immediate_operand" "i")]
5501 "TARGET_SIMD"
5502 {
5503 int part = INTVAL (operands[2]);
5504 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5505 int offset = part * 16;
5506
5507 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5508 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5509 DONE;
5510 })
5511
5512 ;; Q-register list.
5513
5514 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5515 [(match_operand:VQ 0 "register_operand" "=w")
5516 (match_operand:VSTRUCT 1 "register_operand" "w")
5517 (match_operand:SI 2 "immediate_operand" "i")]
5518 "TARGET_SIMD"
5519 {
5520 int part = INTVAL (operands[2]);
5521 int offset = part * 16;
5522
5523 emit_move_insn (operands[0],
5524 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5525 DONE;
5526 })
5527
5528 ;; Permuted-store expanders for neon intrinsics.
5529
5530 ;; Permute instructions
5531
5532 ;; vec_perm support
5533
5534 (define_expand "vec_perm<mode>"
5535 [(match_operand:VB 0 "register_operand")
5536 (match_operand:VB 1 "register_operand")
5537 (match_operand:VB 2 "register_operand")
5538 (match_operand:VB 3 "register_operand")]
5539 "TARGET_SIMD"
5540 {
5541 aarch64_expand_vec_perm (operands[0], operands[1],
5542 operands[2], operands[3], <nunits>);
5543 DONE;
5544 })
5545
5546 (define_insn "aarch64_tbl1<mode>"
5547 [(set (match_operand:VB 0 "register_operand" "=w")
5548 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5549 (match_operand:VB 2 "register_operand" "w")]
5550 UNSPEC_TBL))]
5551 "TARGET_SIMD"
5552 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5553 [(set_attr "type" "neon_tbl1<q>")]
5554 )
5555
5556 ;; Two source registers.
5557
5558 (define_insn "aarch64_tbl2v16qi"
5559 [(set (match_operand:V16QI 0 "register_operand" "=w")
5560 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5561 (match_operand:V16QI 2 "register_operand" "w")]
5562 UNSPEC_TBL))]
5563 "TARGET_SIMD"
5564 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5565 [(set_attr "type" "neon_tbl2_q")]
5566 )
5567
5568 (define_insn "aarch64_tbl3<mode>"
5569 [(set (match_operand:VB 0 "register_operand" "=w")
5570 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5571 (match_operand:VB 2 "register_operand" "w")]
5572 UNSPEC_TBL))]
5573 "TARGET_SIMD"
5574 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5575 [(set_attr "type" "neon_tbl3")]
5576 )
5577
5578 (define_insn "aarch64_tbx4<mode>"
5579 [(set (match_operand:VB 0 "register_operand" "=w")
5580 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5581 (match_operand:OI 2 "register_operand" "w")
5582 (match_operand:VB 3 "register_operand" "w")]
5583 UNSPEC_TBX))]
5584 "TARGET_SIMD"
5585 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5586 [(set_attr "type" "neon_tbl4")]
5587 )
5588
5589 ;; Three source registers.
5590
5591 (define_insn "aarch64_qtbl3<mode>"
5592 [(set (match_operand:VB 0 "register_operand" "=w")
5593 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5594 (match_operand:VB 2 "register_operand" "w")]
5595 UNSPEC_TBL))]
5596 "TARGET_SIMD"
5597 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5598 [(set_attr "type" "neon_tbl3")]
5599 )
5600
5601 (define_insn "aarch64_qtbx3<mode>"
5602 [(set (match_operand:VB 0 "register_operand" "=w")
5603 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5604 (match_operand:CI 2 "register_operand" "w")
5605 (match_operand:VB 3 "register_operand" "w")]
5606 UNSPEC_TBX))]
5607 "TARGET_SIMD"
5608 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5609 [(set_attr "type" "neon_tbl3")]
5610 )
5611
5612 ;; Four source registers.
5613
5614 (define_insn "aarch64_qtbl4<mode>"
5615 [(set (match_operand:VB 0 "register_operand" "=w")
5616 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5617 (match_operand:VB 2 "register_operand" "w")]
5618 UNSPEC_TBL))]
5619 "TARGET_SIMD"
5620 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5621 [(set_attr "type" "neon_tbl4")]
5622 )
5623
5624 (define_insn "aarch64_qtbx4<mode>"
5625 [(set (match_operand:VB 0 "register_operand" "=w")
5626 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5627 (match_operand:XI 2 "register_operand" "w")
5628 (match_operand:VB 3 "register_operand" "w")]
5629 UNSPEC_TBX))]
5630 "TARGET_SIMD"
5631 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5632 [(set_attr "type" "neon_tbl4")]
5633 )
5634
5635 (define_insn_and_split "aarch64_combinev16qi"
5636 [(set (match_operand:OI 0 "register_operand" "=w")
5637 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5638 (match_operand:V16QI 2 "register_operand" "w")]
5639 UNSPEC_CONCAT))]
5640 "TARGET_SIMD"
5641 "#"
5642 "&& reload_completed"
5643 [(const_int 0)]
5644 {
5645 aarch64_split_combinev16qi (operands);
5646 DONE;
5647 }
5648 [(set_attr "type" "multiple")]
5649 )
5650
5651 ;; This instruction's pattern is generated directly by
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653 ;; need corresponding changes there.
5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5655 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5656 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5657 (match_operand:VALL_F16 2 "register_operand" "w")]
5658 PERMUTE))]
5659 "TARGET_SIMD"
5660 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5661 [(set_attr "type" "neon_permute<q>")]
5662 )
5663
5664 ;; This instruction's pattern is generated directly by
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666 ;; need corresponding changes there. Note that the immediate (third)
5667 ;; operand is a lane index not a byte index.
5668 (define_insn "aarch64_ext<mode>"
5669 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5670 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5671 (match_operand:VALL_F16 2 "register_operand" "w")
5672 (match_operand:SI 3 "immediate_operand" "i")]
5673 UNSPEC_EXT))]
5674 "TARGET_SIMD"
5675 {
5676 operands[3] = GEN_INT (INTVAL (operands[3])
5677 * GET_MODE_UNIT_SIZE (<MODE>mode));
5678 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5679 }
5680 [(set_attr "type" "neon_ext<q>")]
5681 )
5682
5683 ;; This instruction's pattern is generated directly by
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685 ;; need corresponding changes there.
5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5687 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5688 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5689 REVERSE))]
5690 "TARGET_SIMD"
5691 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5692 [(set_attr "type" "neon_rev<q>")]
5693 )
5694
5695 (define_insn "aarch64_st2<mode>_dreg"
5696 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5697 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5698 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5699 UNSPEC_ST2))]
5700 "TARGET_SIMD"
5701 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5702 [(set_attr "type" "neon_store2_2reg")]
5703 )
5704
5705 (define_insn "aarch64_st2<mode>_dreg"
5706 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5707 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5708 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5709 UNSPEC_ST2))]
5710 "TARGET_SIMD"
5711 "st1\\t{%S1.1d - %T1.1d}, %0"
5712 [(set_attr "type" "neon_store1_2reg")]
5713 )
5714
5715 (define_insn "aarch64_st3<mode>_dreg"
5716 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5717 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5718 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719 UNSPEC_ST3))]
5720 "TARGET_SIMD"
5721 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5722 [(set_attr "type" "neon_store3_3reg")]
5723 )
5724
5725 (define_insn "aarch64_st3<mode>_dreg"
5726 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5727 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5728 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5729 UNSPEC_ST3))]
5730 "TARGET_SIMD"
5731 "st1\\t{%S1.1d - %U1.1d}, %0"
5732 [(set_attr "type" "neon_store1_3reg")]
5733 )
5734
5735 (define_insn "aarch64_st4<mode>_dreg"
5736 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5737 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5738 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5739 UNSPEC_ST4))]
5740 "TARGET_SIMD"
5741 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5742 [(set_attr "type" "neon_store4_4reg")]
5743 )
5744
5745 (define_insn "aarch64_st4<mode>_dreg"
5746 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5747 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5748 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5749 UNSPEC_ST4))]
5750 "TARGET_SIMD"
5751 "st1\\t{%S1.1d - %V1.1d}, %0"
5752 [(set_attr "type" "neon_store1_4reg")]
5753 )
5754
5755 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5756 [(match_operand:DI 0 "register_operand" "r")
5757 (match_operand:VSTRUCT 1 "register_operand" "w")
5758 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759 "TARGET_SIMD"
5760 {
5761 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5762 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5763
5764 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5765 DONE;
5766 })
5767
5768 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5769 [(match_operand:DI 0 "register_operand" "r")
5770 (match_operand:VSTRUCT 1 "register_operand" "w")
5771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772 "TARGET_SIMD"
5773 {
5774 machine_mode mode = <VSTRUCT:MODE>mode;
5775 rtx mem = gen_rtx_MEM (mode, operands[0]);
5776
5777 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5778 DONE;
5779 })
5780
5781 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5782 [(match_operand:DI 0 "register_operand" "r")
5783 (match_operand:VSTRUCT 1 "register_operand" "w")
5784 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5785 (match_operand:SI 2 "immediate_operand")]
5786 "TARGET_SIMD"
5787 {
5788 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5789 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5790 * <VSTRUCT:nregs>);
5791
5792 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5793 mem, operands[1], operands[2]));
5794 DONE;
5795 })
5796
5797 (define_expand "aarch64_st1<VALL_F16:mode>"
5798 [(match_operand:DI 0 "register_operand")
5799 (match_operand:VALL_F16 1 "register_operand")]
5800 "TARGET_SIMD"
5801 {
5802 machine_mode mode = <VALL_F16:MODE>mode;
5803 rtx mem = gen_rtx_MEM (mode, operands[0]);
5804
5805 if (BYTES_BIG_ENDIAN)
5806 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5807 else
5808 emit_move_insn (mem, operands[1]);
5809 DONE;
5810 })
5811
5812 ;; Expander for builtins to insert vector registers into large
5813 ;; opaque integer modes.
5814
5815 ;; Q-register list. We don't need a D-reg inserter as we zero
5816 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5817
5818 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5819 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5820 (match_operand:VSTRUCT 1 "register_operand" "0")
5821 (match_operand:VQ 2 "register_operand" "w")
5822 (match_operand:SI 3 "immediate_operand" "i")]
5823 "TARGET_SIMD"
5824 {
5825 int part = INTVAL (operands[3]);
5826 int offset = part * 16;
5827
5828 emit_move_insn (operands[0], operands[1]);
5829 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5830 operands[2]);
5831 DONE;
5832 })
5833
5834 ;; Standard pattern name vec_init<mode><Vel>.
5835
5836 (define_expand "vec_init<mode><Vel>"
5837 [(match_operand:VALL_F16 0 "register_operand" "")
5838 (match_operand 1 "" "")]
5839 "TARGET_SIMD"
5840 {
5841 aarch64_expand_vector_init (operands[0], operands[1]);
5842 DONE;
5843 })
5844
5845 (define_insn "*aarch64_simd_ld1r<mode>"
5846 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5847 (vec_duplicate:VALL_F16
5848 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5849 "TARGET_SIMD"
5850 "ld1r\\t{%0.<Vtype>}, %1"
5851 [(set_attr "type" "neon_load1_all_lanes")]
5852 )
5853
5854 (define_insn "aarch64_simd_ld1<mode>_x2"
5855 [(set (match_operand:OI 0 "register_operand" "=w")
5856 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 UNSPEC_LD1))]
5859 "TARGET_SIMD"
5860 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861 [(set_attr "type" "neon_load1_2reg<q>")]
5862 )
5863
5864 (define_insn "aarch64_simd_ld1<mode>_x2"
5865 [(set (match_operand:OI 0 "register_operand" "=w")
5866 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868 UNSPEC_LD1))]
5869 "TARGET_SIMD"
5870 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871 [(set_attr "type" "neon_load1_2reg<q>")]
5872 )
5873
5874
5875 (define_insn "@aarch64_frecpe<mode>"
5876 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5877 (unspec:VHSDF_HSDF
5878 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5879 UNSPEC_FRECPE))]
5880 "TARGET_SIMD"
5881 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5882 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5883 )
5884
5885 (define_insn "aarch64_frecpx<mode>"
5886 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5887 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5888 UNSPEC_FRECPX))]
5889 "TARGET_SIMD"
5890 "frecpx\t%<s>0, %<s>1"
5891 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5892 )
5893
5894 (define_insn "@aarch64_frecps<mode>"
5895 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5896 (unspec:VHSDF_HSDF
5897 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5898 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5899 UNSPEC_FRECPS))]
5900 "TARGET_SIMD"
5901 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5902 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5903 )
5904
5905 (define_insn "aarch64_urecpe<mode>"
5906 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5907 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5908 UNSPEC_URECPE))]
5909 "TARGET_SIMD"
5910 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5911 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5912
5913 ;; Standard pattern name vec_extract<mode><Vel>.
5914
5915 (define_expand "vec_extract<mode><Vel>"
5916 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5917 (match_operand:VALL_F16 1 "register_operand" "")
5918 (match_operand:SI 2 "immediate_operand" "")]
5919 "TARGET_SIMD"
5920 {
5921 emit_insn
5922 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5923 DONE;
5924 })
5925
5926 ;; aes
5927
5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5929 [(set (match_operand:V16QI 0 "register_operand" "=w")
5930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5931 (match_operand:V16QI 2 "register_operand" "w")]
5932 CRYPTO_AES))]
5933 "TARGET_SIMD && TARGET_AES"
5934 "aes<aes_op>\\t%0.16b, %2.16b"
5935 [(set_attr "type" "crypto_aese")]
5936 )
5937
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939 [(set (match_operand:V16QI 0 "register_operand" "=w")
5940 (unspec:V16QI [(xor:V16QI
5941 (match_operand:V16QI 1 "register_operand" "%0")
5942 (match_operand:V16QI 2 "register_operand" "w"))
5943 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5944 CRYPTO_AES))]
5945 "TARGET_SIMD && TARGET_AES"
5946 "aes<aes_op>\\t%0.16b, %2.16b"
5947 [(set_attr "type" "crypto_aese")]
5948 )
5949
5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5951 [(set (match_operand:V16QI 0 "register_operand" "=w")
5952 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5953 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5954 (match_operand:V16QI 2 "register_operand" "w"))]
5955 CRYPTO_AES))]
5956 "TARGET_SIMD && TARGET_AES"
5957 "aes<aes_op>\\t%0.16b, %2.16b"
5958 [(set_attr "type" "crypto_aese")]
5959 )
5960
5961 ;; When AES/AESMC fusion is enabled we want the register allocation to
5962 ;; look like:
5963 ;; AESE Vn, _
5964 ;; AESMC Vn, Vn
5965 ;; So prefer to tie operand 1 to operand 0 when fusing.
5966
5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5968 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5969 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5970 CRYPTO_AESMC))]
5971 "TARGET_SIMD && TARGET_AES"
5972 "aes<aesmc_op>\\t%0.16b, %1.16b"
5973 [(set_attr "type" "crypto_aesmc")
5974 (set_attr_alternative "enabled"
5975 [(if_then_else (match_test
5976 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5977 (const_string "yes" )
5978 (const_string "no"))
5979 (const_string "yes")])]
5980 )
5981
5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5983 ;; and enforce the register dependency without scheduling or register
5984 ;; allocation messing up the order or introducing moves inbetween.
5985 ;; Mash the two together during combine.
5986
5987 (define_insn "*aarch64_crypto_aese_fused"
5988 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5989 (unspec:V16QI
5990 [(unspec:V16QI
5991 [(match_operand:V16QI 1 "register_operand" "0")
5992 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5993 ] UNSPEC_AESMC))]
5994 "TARGET_SIMD && TARGET_AES
5995 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5996 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5997 [(set_attr "type" "crypto_aese")
5998 (set_attr "length" "8")]
5999 )
6000
6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6002 ;; and enforce the register dependency without scheduling or register
6003 ;; allocation messing up the order or introducing moves inbetween.
6004 ;; Mash the two together during combine.
6005
6006 (define_insn "*aarch64_crypto_aesd_fused"
6007 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6008 (unspec:V16QI
6009 [(unspec:V16QI
6010 [(match_operand:V16QI 1 "register_operand" "0")
6011 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6012 ] UNSPEC_AESIMC))]
6013 "TARGET_SIMD && TARGET_AES
6014 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6015 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6016 [(set_attr "type" "crypto_aese")
6017 (set_attr "length" "8")]
6018 )
6019
6020 ;; sha1
6021
6022 (define_insn "aarch64_crypto_sha1hsi"
6023 [(set (match_operand:SI 0 "register_operand" "=w")
6024 (unspec:SI [(match_operand:SI 1
6025 "register_operand" "w")]
6026 UNSPEC_SHA1H))]
6027 "TARGET_SIMD && TARGET_SHA2"
6028 "sha1h\\t%s0, %s1"
6029 [(set_attr "type" "crypto_sha1_fast")]
6030 )
6031
6032 (define_insn "aarch64_crypto_sha1hv4si"
6033 [(set (match_operand:SI 0 "register_operand" "=w")
6034 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6035 (parallel [(const_int 0)]))]
6036 UNSPEC_SHA1H))]
6037 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6038 "sha1h\\t%s0, %s1"
6039 [(set_attr "type" "crypto_sha1_fast")]
6040 )
6041
6042 (define_insn "aarch64_be_crypto_sha1hv4si"
6043 [(set (match_operand:SI 0 "register_operand" "=w")
6044 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6045 (parallel [(const_int 3)]))]
6046 UNSPEC_SHA1H))]
6047 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6048 "sha1h\\t%s0, %s1"
6049 [(set_attr "type" "crypto_sha1_fast")]
6050 )
6051
6052 (define_insn "aarch64_crypto_sha1su1v4si"
6053 [(set (match_operand:V4SI 0 "register_operand" "=w")
6054 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055 (match_operand:V4SI 2 "register_operand" "w")]
6056 UNSPEC_SHA1SU1))]
6057 "TARGET_SIMD && TARGET_SHA2"
6058 "sha1su1\\t%0.4s, %2.4s"
6059 [(set_attr "type" "crypto_sha1_fast")]
6060 )
6061
6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6063 [(set (match_operand:V4SI 0 "register_operand" "=w")
6064 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6065 (match_operand:SI 2 "register_operand" "w")
6066 (match_operand:V4SI 3 "register_operand" "w")]
6067 CRYPTO_SHA1))]
6068 "TARGET_SIMD && TARGET_SHA2"
6069 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6070 [(set_attr "type" "crypto_sha1_slow")]
6071 )
6072
6073 (define_insn "aarch64_crypto_sha1su0v4si"
6074 [(set (match_operand:V4SI 0 "register_operand" "=w")
6075 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6076 (match_operand:V4SI 2 "register_operand" "w")
6077 (match_operand:V4SI 3 "register_operand" "w")]
6078 UNSPEC_SHA1SU0))]
6079 "TARGET_SIMD && TARGET_SHA2"
6080 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6081 [(set_attr "type" "crypto_sha1_xor")]
6082 )
6083
6084 ;; sha256
6085
6086 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6087 [(set (match_operand:V4SI 0 "register_operand" "=w")
6088 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6089 (match_operand:V4SI 2 "register_operand" "w")
6090 (match_operand:V4SI 3 "register_operand" "w")]
6091 CRYPTO_SHA256))]
6092 "TARGET_SIMD && TARGET_SHA2"
6093 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6094 [(set_attr "type" "crypto_sha256_slow")]
6095 )
6096
6097 (define_insn "aarch64_crypto_sha256su0v4si"
6098 [(set (match_operand:V4SI 0 "register_operand" "=w")
6099 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6100 (match_operand:V4SI 2 "register_operand" "w")]
6101 UNSPEC_SHA256SU0))]
6102 "TARGET_SIMD && TARGET_SHA2"
6103 "sha256su0\\t%0.4s, %2.4s"
6104 [(set_attr "type" "crypto_sha256_fast")]
6105 )
6106
6107 (define_insn "aarch64_crypto_sha256su1v4si"
6108 [(set (match_operand:V4SI 0 "register_operand" "=w")
6109 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6110 (match_operand:V4SI 2 "register_operand" "w")
6111 (match_operand:V4SI 3 "register_operand" "w")]
6112 UNSPEC_SHA256SU1))]
6113 "TARGET_SIMD && TARGET_SHA2"
6114 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6115 [(set_attr "type" "crypto_sha256_slow")]
6116 )
6117
6118 ;; sha512
6119
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6121 [(set (match_operand:V2DI 0 "register_operand" "=w")
6122 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6123 (match_operand:V2DI 2 "register_operand" "w")
6124 (match_operand:V2DI 3 "register_operand" "w")]
6125 CRYPTO_SHA512))]
6126 "TARGET_SIMD && TARGET_SHA3"
6127 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6128 [(set_attr "type" "crypto_sha512")]
6129 )
6130
6131 (define_insn "aarch64_crypto_sha512su0qv2di"
6132 [(set (match_operand:V2DI 0 "register_operand" "=w")
6133 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6134 (match_operand:V2DI 2 "register_operand" "w")]
6135 UNSPEC_SHA512SU0))]
6136 "TARGET_SIMD && TARGET_SHA3"
6137 "sha512su0\\t%0.2d, %2.2d"
6138 [(set_attr "type" "crypto_sha512")]
6139 )
6140
6141 (define_insn "aarch64_crypto_sha512su1qv2di"
6142 [(set (match_operand:V2DI 0 "register_operand" "=w")
6143 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6144 (match_operand:V2DI 2 "register_operand" "w")
6145 (match_operand:V2DI 3 "register_operand" "w")]
6146 UNSPEC_SHA512SU1))]
6147 "TARGET_SIMD && TARGET_SHA3"
6148 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6149 [(set_attr "type" "crypto_sha512")]
6150 )
6151
6152 ;; sha3
6153
6154 (define_insn "eor3q<mode>4"
6155 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6156 (xor:VQ_I
6157 (xor:VQ_I
6158 (match_operand:VQ_I 2 "register_operand" "w")
6159 (match_operand:VQ_I 3 "register_operand" "w"))
6160 (match_operand:VQ_I 1 "register_operand" "w")))]
6161 "TARGET_SIMD && TARGET_SHA3"
6162 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6163 [(set_attr "type" "crypto_sha3")]
6164 )
6165
6166 (define_insn "aarch64_rax1qv2di"
6167 [(set (match_operand:V2DI 0 "register_operand" "=w")
6168 (xor:V2DI
6169 (rotate:V2DI
6170 (match_operand:V2DI 2 "register_operand" "w")
6171 (const_int 1))
6172 (match_operand:V2DI 1 "register_operand" "w")))]
6173 "TARGET_SIMD && TARGET_SHA3"
6174 "rax1\\t%0.2d, %1.2d, %2.2d"
6175 [(set_attr "type" "crypto_sha3")]
6176 )
6177
6178 (define_insn "aarch64_xarqv2di"
6179 [(set (match_operand:V2DI 0 "register_operand" "=w")
6180 (rotatert:V2DI
6181 (xor:V2DI
6182 (match_operand:V2DI 1 "register_operand" "%w")
6183 (match_operand:V2DI 2 "register_operand" "w"))
6184 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6185 "TARGET_SIMD && TARGET_SHA3"
6186 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6187 [(set_attr "type" "crypto_sha3")]
6188 )
6189
6190 (define_insn "bcaxq<mode>4"
6191 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6192 (xor:VQ_I
6193 (and:VQ_I
6194 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6195 (match_operand:VQ_I 2 "register_operand" "w"))
6196 (match_operand:VQ_I 1 "register_operand" "w")))]
6197 "TARGET_SIMD && TARGET_SHA3"
6198 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6199 [(set_attr "type" "crypto_sha3")]
6200 )
6201
6202 ;; SM3
6203
6204 (define_insn "aarch64_sm3ss1qv4si"
6205 [(set (match_operand:V4SI 0 "register_operand" "=w")
6206 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6207 (match_operand:V4SI 2 "register_operand" "w")
6208 (match_operand:V4SI 3 "register_operand" "w")]
6209 UNSPEC_SM3SS1))]
6210 "TARGET_SIMD && TARGET_SM4"
6211 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6212 [(set_attr "type" "crypto_sm3")]
6213 )
6214
6215
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6217 [(set (match_operand:V4SI 0 "register_operand" "=w")
6218 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219 (match_operand:V4SI 2 "register_operand" "w")
6220 (match_operand:V4SI 3 "register_operand" "w")
6221 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6222 CRYPTO_SM3TT))]
6223 "TARGET_SIMD && TARGET_SM4"
6224 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6225 [(set_attr "type" "crypto_sm3")]
6226 )
6227
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6229 [(set (match_operand:V4SI 0 "register_operand" "=w")
6230 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6231 (match_operand:V4SI 2 "register_operand" "w")
6232 (match_operand:V4SI 3 "register_operand" "w")]
6233 CRYPTO_SM3PART))]
6234 "TARGET_SIMD && TARGET_SM4"
6235 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6236 [(set_attr "type" "crypto_sm3")]
6237 )
6238
6239 ;; SM4
6240
6241 (define_insn "aarch64_sm4eqv4si"
6242 [(set (match_operand:V4SI 0 "register_operand" "=w")
6243 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6244 (match_operand:V4SI 2 "register_operand" "w")]
6245 UNSPEC_SM4E))]
6246 "TARGET_SIMD && TARGET_SM4"
6247 "sm4e\\t%0.4s, %2.4s"
6248 [(set_attr "type" "crypto_sm4")]
6249 )
6250
6251 (define_insn "aarch64_sm4ekeyqv4si"
6252 [(set (match_operand:V4SI 0 "register_operand" "=w")
6253 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6254 (match_operand:V4SI 2 "register_operand" "w")]
6255 UNSPEC_SM4EKEY))]
6256 "TARGET_SIMD && TARGET_SM4"
6257 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6258 [(set_attr "type" "crypto_sm4")]
6259 )
6260
6261 ;; fp16fml
6262
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6264 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6265 (unspec:VDQSF
6266 [(match_operand:VDQSF 1 "register_operand" "0")
6267 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6269 VFMLA16_LOW))]
6270 "TARGET_F16FML"
6271 {
6272 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6273 <nunits> * 2, false);
6274 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6275 <nunits> * 2, false);
6276
6277 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6278 operands[1],
6279 operands[2],
6280 operands[3],
6281 p1, p2));
6282 DONE;
6283
6284 })
6285
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6287 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6288 (unspec:VDQSF
6289 [(match_operand:VDQSF 1 "register_operand" "0")
6290 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6291 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6292 VFMLA16_HIGH))]
6293 "TARGET_F16FML"
6294 {
6295 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6296 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6297
6298 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6299 operands[1],
6300 operands[2],
6301 operands[3],
6302 p1, p2));
6303 DONE;
6304 })
6305
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6307 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6308 (fma:VDQSF
6309 (float_extend:VDQSF
6310 (vec_select:<VFMLA_SEL_W>
6311 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6312 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6313 (float_extend:VDQSF
6314 (vec_select:<VFMLA_SEL_W>
6315 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6316 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6317 (match_operand:VDQSF 1 "register_operand" "0")))]
6318 "TARGET_F16FML"
6319 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6320 [(set_attr "type" "neon_fp_mul_s")]
6321 )
6322
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6324 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6325 (fma:VDQSF
6326 (float_extend:VDQSF
6327 (neg:<VFMLA_SEL_W>
6328 (vec_select:<VFMLA_SEL_W>
6329 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6330 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6331 (float_extend:VDQSF
6332 (vec_select:<VFMLA_SEL_W>
6333 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6334 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6335 (match_operand:VDQSF 1 "register_operand" "0")))]
6336 "TARGET_F16FML"
6337 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6338 [(set_attr "type" "neon_fp_mul_s")]
6339 )
6340
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6342 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6343 (fma:VDQSF
6344 (float_extend:VDQSF
6345 (vec_select:<VFMLA_SEL_W>
6346 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6347 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6348 (float_extend:VDQSF
6349 (vec_select:<VFMLA_SEL_W>
6350 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6351 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6352 (match_operand:VDQSF 1 "register_operand" "0")))]
6353 "TARGET_F16FML"
6354 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6355 [(set_attr "type" "neon_fp_mul_s")]
6356 )
6357
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6359 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6360 (fma:VDQSF
6361 (float_extend:VDQSF
6362 (neg:<VFMLA_SEL_W>
6363 (vec_select:<VFMLA_SEL_W>
6364 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6365 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6366 (float_extend:VDQSF
6367 (vec_select:<VFMLA_SEL_W>
6368 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6369 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6370 (match_operand:VDQSF 1 "register_operand" "0")))]
6371 "TARGET_F16FML"
6372 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6373 [(set_attr "type" "neon_fp_mul_s")]
6374 )
6375
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6377 [(set (match_operand:V2SF 0 "register_operand" "")
6378 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6379 (match_operand:V4HF 2 "register_operand" "")
6380 (match_operand:V4HF 3 "register_operand" "")
6381 (match_operand:SI 4 "aarch64_imm2" "")]
6382 VFMLA16_LOW))]
6383 "TARGET_F16FML"
6384 {
6385 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6386 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6387
6388 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6389 operands[1],
6390 operands[2],
6391 operands[3],
6392 p1, lane));
6393 DONE;
6394 }
6395 )
6396
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6398 [(set (match_operand:V2SF 0 "register_operand" "")
6399 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6400 (match_operand:V4HF 2 "register_operand" "")
6401 (match_operand:V4HF 3 "register_operand" "")
6402 (match_operand:SI 4 "aarch64_imm2" "")]
6403 VFMLA16_HIGH))]
6404 "TARGET_F16FML"
6405 {
6406 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6407 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6408
6409 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6410 operands[1],
6411 operands[2],
6412 operands[3],
6413 p1, lane));
6414 DONE;
6415 })
6416
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6418 [(set (match_operand:V2SF 0 "register_operand" "=w")
6419 (fma:V2SF
6420 (float_extend:V2SF
6421 (vec_select:V2HF
6422 (match_operand:V4HF 2 "register_operand" "w")
6423 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6424 (float_extend:V2SF
6425 (vec_duplicate:V2HF
6426 (vec_select:HF
6427 (match_operand:V4HF 3 "register_operand" "x")
6428 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6429 (match_operand:V2SF 1 "register_operand" "0")))]
6430 "TARGET_F16FML"
6431 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6432 [(set_attr "type" "neon_fp_mul_s")]
6433 )
6434
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6436 [(set (match_operand:V2SF 0 "register_operand" "=w")
6437 (fma:V2SF
6438 (float_extend:V2SF
6439 (neg:V2HF
6440 (vec_select:V2HF
6441 (match_operand:V4HF 2 "register_operand" "w")
6442 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6443 (float_extend:V2SF
6444 (vec_duplicate:V2HF
6445 (vec_select:HF
6446 (match_operand:V4HF 3 "register_operand" "x")
6447 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6448 (match_operand:V2SF 1 "register_operand" "0")))]
6449 "TARGET_F16FML"
6450 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6451 [(set_attr "type" "neon_fp_mul_s")]
6452 )
6453
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6455 [(set (match_operand:V2SF 0 "register_operand" "=w")
6456 (fma:V2SF
6457 (float_extend:V2SF
6458 (vec_select:V2HF
6459 (match_operand:V4HF 2 "register_operand" "w")
6460 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6461 (float_extend:V2SF
6462 (vec_duplicate:V2HF
6463 (vec_select:HF
6464 (match_operand:V4HF 3 "register_operand" "x")
6465 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6466 (match_operand:V2SF 1 "register_operand" "0")))]
6467 "TARGET_F16FML"
6468 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6469 [(set_attr "type" "neon_fp_mul_s")]
6470 )
6471
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6473 [(set (match_operand:V2SF 0 "register_operand" "=w")
6474 (fma:V2SF
6475 (float_extend:V2SF
6476 (neg:V2HF
6477 (vec_select:V2HF
6478 (match_operand:V4HF 2 "register_operand" "w")
6479 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6480 (float_extend:V2SF
6481 (vec_duplicate:V2HF
6482 (vec_select:HF
6483 (match_operand:V4HF 3 "register_operand" "x")
6484 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6485 (match_operand:V2SF 1 "register_operand" "0")))]
6486 "TARGET_F16FML"
6487 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6488 [(set_attr "type" "neon_fp_mul_s")]
6489 )
6490
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6492 [(set (match_operand:V4SF 0 "register_operand" "")
6493 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6494 (match_operand:V8HF 2 "register_operand" "")
6495 (match_operand:V8HF 3 "register_operand" "")
6496 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6497 VFMLA16_LOW))]
6498 "TARGET_F16FML"
6499 {
6500 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6501 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6502
6503 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6504 operands[1],
6505 operands[2],
6506 operands[3],
6507 p1, lane));
6508 DONE;
6509 })
6510
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6512 [(set (match_operand:V4SF 0 "register_operand" "")
6513 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6514 (match_operand:V8HF 2 "register_operand" "")
6515 (match_operand:V8HF 3 "register_operand" "")
6516 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6517 VFMLA16_HIGH))]
6518 "TARGET_F16FML"
6519 {
6520 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6521 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6522
6523 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6524 operands[1],
6525 operands[2],
6526 operands[3],
6527 p1, lane));
6528 DONE;
6529 })
6530
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6532 [(set (match_operand:V4SF 0 "register_operand" "=w")
6533 (fma:V4SF
6534 (float_extend:V4SF
6535 (vec_select:V4HF
6536 (match_operand:V8HF 2 "register_operand" "w")
6537 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6538 (float_extend:V4SF
6539 (vec_duplicate:V4HF
6540 (vec_select:HF
6541 (match_operand:V8HF 3 "register_operand" "x")
6542 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6543 (match_operand:V4SF 1 "register_operand" "0")))]
6544 "TARGET_F16FML"
6545 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6546 [(set_attr "type" "neon_fp_mul_s")]
6547 )
6548
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6550 [(set (match_operand:V4SF 0 "register_operand" "=w")
6551 (fma:V4SF
6552 (float_extend:V4SF
6553 (neg:V4HF
6554 (vec_select:V4HF
6555 (match_operand:V8HF 2 "register_operand" "w")
6556 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6557 (float_extend:V4SF
6558 (vec_duplicate:V4HF
6559 (vec_select:HF
6560 (match_operand:V8HF 3 "register_operand" "x")
6561 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6562 (match_operand:V4SF 1 "register_operand" "0")))]
6563 "TARGET_F16FML"
6564 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6565 [(set_attr "type" "neon_fp_mul_s")]
6566 )
6567
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6569 [(set (match_operand:V4SF 0 "register_operand" "=w")
6570 (fma:V4SF
6571 (float_extend:V4SF
6572 (vec_select:V4HF
6573 (match_operand:V8HF 2 "register_operand" "w")
6574 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6575 (float_extend:V4SF
6576 (vec_duplicate:V4HF
6577 (vec_select:HF
6578 (match_operand:V8HF 3 "register_operand" "x")
6579 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6580 (match_operand:V4SF 1 "register_operand" "0")))]
6581 "TARGET_F16FML"
6582 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6583 [(set_attr "type" "neon_fp_mul_s")]
6584 )
6585
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6587 [(set (match_operand:V4SF 0 "register_operand" "=w")
6588 (fma:V4SF
6589 (float_extend:V4SF
6590 (neg:V4HF
6591 (vec_select:V4HF
6592 (match_operand:V8HF 2 "register_operand" "w")
6593 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6594 (float_extend:V4SF
6595 (vec_duplicate:V4HF
6596 (vec_select:HF
6597 (match_operand:V8HF 3 "register_operand" "x")
6598 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6599 (match_operand:V4SF 1 "register_operand" "0")))]
6600 "TARGET_F16FML"
6601 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6602 [(set_attr "type" "neon_fp_mul_s")]
6603 )
6604
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6606 [(set (match_operand:V2SF 0 "register_operand" "")
6607 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6608 (match_operand:V4HF 2 "register_operand" "")
6609 (match_operand:V8HF 3 "register_operand" "")
6610 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6611 VFMLA16_LOW))]
6612 "TARGET_F16FML"
6613 {
6614 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6615 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6616
6617 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6618 operands[1],
6619 operands[2],
6620 operands[3],
6621 p1, lane));
6622 DONE;
6623
6624 })
6625
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6627 [(set (match_operand:V2SF 0 "register_operand" "")
6628 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6629 (match_operand:V4HF 2 "register_operand" "")
6630 (match_operand:V8HF 3 "register_operand" "")
6631 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6632 VFMLA16_HIGH))]
6633 "TARGET_F16FML"
6634 {
6635 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6636 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6637
6638 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6639 operands[1],
6640 operands[2],
6641 operands[3],
6642 p1, lane));
6643 DONE;
6644
6645 })
6646
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6648 [(set (match_operand:V2SF 0 "register_operand" "=w")
6649 (fma:V2SF
6650 (float_extend:V2SF
6651 (vec_select:V2HF
6652 (match_operand:V4HF 2 "register_operand" "w")
6653 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6654 (float_extend:V2SF
6655 (vec_duplicate:V2HF
6656 (vec_select:HF
6657 (match_operand:V8HF 3 "register_operand" "x")
6658 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6659 (match_operand:V2SF 1 "register_operand" "0")))]
6660 "TARGET_F16FML"
6661 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6662 [(set_attr "type" "neon_fp_mul_s")]
6663 )
6664
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6666 [(set (match_operand:V2SF 0 "register_operand" "=w")
6667 (fma:V2SF
6668 (float_extend:V2SF
6669 (neg:V2HF
6670 (vec_select:V2HF
6671 (match_operand:V4HF 2 "register_operand" "w")
6672 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6673 (float_extend:V2SF
6674 (vec_duplicate:V2HF
6675 (vec_select:HF
6676 (match_operand:V8HF 3 "register_operand" "x")
6677 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678 (match_operand:V2SF 1 "register_operand" "0")))]
6679 "TARGET_F16FML"
6680 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6681 [(set_attr "type" "neon_fp_mul_s")]
6682 )
6683
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6685 [(set (match_operand:V2SF 0 "register_operand" "=w")
6686 (fma:V2SF
6687 (float_extend:V2SF
6688 (vec_select:V2HF
6689 (match_operand:V4HF 2 "register_operand" "w")
6690 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6691 (float_extend:V2SF
6692 (vec_duplicate:V2HF
6693 (vec_select:HF
6694 (match_operand:V8HF 3 "register_operand" "x")
6695 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6696 (match_operand:V2SF 1 "register_operand" "0")))]
6697 "TARGET_F16FML"
6698 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6699 [(set_attr "type" "neon_fp_mul_s")]
6700 )
6701
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6703 [(set (match_operand:V2SF 0 "register_operand" "=w")
6704 (fma:V2SF
6705 (float_extend:V2SF
6706 (neg:V2HF
6707 (vec_select:V2HF
6708 (match_operand:V4HF 2 "register_operand" "w")
6709 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6710 (float_extend:V2SF
6711 (vec_duplicate:V2HF
6712 (vec_select:HF
6713 (match_operand:V8HF 3 "register_operand" "x")
6714 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6715 (match_operand:V2SF 1 "register_operand" "0")))]
6716 "TARGET_F16FML"
6717 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6718 [(set_attr "type" "neon_fp_mul_s")]
6719 )
6720
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6722 [(set (match_operand:V4SF 0 "register_operand" "")
6723 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6724 (match_operand:V8HF 2 "register_operand" "")
6725 (match_operand:V4HF 3 "register_operand" "")
6726 (match_operand:SI 4 "aarch64_imm2" "")]
6727 VFMLA16_LOW))]
6728 "TARGET_F16FML"
6729 {
6730 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6731 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6732
6733 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6734 operands[1],
6735 operands[2],
6736 operands[3],
6737 p1, lane));
6738 DONE;
6739 })
6740
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6742 [(set (match_operand:V4SF 0 "register_operand" "")
6743 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6744 (match_operand:V8HF 2 "register_operand" "")
6745 (match_operand:V4HF 3 "register_operand" "")
6746 (match_operand:SI 4 "aarch64_imm2" "")]
6747 VFMLA16_HIGH))]
6748 "TARGET_F16FML"
6749 {
6750 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6751 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6752
6753 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6754 operands[1],
6755 operands[2],
6756 operands[3],
6757 p1, lane));
6758 DONE;
6759 })
6760
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6762 [(set (match_operand:V4SF 0 "register_operand" "=w")
6763 (fma:V4SF
6764 (float_extend:V4SF
6765 (vec_select:V4HF
6766 (match_operand:V8HF 2 "register_operand" "w")
6767 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6768 (float_extend:V4SF
6769 (vec_duplicate:V4HF
6770 (vec_select:HF
6771 (match_operand:V4HF 3 "register_operand" "x")
6772 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6773 (match_operand:V4SF 1 "register_operand" "0")))]
6774 "TARGET_F16FML"
6775 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6776 [(set_attr "type" "neon_fp_mul_s")]
6777 )
6778
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6780 [(set (match_operand:V4SF 0 "register_operand" "=w")
6781 (fma:V4SF
6782 (float_extend:V4SF
6783 (neg:V4HF
6784 (vec_select:V4HF
6785 (match_operand:V8HF 2 "register_operand" "w")
6786 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6787 (float_extend:V4SF
6788 (vec_duplicate:V4HF
6789 (vec_select:HF
6790 (match_operand:V4HF 3 "register_operand" "x")
6791 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6792 (match_operand:V4SF 1 "register_operand" "0")))]
6793 "TARGET_F16FML"
6794 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6795 [(set_attr "type" "neon_fp_mul_s")]
6796 )
6797
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6799 [(set (match_operand:V4SF 0 "register_operand" "=w")
6800 (fma:V4SF
6801 (float_extend:V4SF
6802 (vec_select:V4HF
6803 (match_operand:V8HF 2 "register_operand" "w")
6804 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6805 (float_extend:V4SF
6806 (vec_duplicate:V4HF
6807 (vec_select:HF
6808 (match_operand:V4HF 3 "register_operand" "x")
6809 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6810 (match_operand:V4SF 1 "register_operand" "0")))]
6811 "TARGET_F16FML"
6812 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6813 [(set_attr "type" "neon_fp_mul_s")]
6814 )
6815
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6817 [(set (match_operand:V4SF 0 "register_operand" "=w")
6818 (fma:V4SF
6819 (float_extend:V4SF
6820 (neg:V4HF
6821 (vec_select:V4HF
6822 (match_operand:V8HF 2 "register_operand" "w")
6823 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6824 (float_extend:V4SF
6825 (vec_duplicate:V4HF
6826 (vec_select:HF
6827 (match_operand:V4HF 3 "register_operand" "x")
6828 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6829 (match_operand:V4SF 1 "register_operand" "0")))]
6830 "TARGET_F16FML"
6831 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6832 [(set_attr "type" "neon_fp_mul_s")]
6833 )
6834
6835 ;; pmull
6836
6837 (define_insn "aarch64_crypto_pmulldi"
6838 [(set (match_operand:TI 0 "register_operand" "=w")
6839 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6840 (match_operand:DI 2 "register_operand" "w")]
6841 UNSPEC_PMULL))]
6842 "TARGET_SIMD && TARGET_AES"
6843 "pmull\\t%0.1q, %1.1d, %2.1d"
6844 [(set_attr "type" "crypto_pmull")]
6845 )
6846
6847 (define_insn "aarch64_crypto_pmullv2di"
6848 [(set (match_operand:TI 0 "register_operand" "=w")
6849 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6850 (match_operand:V2DI 2 "register_operand" "w")]
6851 UNSPEC_PMULL2))]
6852 "TARGET_SIMD && TARGET_AES"
6853 "pmull2\\t%0.1q, %1.2d, %2.2d"
6854 [(set_attr "type" "crypto_pmull")]
6855 )