]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64-simd.md
[PATCH][AArch64] Stop redundant zero-extension after UMOV when in DI mode
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; General vector move expander.  A memory destination only accepts a
;; non-register source when that source is an all-zero immediate that can
;; be stored as xzr (STR for 8-byte modes, STP xzr,xzr for 16-byte modes);
;; anything else is forced into a register first.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
;; Misaligned vector move.  AdvSIMD loads/stores tolerate misalignment, so
;; this only has to guarantee at least one operand ends up in a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
;; Broadcast a scalar into every lane of an integer vector.  The scalar may
;; live in a SIMD register (DUP from element 0) or, at a cost ("?r"), in a
;; general-purpose register (DUP from GP).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
;; Broadcast a floating-point scalar (element 0 of a SIMD register) into
;; every lane of an FP vector.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
;; Broadcast a selected lane of a vector into all lanes of a vector of the
;; same mode.  The lane index is adjusted for endianness at output time.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
;; As above, but the source vector has the opposite width (64 <-> 128 bit),
;; so the lane index is canonicalised in the source vector's mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
;; 64-bit (D-register) vector move: load, zero-store via xzr, store,
;; SIMD-to-SIMD move, SIMD<->GP transfers, GP-to-GP move, and immediate.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
;; 128-bit (Q-register) vector move.  Zero is stored as STP xzr,xzr;
;; alternatives involving GP registers emit "#" and are split after reload
;; (see the define_splits below), hence length 8 for those alternatives.
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
;; Load a pair of D registers with one LDP.  The second memory operand must
;; be exactly the first address plus the mode size (i.e. adjacent).
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
190 "ldp\\t%d0, %d2, %1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
;; Store a pair of D registers with one STP; same adjacency requirement.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
204 "stp\\t%d1, %d3, %0"
205 [(set_attr "type" "neon_stp")]
206 )
207
;; Load a pair of adjacent Q registers with one LDP.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
;; Store a pair of adjacent Q registers with one STP.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %0"
232 [(set_attr "type" "neon_stp_q")]
233 )
234
235
;; After reload, split a Q-mode move held entirely in GP registers into a
;; pair of DImode register moves.
236 (define_split
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
242 [(const_int 0)]
243 {
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245 DONE;
246 })
247
;; After reload, split a Q-mode move that crosses between the FP and GP
;; register files in either direction.
248 (define_split
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254 [(const_int 0)]
255 {
256 aarch64_split_simd_mov (operands[0], operands[1]);
257 DONE;
258 })
259
;; Split a 128-bit SIMD<->GP move into two half-width operations:
;; GP source -> move lo then hi quad into the SIMD register;
;; SIMD source -> UMOV each half out via the patterns below.
260 (define_expand "aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
263 "TARGET_SIMD"
264 {
265 rtx dst = operands[0];
266 rtx src = operands[1];
267
268 if (GP_REGNUM_P (REGNO (src)))
269 {
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
272
273 emit_insn
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
275 emit_insn
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
277 }
278
279 else
280 {
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285
286 emit_insn
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288 emit_insn
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290 }
291 DONE;
292 }
293 )
294
;; Move the low 64 bits of a Q register into a GP register (UMOV d[0]).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297 (vec_select:<VHALF>
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
301 "umov\t%0, %1.d[0]"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
304 ])
305
;; Move the high 64 bits of a Q register into a GP register (UMOV d[1]).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308 (vec_select:<VHALF>
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
312 "umov\t%0, %1.d[1]"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
315 ])
316
;; OR-NOT: operand order is swapped in the output because ORN negates its
;; second assembly operand while the RTL negates operand 1.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "TARGET_SIMD"
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
324 )
325
;; AND-NOT (bit clear); same operand swap as ORN above.
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
330 "TARGET_SIMD"
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
333 )
334
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
339 "TARGET_SIMD"
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
342 )
343
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
348 "TARGET_SIMD"
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
351 )
352
;; Vector integer multiplication (byte/half/word elements only; MUL has
;; no 64-bit element form).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357 "TARGET_SIMD"
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
360 )
361
;; Byte-swap each element using the element-sized REV variant.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365 "TARGET_SIMD"
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
368 )
369
;; Reverse the bits within each byte.
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373 UNSPEC_RBIT))]
374 "TARGET_SIMD"
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
377 )
378
;; Count trailing zeros: byte-swap the elements, bit-reverse each byte
;; (together a full element bit-reversal), then count leading zeros.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
382 "TARGET_SIMD"
383 {
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386 <MODE>mode, 0);
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389 DONE;
390 }
391 )
392
;; xorsign(x, y): XOR the sign bit of each element of operand 1 with the
;; sign bit of the corresponding element of operand 2, computed in the
;; integer-equivalent mode as op1 ^ (op2 & sign-bit-mask).
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
397 "TARGET_SIMD"
398 {
399
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
404
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
407
408 /* Mask with only the element's sign bit set.  */
409 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410
411 emit_move_insn (v_bitmask,
412 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
413 HOST_WIDE_INT_M1U << bits));
414
415 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
416 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
417 emit_move_insn (operands[0],
418 lowpart_subreg (<MODE>mode, op1x, imode));
419 DONE;
420 }
421 )
421
422 ;; These instructions map to the __builtins for the Dot Product operations.
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
428 DOTPROD)))]
429 "TARGET_DOTPROD"
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
432 )
433
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
438 ;;
439 ;; for (i=0; i<len; i++) {
440 ;; c = a[i] * b[i];
441 ;; r += c;
442 ;; }
443 ;; return result;
444 ;;
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
447 ;;
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
451 ;; ...
452 ;;
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
458 DOTPROD)
459 (match_operand:VS 3 "register_operand")))]
460 "TARGET_DOTPROD"
461 {
462 emit_insn (
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
464 operands[2]));
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
466 DONE;
467 })
468
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; Indexed dot product against a 32-bit group of a 64-bit vector; the lane
;; index is endian-adjusted in V8QImode.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
477 DOTPROD)))]
478 "TARGET_DOTPROD"
479 {
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
482 }
483 [(set_attr "type" "neon_dot")]
484 )
485
;; As above but indexing into a 128-bit vector (V16QI lane numbering).
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
492 DOTPROD)))]
493 "TARGET_DOTPROD"
494 {
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
497 }
498 [(set_attr "type" "neon_dot")]
499 )
500
;; copysign: bitwise-select the sign bit from operand 2 and the remaining
;; bits from operand 1, via BSL with a sign-bit-only mask.
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
506 {
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
509
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
515 DONE;
516 }
517 )
518
;; Multiply a vector by one broadcast lane of another vector of the same
;; mode (MUL/FMUL by-element form).
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
521 (mult:VMUL
522 (vec_duplicate:VMUL
523 (vec_select:<VEL>
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
527 "TARGET_SIMD"
528 {
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
531 }
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
533 )
534
;; As above, but the lane comes from a vector of the opposite width.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
539 (vec_select:<VEL>
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
543 "TARGET_SIMD"
544 {
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
547 }
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
549 )
550
;; Multiply a vector by a duplicated scalar register (uses element 0).
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
553 (mult:VMUL
554 (vec_duplicate:VMUL
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
557 "TARGET_SIMD"
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
560 )
561
;; Reciprocal square-root estimate (FRSQRTE).
562 (define_insn "aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
565 UNSPEC_RSQRTE))]
566 "TARGET_SIMD"
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
569
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
570 (define_insn "aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
574 UNSPEC_RSQRTS))]
575 "TARGET_SIMD"
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
578
;; Expand 1/sqrt(x) through the iterative approximation sequence.
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
582 UNSPEC_RSQRT))]
583 "TARGET_SIMD"
584 {
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
586 DONE;
587 })
588
;; Scalar DF multiply by a selected lane of a V2DF vector (by-element FMUL).
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
591 (mult:DF
592 (vec_select:DF
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
596 "TARGET_SIMD"
597 {
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
600 }
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
602 )
603
;; Vector integer negation.
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
607 "TARGET_SIMD"
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
610 )
611
;; Vector integer absolute value.
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
615 "TARGET_SIMD"
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
618 )
619
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
622 ;; as SABD.
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
625 (unspec:VSDQ_I_DI
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
627 UNSPEC_ABS))]
628 "TARGET_SIMD"
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
631 )
632
;; Signed absolute difference: |op1 - op2|.
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
638 "TARGET_SIMD"
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
641 )
642
;; Widening absolute difference of the high halves ([SU]ABDL2).
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
647 ABDL2))]
648 "TARGET_SIMD"
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
651 )
652
;; Widening absolute difference and accumulate ([SU]ABAL); operand 3 is
;; the accumulator, tied to the output.
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
658 ABAL))]
659 "TARGET_SIMD"
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
662 )
663
;; Pairwise add-long and accumulate ([SU]ADALP); operand 2 accumulates.
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
668 ADALP))]
669 "TARGET_SIMD"
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
672 )
673
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
684
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
690 "TARGET_SIMD"
691 {
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
694 operands[2]));
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
698 operands[3]));
699 emit_move_insn (operands[0], operands[3]);
700 DONE;
701 }
702 )
703
;; Absolute difference and accumulate: op3 + |op1 - op2| (SABA).
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
710 "TARGET_SIMD"
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
713 )
714
;; Floating-point absolute difference: |op1 - op2| (FABD).
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
717 (abs:VHSDF_HSDF
718 (minus:VHSDF_HSDF
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
721 "TARGET_SIMD"
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
724 )
725
726 ;; For AND (vector, register) and BIC (vector, immediate)
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
731 "TARGET_SIMD"
732 {
733 switch (which_alternative)
734 {
735 case 0:
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
737 case 1:
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
739 AARCH64_CHECK_BIC);
740 default:
741 gcc_unreachable ();
742 }
743 }
744 [(set_attr "type" "neon_logic<q>")]
745 )
746
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
752 "TARGET_SIMD"
753 {
754 switch (which_alternative)
755 {
756 case 0:
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
758 case 1:
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
760 AARCH64_CHECK_ORR);
761 default:
762 gcc_unreachable ();
763 }
764 }
765 [(set_attr "type" "neon_logic<q>")]
766 )
767
;; Vector exclusive-or (EOR).
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
772 "TARGET_SIMD"
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
775 )
776
;; Vector bitwise NOT.
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
780 "TARGET_SIMD"
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
783 )
784
;; Insert a scalar into one lane of a vector.  Operand 2 arrives as a
;; one-hot vec_merge mask; the output code converts it to a lane number
;; (endian-adjusted) and emits INS from a SIMD lane, INS from a GP
;; register, or LD1 of a single lane from memory.
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
787 (vec_merge:VALL_F16
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
792 "TARGET_SIMD"
793 {
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
797 {
798 case 0:
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
800 case 1:
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
802 case 2:
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
804 default:
805 gcc_unreachable ();
806 }
807 }
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
809 )
810
;; Copy one lane of a vector into a lane of another vector of the same
;; mode (INS element form).
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
813 (vec_merge:VALL_F16
814 (vec_duplicate:VALL_F16
815 (vec_select:<VEL>
816 (match_operand:VALL_F16 3 "register_operand" "w")
817 (parallel
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
821 "TARGET_SIMD"
822 {
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
826
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
828 }
829 [(set_attr "type" "neon_ins<q>")]
830 )
831
;; As above, but the source lane comes from a vector of the opposite width.
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
836 (vec_select:<VEL>
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
838 (parallel
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
842 "TARGET_SIMD"
843 {
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
848
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
850 }
851 [(set_attr "type" "neon_ins<q>")]
852 )
853
;; Logical shift right by immediate (USHR).
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
858 "TARGET_SIMD"
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
861 )
862
;; Arithmetic shift right by immediate (SSHR).
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
867 "TARGET_SIMD"
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
870 )
871
;; Shift left by immediate (SHL).
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
876 "TARGET_SIMD"
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
879 )
880
;; Shift left by a per-lane register amount (SSHL).
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
885 "TARGET_SIMD"
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
888 )
889
;; Register shift, unsigned semantics (USHL); kept as an unspec because
;; negative shift amounts shift right.
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
895 "TARGET_SIMD"
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
898 )
899
;; Register shift, signed semantics (SSHL); unspec for the same reason.
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
905 "TARGET_SIMD"
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
908 )
909
;; Expand a left shift of every lane by a scalar (SImode) amount.
;; An in-range constant maps onto the immediate form (SHL); any other
;; amount is broadcast into a vector register and expanded via SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL only accepts immediates in [0, element width - 1].  */
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  /* Out-of-range constant: fall through to the register form.  */
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar amount to every lane and use SSHL.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
956
;; Expand a logical right shift of every lane by a scalar (SImode)
;; amount.  An in-range constant maps onto USHR; any other amount is
;; negated, broadcast, and expanded via USHL (which shifts right for
;; negative lane counts).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* USHR accepts immediates in [1, element width].  */
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the amount, broadcast it, and shift with USHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
1003
;; Expand an arithmetic right shift of every lane by a scalar (SImode)
;; amount.  An in-range constant maps onto SSHR; any other amount is
;; negated, broadcast, and expanded via SSHL (which shifts arithmetically
;; right for negative lane counts).
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR accepts immediates in [1, element width].  */
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate the amount, broadcast it, and shift with SSHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
1050
;; Vector-shift-by-vector left shift maps directly onto SSHL.
(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  /* SSHL shifts right for negative lane counts, so shift by the
     negated amounts.  */
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; Vector-shift-by-vector logical right shift: negate the lane counts
;; and use USHL, which shifts logically right for negative amounts.
(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* A logical shift right by 64 clears every bit; the standard DImode
       pattern cannot express a shift of the full register width.  */
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1121
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    /* Lane order is reversed in the register on big-endian, so moving
       elements towards lane zero is a left shift of the register there.  */
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

;; Set lane number operands[2] of vector operands[0] to the scalar
;; value operands[1], leaving the other lanes unchanged.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    /* The underlying insn takes a one-hot lane mask, not a lane index.  */
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)
1150
1151
;; Integer multiply-accumulate: MLA.  Operand 1 is the accumulator and
;; must be allocated to the output register ("0" constraint) since the
;; instruction reads and writes it in place.
(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (mult:VDQ_BHSI
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")
			 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
		       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLA with one multiplicand broadcast from a lane of another vector of
;; the same mode.
(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))
	  (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane number to the architectural lane number.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane comes from a vector of the other width
;; (VSWAP_WIDTH, e.g. a V2SI lane feeding a V4SI operation).
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))
	  (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLA with one multiplicand a duplicated scalar register; lane 0 of the
;; scalar's vector view is used.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Integer multiply-subtract: MLS.  Operand 1 is the in-place accumulator.
(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
			(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
				       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS with one multiplicand broadcast from a lane of a same-mode vector.
(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 4 "register_operand" "0")
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS with the lane taken from a vector of the other width.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 4 "register_operand" "0")
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; MLS with one multiplicand a duplicated scalar register (lane 0 form).
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1268
;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; There is no 64-bit-element SMAX/SMIN/UMAX/UMIN instruction, so V2DI
;; max/min is expanded as a compare followed by a vector conditional
;; select (vcond).
(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
	(MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
		     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Choose the comparison under which operand 1 is the result.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
				operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1333
;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; As above for the two-element (DI/DF lane) modes, where the zero high
;; half is a plain (const_int 0) rather than a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Big-endian counterpart: the zero half comes first in the vec_concat.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
	(vec_concat:VQ_2E
	  (const_int 0)
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Dispatch to the endian-appropriate internal pattern above.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

;; Dispatch to the endian-appropriate INS pattern, supplying the
;; parallel that selects the preserved low half.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
		    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
		    operands[1], p));
  DONE;
})
1468
;; Narrowing operations.

;; For doubles.
;; Truncate each lane of a Q-register vector to half width: XTN.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; Pack two D-register vectors: build a Q-register temporary from the
;; two inputs, then narrow it with XTN.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  /* On big-endian the second operand supplies the architectural low
     half of the temporary, and vice versa.  */
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.
;; XTN narrows into the low half and XTN2 into the high half of the
;; destination, which is earlyclobber ("=&w") because it is written
;; before both inputs have been read.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1513
;; Widening operations.

;; Sign/zero-extend the low half of a Q-register vector: <su>XTL.
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Sign/zero-extend the high half of a Q-register vector: <su>XTL2.
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQW 1 "register_operand" "w")
			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Supply the lane-selection parallel for the high-half insn above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

;; Supply the lane-selection parallel for the low-half insn above.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
1561
;; Widening arithmetic.

;; Widening multiply-accumulate on the low halves: <su>MLAL.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate on the high halves: <su>MLAL2.
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract on the low halves: <su>MLSL.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract on the high halves: <su>MLSL2.
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-accumulate of full D-register vectors.
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply-subtract of full D-register vectors.
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; Widening multiply of the low halves: <su>MULL.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Supply the low-half selection parallel for the insn above.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

;; Widening multiply of the high halves: <su>MULL2.
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 1 "register_operand" "w")
			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			    (match_operand:VQW 2 "register_operand" "w")
			    (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

;; Supply the high-half selection parallel for the insn above.
(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;

 }
)
1710
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe because of reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

;; Vector FP addition: FADD.
(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP subtraction: FSUB.
(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

;; Vector FP multiplication: FMUL.
(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Expand FP division: let aarch64_emit_approx_div emit an approximate
;; reciprocal sequence when it decides that is profitable; otherwise
;; fall through to the FDIV insn below.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Vector FP division: FDIV.
(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		 (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

;; Vector FP negation: FNEG.
(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

;; Vector FP absolute value: FABS.
(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

;; Fused multiply-add: FMLA.  Operand 3 is the in-place accumulator.
(define_insn "fma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
1809
1810 (define_insn "*aarch64_fma4_elt<mode>"
1811 [(set (match_operand:VDQF 0 "register_operand" "=w")
1812 (fma:VDQF
1813 (vec_duplicate:VDQF
1814 (vec_select:<VEL>
1815 (match_operand:VDQF 1 "register_operand" "<h_con>")
1816 (parallel [(match_operand:SI 2 "immediate_operand")])))
1817 (match_operand:VDQF 3 "register_operand" "w")
1818 (match_operand:VDQF 4 "register_operand" "0")))]
1819 "TARGET_SIMD"
1820 {
1821 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1822 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1823 }
1824 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1825 )
1826
1827 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1828 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1829 (fma:VDQSF
1830 (vec_duplicate:VDQSF
1831 (vec_select:<VEL>
1832 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1833 (parallel [(match_operand:SI 2 "immediate_operand")])))
1834 (match_operand:VDQSF 3 "register_operand" "w")
1835 (match_operand:VDQSF 4 "register_operand" "0")))]
1836 "TARGET_SIMD"
1837 {
1838 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1839 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1840 }
1841 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1842 )
1843
;; FMLA where one multiplicand is a scalar duplicated across all lanes;
;; emitted as the by-element form with lane index 0.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 2 "register_operand" "w")
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
1855
;; Scalar DF fused multiply-add where the first multiplicand is a lane
;; extracted from a V2DF register; uses the vector by-element FMLA.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (match_operand:DF 3 "register_operand" "w")
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1871
;; Fused multiply-subtract (standard fnma<mode>4 pattern):
;; op0 = -op1 * op2 + op3, mapped to FMLS.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
1882
;; FMLS with the lane-broadcast multiplicand; the negation sits on
;; operand 3 (the non-lane multiplicand), which is equivalent under fma.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (neg:VDQF
	    (match_operand:VDQF 3 "register_operand" "w"))
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1900
;; As *aarch64_fnma4_elt, but the lane comes from the swapped-width
;; vector mode.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (neg:VDQSF
	    (match_operand:VDQSF 3 "register_operand" "w"))
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)
1918
;; FMLS with a scalar duplicated across lanes; uses by-element form with
;; lane index 0.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (neg:VMUL
	    (match_operand:VMUL 2 "register_operand" "w"))
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)
1931
;; Scalar DF fused multiply-subtract with the lane operand extracted
;; from a V2DF register (negation on the scalar multiplicand).
(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (neg:DF
	    (match_operand:DF 3 "register_operand" "w"))
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
1948
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format; FRINT iterator picks the
;; rounding mode via <frint_suffix>.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)
1959
1960 ;; Vector versions of the fcvt standard patterns.
1961 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Vector FP -> integer conversion with an explicit rounding mode
;; (FCVT{N,A,P,M,Z}{S,U}); the inner unspec selects the rounding.
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
1971
1972 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI conversion with explicit rounding, using the
;; FP16 scalar form of the SIMD instruction.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1981
;; Scalar HF -> HI truncating (round-toward-zero) conversion.
(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)
1989
;; Scalar HI -> HF signed/unsigned integer-to-float conversion.
(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
1997
;; Combine a multiply by a power of two with a truncating FP->int
;; conversion into one fixed-point FCVTZ with an immediate fbits count.
;; The condition restricts fbits to [1, element bit size].
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
2017
;; Standard fix/fixuns expander: truncating FP -> integer conversion.
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2025
;; Standard fix_trunc expander; same RTL as the pattern above under the
;; alternative optab name.
(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})
2033
;; Standard ftrunc expander: round toward zero, result stays FP (FRINTZ).
(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})
2040
;; Vector integer -> FP conversion, signed (SCVTF) or unsigned (UCVTF).
(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
2049
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2053
2054 ;; Float widening operations.
2055
;; Float-extend the low half of a 128-bit FP vector to a full-width
;; wide vector (FCVTL).  Operand 2 must select the low-half lanes.
(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2066
2067 ;; Convert between fixed-point and floating-point (vector modes)
2068
;; FP -> fixed-point conversion with an immediate fraction-bit count
;; (operand 2).
(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)
2079
;; Fixed-point -> FP conversion with an immediate fraction-bit count
;; (operand 2).
(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
2090
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
2099
;; Expand vec_unpacks_lo: build a low-half lane selector (third argument
;; 'false' = low half) and emit the FCVTL pattern above.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
2111
;; Float-extend the high half of a 128-bit FP vector to a full-width
;; wide vector (FCVTL2).  Operand 2 must select the high-half lanes.
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2122
;; Expand vec_unpacks_hi: float-extend the higher architectural lanes of
;; operand 1 into the wide vector operand 0 (FCVTL2).  Per the endianness
;; note above, "hi" here always means the higher architectural lanes.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* Build the high-half lane selector ('true') and emit the _hi_
       pattern.  This previously emitted the _lo_ pattern, which would
       have converted the wrong (low) half of the input vector.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a 64-bit FP vector to its double-width form (FCVTL on the
;; whole D register).
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
2142
2143 ;; Float narrowing operations.
2144
;; Narrow a wide FP vector into the low half of the destination (FCVTN).
(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2153
;; FCVTN2, little-endian form: keep operand 1 in the low half of the
;; concat and place the narrowed operand 2 in the high half.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2164
;; FCVTN2, big-endian form: the vec_concat operand order is swapped
;; relative to the _le pattern, but the emitted instruction is the same.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
2175
;; Dispatch to the endian-specific FCVTN2 pattern above.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand" "=w")
   (match_operand:VDF 1 "register_operand" "0")
   (match_operand:<VWIDE> 2 "register_operand" "w")]
  "TARGET_SIMD"
  {
    rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			       ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			       : gen_aarch64_float_truncate_hi_<Vdbl>_le;
    emit_insn (gen (operands[0], operands[1], operands[2]));
    DONE;
  }
)
2189
;; Pack two V2DF vectors into one V4SF: FCVTN for the low half, FCVTN2
;; for the high half.  The lo/hi operand swap accounts for big-endian
;; lane ordering.
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	  (float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))
	))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
2210
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two scalars,
;; then narrow it with FCVTN.  lo/hi swap handles big-endian ordering.
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	  (float_truncate:SF
	    (match_operand:DF 2 "register_operand"))
	))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2231
2232 ;; FP Max/Min
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2234 ;; expression like:
2235 ;; a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2238 ;; -ffast-math.
2239 ;;
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2245 ;; NaNs.
2246
;; Standard smax/smin patterns, implemented with FMAXNM/FMINNM (see the
;; comment block above for why these RTL names may map to the NM forms).
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2255
2256 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2257 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2258 ;; which implement the IEEE fmax ()/fmin () functions.
;; Unspec-based max/min variants covering FMAX/FMIN/FMAXNM/FMINNM,
;; including the fmax<mode>3/fmin<mode>3 standard names (IEEE semantics).
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
2268
2269 ;; 'across lanes' add.
2270
;; Integer across-lanes add reduction producing a scalar: do the ADDV
;; into a vector scratch, then extract architectural lane 0.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2284
;; Pairwise FP add of two vectors (FADDP), used as the building block of
;; FP add reductions.
(define_insn "aarch64_faddp<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
	 UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
2294
;; Across-lanes integer add reduction (ADDV/ADDP depending on mode);
;; result lands in a scalar register view of the destination.
(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
	(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
2303
;; V2SI has no ADDV form; a single pairwise ADDP of the vector with
;; itself performs the two-lane reduction.
(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)
2312
;; Two-lane FP add reduction to a scalar: a single scalar-output FADDP.
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		   UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)
2321
;; Four-lane FP add reduction: two rounds of pairwise FADDP, then
;; extract architectural lane 0 of the result.
(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		     UNSPEC_FADDV))]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
    rtx scratch = gen_reg_rtx (V4SFmode);
    emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
    emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
    emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
    DONE;
  })
2335
;; Count leading redundant sign bits per lane (CLS).
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2343
;; Count leading zeros per lane (CLZ).
(define_insn "clz<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)
2351
;; Population count per byte lane (CNT; byte vectors only).
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
2359
2360 ;; 'across lanes' max and min ops.
2361
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP across-lanes max/min reduction to a scalar: reduce into a vector
;; scratch, then extract architectural lane 0.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2378
2379 ;; Likewise for integer cases, signed and unsigned.
;; Integer across-lanes max/min reduction to a scalar, signed and
;; unsigned; same reduce-then-extract shape as the FP variant.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
2394
;; Across-lanes integer max/min (SMAXV/SMINV/UMAXV/UMINV).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
	(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		       MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)
2403
;; V2SI has no across-lanes max/min form; one pairwise op of the vector
;; with itself reduces the two lanes.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)
2412
;; Across-lanes FP max/min (FMAXV/FMINV/FMAXNMV/FMINNMV, or the pairwise
;; form selected by <vp>).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
2421
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2423 ;; allocation.
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2425 ;; to select.
2426 ;;
2427 ;; Thus our BSL is of the form:
2428 ;; op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2430 ;;
2431 ;; if (op0 = mask)
2432 ;; bsl mask, op1, op2
2433 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;; bit op0, op2, mask
2435 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;; bif op0, op1, mask
2437 ;;
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
2441
;; Core bitwise-select pattern: op0 = ((op3 ^ op2) & op1) ^ op3, i.e.
;; select bits of op2 where the mask op1 is 1, bits of op3 where it is 0.
;; The three alternatives let register allocation pick BSL/BIT/BIF
;; depending on which input is tied to the destination (see the comment
;; block above).
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2459
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first. The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2465
;; Commuted form of the BSL pattern: the outer xor's match_dup is
;; operand 2 rather than 3 (see the combine note above), so the selected
;; operands in the output templates are swapped accordingly.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)
2482
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers. If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again. However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2489 ;;
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
2492
;; DImode BSL.  Alternatives 1-3 use the vector BSL/BIT/BIF on D
;; registers; alternative 4 keeps everything in general-purpose
;; registers and is split back into EOR/AND/EOR (see the DImode comment
;; block above).
(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2534
;; Commuted DImode BSL (match_dup on operand 2 instead of 3), with the
;; same GP-register split strategy as aarch64_simd_bsldi_internal.
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)
2576
;; Public BSL expander: op0 = bsl (mask op1, op2, op3).  FP modes are
;; punned to the equivalent integer mode so the integer-only _internal
;; pattern can be used, then the result is punned back.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})
2602
;; Select between op1 and op2 under the integer mask op3.  The ?-1:0 and
;; ?0:-1 cases collapse to a move or a bitwise NOT of the mask; everything
;; else goes through the BSL expander.
(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
  else if (operands[1] == CONST0_RTX (<MODE>mode)
	   && operands[2] == CONSTM1_RTX (<MODE>mode))
    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
  else
    {
      if (!REG_P (operands[1]))
	operands[1] = force_reg (<MODE>mode, operands[1]);
      if (!REG_P (operands[2]))
	operands[2] = force_reg (<MODE>mode, operands[2]);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
					     operands[1], operands[2]));
    }

  DONE;
})
2631
2632 ;; Patterns comparing two vectors to produce a mask.
2633
;; Expand an integer vector comparison (operand 1 is the comparison code)
;; of operands 2 and 3 into an all-ones/all-zeros per-lane mask in
;; operand 0.  Signed comparisons and EQ/NE have compare-against-zero
;; immediate forms; unsigned comparisons always need operand 3 in a
;; register.  Unsigned LT/LE are realised by swapping the operands of
;; GT/GE, and NE as the complement of EQ.
(define_expand "vec_cmp<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  rtx mask = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);

  /* Keep a literal zero for the codes that have a #0 form; force
     everything else into a register.  */
  switch (code)
    {
    case NE:
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	break;

      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
      break;

    case LTU:
      /* a LTU b  ==  b GTU a.  */
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
      break;

    case LEU:
      /* a LEU b  ==  b GEU a.  */
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
      break;

    case NE:
      /* Handle NE as !EQ.  */
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
      break;

    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2713
;; Floating-point vector comparison, producing an all-ones/all-zeros mask
;; in the equivalent integer vector mode.  Operand 1 is the comparison
;; operator; operands 2 and 3 are the inputs.  The unordered forms (UN*)
;; must not raise FP exceptions, so NaN lanes are forced to zero before
;; the underlying ordered comparison is emitted.
(define_expand "vec_cmp<mode><v_int_equiv>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VDQF 2 "register_operand")
	     (match_operand:VDQF 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  int use_zero_form = 0;
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);

  rtx (*comparison) (rtx, rtx, rtx) = NULL;

  /* Comparisons against constant zero can use the immediate-zero forms
     of the FCM instructions; anything else needs operand 3 in a
     register.  */
  switch (code)
    {
    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      if (operands[3] == CONST0_RTX (<MODE>mode))
	{
	  use_zero_form = 1;
	  break;
	}
      /* Fall through.  */
    default:
      if (!REG_P (operands[3]))
	operands[3] = force_reg (<MODE>mode, operands[3]);

      break;
    }

  /* Select the generator for the base comparison instruction.  UNLT and
     UNLE swap the operands so that only the GT/GE generators are needed.
     Codes with no single-instruction form leave COMPARISON as NULL and
     are synthesized in the final switch below.  */
  switch (code)
    {
    case LT:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmlt<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLT:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGT:
    case GT:
      comparison = gen_aarch64_cmgt<mode>;
      break;
    case LE:
      if (use_zero_form)
	{
	  comparison = gen_aarch64_cmle<mode>;
	  break;
	}
      /* Fall through.  */
    case UNLE:
      std::swap (operands[2], operands[3]);
      /* Fall through.  */
    case UNGE:
    case GE:
      comparison = gen_aarch64_cmge<mode>;
      break;
    case NE:
    case EQ:
      comparison = gen_aarch64_cmeq<mode>;
      break;
    case UNEQ:
    case ORDERED:
    case UNORDERED:
    case LTGT:
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit the actual comparison sequence.  */
  switch (code)
    {
    case UNGE:
    case UNGT:
    case UNLE:
    case UNLT:
      {
	/* All of the above must not raise any FP exceptions.  Thus we first
	   check each operand for NaNs and force any elements containing NaN to
	   zero before using them in the compare.
	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
				     (cm<cc> (isnan (a) ? 0.0 : a,
					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
	   a UNGE b -> a GE b
	   a UNGT b -> a GT b
	   a UNLE b -> b GE a
	   a UNLT b -> b GT a.  */

	/* tmp0/tmp1: per-lane "is not NaN" masks; tmp2: "both ordered".  */
	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
	/* Zero out the NaN lanes of each input before comparing.  */
	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[2],
							  <MODE>mode)));
	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
					  lowpart_subreg (<V_INT_EQUIV>mode,
							  operands[3],
							  <MODE>mode)));
	gcc_assert (comparison != NULL);
	emit_insn (comparison (operands[0],
			       lowpart_subreg (<MODE>mode,
					       tmp0, <V_INT_EQUIV>mode),
			       lowpart_subreg (<MODE>mode,
					       tmp1, <V_INT_EQUIV>mode)));
	/* UN<cc> is true when unordered OR the ordered compare holds.  */
	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
      }
      break;

    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
	 a GE b -> a GE b
	 a GT b -> a GT b
	 a LE b -> b GE a
	 a LT b -> b GT a
	 a EQ b -> a EQ b
	 a NE b -> ~(a EQ b)  */
      gcc_assert (comparison != NULL);
      emit_insn (comparison (operands[0], operands[2], operands[3]));
      if (code == NE)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      break;

    case LTGT:
      /* LTGT is not guaranteed to not generate a FP exception.  So let's
	 go the faster way : ((a > b) || (b > a)).  */
      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
					 operands[2], operands[3]));
      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
      break;

    case ORDERED:
    case UNORDERED:
    case UNEQ:
      /* cmeq (a, a) & cmeq (b, b).  */
      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
					 operands[2], operands[2]));
      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));

      if (code == UNORDERED)
	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
      else if (code == UNEQ)
	{
	  /* UNEQ = unordered | equal.  */
	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
	}
      break;

    default:
      gcc_unreachable ();
    }

  DONE;
})
2886
;; Unsigned integer vector comparison.  The unsignedness is already
;; carried by the rtx code in operand 1 (LTU/GEU/...), so this simply
;; forwards to the integer vec_cmp expander.
(define_expand "vec_cmpu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(match_operator 1 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 2 "register_operand")
	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
  "TARGET_SIMD"
{
  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
				      operands[2], operands[3]));
  DONE;
})
2898
;; Vector conditional select: operand 0 = (op4 <op3> op5) ? op1 : op2.
;; Lowered as a vec_cmp producing a mask, followed by a masked select.
(define_expand "vcond<mode><mode>"
  [(set (match_operand:VALLDI 0 "register_operand")
	(if_then_else:VALLDI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VALLDI 4 "register_operand")
	     (match_operand:VALLDI 5 "nonmemory_operand")])
	  (match_operand:VALLDI 1 "nonmemory_operand")
	  (match_operand:VALLDI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_code = GET_CODE (operands[3]);
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);

  /* The vec_cmp patterns implement NE as EQ followed by a NOT.  Recast
     the comparison as EQ and exchange the two selected values to get
     the same result without the extra NOT instruction.  */
  if (cmp_code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (pred, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));

  DONE;
})
2928
;; Mixed-mode vector conditional select: the comparison is done on
;; float vectors (VDQF_COND) while the selected values are in the
;; corresponding integer vector mode.
(define_expand "vcond<v_cmp_mixed><mode>"
  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
	(if_then_else:<V_cmp_mixed>
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VDQF_COND 4 "register_operand")
	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_code = GET_CODE (operands[3]);
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);

  /* vec_cmp lowers NE to EQ plus a NOT; recasting the comparison as EQ
     and exchanging the two selected values avoids that extra NOT.  */
  if (cmp_code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><v_int_equiv> (pred, operands[3],
					     operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
						operands[0], operands[1],
						operands[2], pred));

  DONE;
})
2959
;; Unsigned vector conditional select.  The unsigned codes are carried
;; by operand 3 itself; the mask is produced by the integer vec_cmp.
(define_expand "vcondu<mode><mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
	(if_then_else:VSDQ_I_DI
	  (match_operator 3 "comparison_operator"
	    [(match_operand:VSDQ_I_DI 4 "register_operand")
	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_code = GET_CODE (operands[3]);
  rtx pred = gen_reg_rtx (<MODE>mode);

  /* vec_cmp lowers NE to EQ plus a NOT; recasting the comparison as EQ
     and exchanging the two selected values avoids that extra NOT.  */
  if (cmp_code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<mode><mode> (pred, operands[3],
				      operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));
  DONE;
})
2988
;; Mixed-mode unsigned conditional select: integer comparison
;; (V_cmp_mixed) steering a select between float vectors (VDQF).
(define_expand "vcondu<mode><v_cmp_mixed>"
  [(set (match_operand:VDQF 0 "register_operand")
	(if_then_else:VDQF
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
	  (match_operand:VDQF 1 "nonmemory_operand")
	  (match_operand:VDQF 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_code = GET_CODE (operands[3]);
  rtx pred = gen_reg_rtx (<V_INT_EQUIV>mode);

  /* vec_cmp lowers NE to EQ plus a NOT; recasting the comparison as EQ
     and exchanging the two selected values avoids that extra NOT.  */
  if (cmp_code == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
						  pred, operands[3],
						  operands[4], operands[5]));
  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
						 operands[2], pred));
  DONE;
})
3018
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3020
3021 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract a lane and sign-extend it into a 32- or 64-bit GP
;; register.  Two mode iterators are in use here (GPI for the scalar
;; destination, VDQQH for the source vector), so iterator references in
;; the body must be qualified explicitly.
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* The big-endian lane flip must be computed from the element count
       of the *vector* mode, not the scalar GPI destination mode, so
       qualify <MODE> with VDQQH — matching the zero-extending variant
       below.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3035
;; UMOV: extract a lane and zero-extend it into a 32- or 64-bit GP
;; register.
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian using the vector mode's
       element count.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    /* A UMOV writing a W register zero-extends into the full 64 bits,
       so %w0 is correct even for a DImode destination and no separate
       zero-extension instruction is needed.  */
    return "umov\\t%w0, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<q>")]
)
3050
3051 ;; Lane extraction of a value, neither sign nor zero extension
3052 ;; is guaranteed so upper bits should be considered undefined.
3053 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Alternatives: 0 = extract to GP register (UMOV), 1 = extract to SIMD
;; scalar register (DUP), 2 = store one lane directly to memory (ST1).
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
3076
;; Merge two adjacent 64-bit loads into a single Q-register LDR.  The
;; insn condition checks that operand 2's address is exactly operand 1's
;; address plus the size of the mode, i.e. the two halves are contiguous
;; in memory; STRICT_ALIGNMENT must be off since only the first address
;; is known to be sufficiently aligned.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "memory_operand" "Utq")
	   (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)
3090
;; Store a 128-bit vec_concat of two 64-bit values as a store-pair,
;; either from two SIMD D registers or from two GP X registers.
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "register_operand" "w, r")
	   (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)
3102
3103 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3104 ;; dest vector.
3105
;; Combine a 64-bit value (SIMD register, GP register or memory) with a
;; zero high half into a 128-bit vector.  Little-endian version: the
;; value forms the low half, so a plain 64-bit move/load implicitly
;; zeroes the upper half of the destination.
(define_insn "*aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 1 "general_operand" "w,?r,m")
	   (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
3120
;; Big-endian counterpart of *aarch64_combinez: the vec_concat operands
;; are swapped (zero first) to describe the same register contents, and
;; the emitted instructions are identical.
(define_insn "*aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	   (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	   (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")]
)
3135
;; Combine two 64-bit vectors into one 128-bit vector.  Delegates to
;; aarch64_split_simd_combine, which handles endianness.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

  DONE;
}
)
3147
;; Combine two 64-bit vectors by writing the low and high quad halves
;; of the destination separately.
(define_expand "aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
  emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
  DONE;
}
;; NOTE(review): set_attr on a define_expand appears to have no effect
;; (attributes apply to insns); harmless but likely vestigial.
[(set_attr "type" "multiple")]
)
3160
3161 ;; <su><addsub>l<q>.
3162
;; Widening add/sub of the HIGH halves of two Q registers
;; ([US]ADDL2/[US]SUBL2).  Operand 3 selects the high-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3175
;; Widening add/sub of the LOW halves of two Q registers
;; ([US]ADDL/[US]SUBL).  Operand 3 selects the low-half lanes.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3188
3189
;; SADDL2: signed widening add of the high halves of both inputs.
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], hi_sel));
  DONE;
})
3201
;; UADDL2: unsigned widening add of the high halves of both inputs.
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], hi_sel));
  DONE;
})
3213
;; SSUBL2: signed widening subtract of the high halves of both inputs.
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], hi_sel));
  DONE;
})
3225
;; USUBL2: unsigned widening subtract of the high halves of both inputs.
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], hi_sel));
  DONE;
})
3237
;; Widening add/sub of whole 64-bit vectors ([US]ADDL/[US]SUBL on
;; D-register inputs).
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)
3248
3249 ;; <su><addsub>w<q>.
3250
;; Widening signed sum (Q-register input): sign-extend every element of
;; operand 1 and accumulate into operand 2, via SADDW on the low half
;; followed by SADDW2 on the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (sign_extend:<VDBLW> 
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx lo_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>,
						  false);
    rtx partial = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (partial, operands[2],
						 operands[1], lo_sel));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], partial, operands[1]));
    DONE;
  }
)
3267
;; Widening signed sum (D-register input): a single SADDW suffices.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3278
;; Widening unsigned sum (Q-register input): zero-extend every element
;; of operand 1 and accumulate into operand 2, via UADDW on the low
;; half followed by UADDW2 on the high half.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
	(plus:<VDBLW> (zero_extend:<VDBLW> 
		        (match_operand:VQW 1 "register_operand" ""))
		      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
  {
    rtx lo_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>,
						  false);
    rtx partial = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (partial, operands[2],
						 operands[1], lo_sel));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], partial, operands[1]));
    DONE;
  }
)
3295
;; Widening unsigned sum (D-register input): a single UADDW suffices.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand" ""))
		      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})
3306
;; [US]SUBW: wide minus extended narrow, D-register second operand.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)
3316
;; [US]SUBW: wide minus the extended LOW half of a Q-register operand.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)
3328
;; [US]SUBW2: wide minus the extended HIGH half of a Q-register operand.
(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)
3340
;; [US]ADDW: wide plus extended narrow, D-register second operand.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)
3350
;; [US]ADDW: wide plus the extended LOW half of a Q-register operand.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)
3363
;; [US]ADDW2: wide plus the extended HIGH half of a Q-register operand.
(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)
3376
;; SADDW2: widening add of the high half of operand 2 into operand 1.
(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], hi_sel));
  DONE;
})
3388
;; UADDW2: widening add of the high half of operand 2 into operand 1.
(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], hi_sel));
  DONE;
})
3400
3401
;; SSUBW2: widening subtract of the high half of operand 2 from
;; operand 1.
(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], hi_sel));
  DONE;
})
3413
;; USUBW2: widening subtract of the high half of operand 2 from
;; operand 1.
(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx hi_sel = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], hi_sel));
  DONE;
})
3425
3426 ;; <su><r>h<addsub>.
3427
;; Truncating (floor) halving add, mapped directly onto the HADD
;; unspec patterns.
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
		         HADD))]
  "TARGET_SIMD"
)
3435
;; Rounding (ceiling) halving add, mapped directly onto the RHADD
;; unspec patterns.
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
		         RHADD))]
  "TARGET_SIMD"
)
3443
;; Halving add/sub instructions ([US]HADD, [US]RHADD, [US]HSUB ...),
;; selected by the HADDSUB iterator.
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
		         HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)
3453
3454 ;; <r><addsub>hn<q>.
3455
;; Narrowing high-half add/sub ([R]ADDHN/[R]SUBHN): add or subtract the
;; Q-register inputs and keep the high half of each element, narrowed.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3465
;; [R]ADDHN2/[R]SUBHN2: narrowing high-half add/sub written into the
;; upper half of the destination; operand 1 (tied to the output) holds
;; the existing lower half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)
3476
3477 ;; pmul.
3478
;; Polynomial (carry-less) multiply on byte vectors.
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
 "TARGET_SIMD"
 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
3488
3489 ;; fmulx.
3490
;; FMULX, vector and scalar forms (VHSDF_HSDF covers both).
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	   UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)
3501
3502 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3503
;; FMULX by a lane of a vector of the opposite width (VSWAP_WIDTH),
;; implementing vmulxq_lane_f32 and vmulx_laneq_f32.
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand" "w")
	  (vec_duplicate:VDQSF
	   (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian, relative to operand 2's mode.  */
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)
3520
3521 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3522
;; FMULX by a lane of a same-width vector, implementing
;; vmulxq_laneq_f32, vmulxq_laneq_f64 and vmulx_lane_f32.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	 [(match_operand:VDQF 1 "register_operand" "w")
	  (vec_duplicate:VDQF
	   (vec_select:<VEL>
	    (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)
3539
3540 ;; vmulxq_lane
3541
;; FMULX by a duplicated scalar, implementing vmulxq_lane.  Lane 0 of
;; the scalar register is used directly since the value is the same in
;; every lane.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (vec_duplicate:VHSDF
	     (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
3553
3554 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3555 ;; vmulxd_lane_f64 == vmulx_lane_f64
3556 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3557
;; Scalar FMULX of operand 1 by one selected lane of operand 2.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	 [(match_operand:<VEL> 1 "register_operand" "w")
	  (vec_select:<VEL>
	   (match_operand:VDQF 2 "register_operand" "w")
	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Flip the lane index for big-endian.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)
3573 ;; <su>q<addsub>
3574
;; Saturating add/subtract ([US]QADD/[US]QSUB), vector and scalar.
(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3583
3584 ;; suqadd and usqadd
3585
;; SUQADD/USQADD: saturating accumulate; operand 1 is tied to the
;; destination and only operand 2 appears in the assembly.
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)
3595
3596 ;; sqmovun
3597
;; SQXTUN: signed-to-unsigned saturating narrow.
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            UNSPEC_SQXTUN))]
   "TARGET_SIMD"
   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3606
3607 ;; sqmovn and uqmovn
3608
;; SQXTN/UQXTN: saturating narrow, signedness from the SUQMOVN iterator.
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                            SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
3617
3618 ;; <su>q<absneg>
3619
;; Saturating unary operations (SQABS/SQNEG), selected by UNQOPS.
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)
3628
3629 ;; sq<r>dmulh.
3630
;; SQDMULH/SQRDMULH: saturating (rounding) doubling multiply high,
;; vector and scalar forms.
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	 VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)
3641
3642 ;; sq<r>dmulh_lane
3643
;; Vector SQDMULH/SQRDMULH by one lane of a 64-bit vector (VCOND).
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   /* Flip the lane index for big-endian.  */
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3658
;; Vector SQDMULH/SQRDMULH by one lane of a 128-bit vector (VCONQ).
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	 VQDMULH))]
  "TARGET_SIMD"
  "*
   /* Flip the lane index for big-endian.  */
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
3673
3674 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3675 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3676 (unspec:SD_HSI
3677 [(match_operand:SD_HSI 1 "register_operand" "w")
3678 (vec_select:<VEL>
3679 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3680 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3681 VQDMULH))]
3682 "TARGET_SIMD"
3683 "*
3684 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3685 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3686 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3687 )
3688
3689 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3690 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3691 (unspec:SD_HSI
3692 [(match_operand:SD_HSI 1 "register_operand" "w")
3693 (vec_select:<VEL>
3694 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3695 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3696 VQDMULH))]
3697 "TARGET_SIMD"
3698 "*
3699 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3700 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3701 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3702 )
3703
;; sqrdml[as]h.

;; ARMv8.1-A rounding doubling multiply accumulate/subtract high half
;; (sqrdmlah / sqrdmlsh).  Operand 1 is the accumulator and is tied to
;; the output register ("0" constraint).
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3717
;; sqrdml[as]h_lane.

;; By-lane variants of sqrdmlah/sqrdmlsh.  Operand 1 is the tied
;; accumulator; operand 4 is the lane index, endian-adjusted before use.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar by-lane variant.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

;; As the _lane patterns above, but the lane is taken from a quad vector.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar laneq variant.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	 SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3793
;; vqdml[sa]l

;; Signed saturating doubling multiply accumulate/subtract long
;; (sqdmlal / sqdmlsl): widening multiply of operands 2 and 3, doubled
;; with saturation, then accumulated into the tied operand 1.
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)
3811
;; vqdml[sa]l_lane

;; By-lane forms of sqdmlal/sqdmlsl; operand 4 is the lane index and is
;; endian-adjusted before the assembly template is produced.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Lane taken from a quad vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar by-lane form.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar form, lane taken from a quad vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	    )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3907
;; vqdml[sa]l_n

;; By-element (broadcast scalar) form: operand 3 is a scalar that is
;; duplicated across the vector; lane 0 of its register is used.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)
3926
;; sqdml[as]l2

;; High-half variants (sqdmlal2 / sqdmlsl2): operate on the upper halves
;; of the 128-bit inputs, selected via a vect_par_cnst_hi_half parallel
;; that the expanders below construct.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (vec_select:<VHALF>
		    (match_operand:VQ_HSI 2 "register_operand" "w")
		    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
		  (vec_select:<VHALF>
		    (match_operand:VQ_HSI 3 "register_operand" "w")
		    (match_dup 4))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expander: build the hi-half selector and emit the internal insn.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane

;; High-half by-lane variants; operand 4 is the (endian-adjusted) lane.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (vec_select:<VHALF>
		    (match_operand:VQ_HSI 2 "register_operand" "w")
		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
		  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (vec_select:<VHALF>
		    (match_operand:VQ_HSI 2 "register_operand" "w")
		    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
		  (vec_duplicate:<VHALF>
		    (vec_select:<VEL>
		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		    ))))
	      (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expanders for the four lane/laneq accumulate/subtract combinations.
(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
						        operands[2], operands[3],
						        operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
						        operands[2], operands[3],
						        operands[4], p));
  DONE;
})

;; High-half by-broadcast-element variant (lane 0 of scalar operand 3).
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		  (vec_select:<VHALF>
		    (match_operand:VQ_HSI 2 "register_operand" "w")
		    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
		(sign_extend:<VWIDE>
		  (vec_duplicate:<VHALF>
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})
4135
;; vqdmull

;; Signed saturating doubling multiply long (sqdmull): widening multiply
;; of operands 1 and 2, doubled with saturation.
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

;; vqdmull_lane

;; By-lane forms; operand 3 is the lane index, endian-adjusted before use.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar by-lane forms.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n

;; By-broadcast-element form: lane 0 of scalar operand 2 is duplicated.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)
4253
;; vqdmull2

;; High-half variants (sqdmull2): operate on the upper halves of the
;; 128-bit inputs, selected via the vect_par_cnst_hi_half parallels
;; built by the expanders.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; vqdmull2_lane

;; High-half by-lane variants; operand 3 is the endian-adjusted lane.
(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
						        operands[2], operands[3],
						        p));
  DONE;
})

;; vqdmull2_n

;; High-half by-broadcast-element variant (lane 0 of scalar operand 2).
(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
4396
;; vshl

;; Vector shift by (signed) vector of shift amounts: sshl/ushl and the
;; rounding variants.  The stray ';' that used to follow the output
;; template (it started an empty md comment) has been removed.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)


;; vqshl

;; Saturating (optionally rounding) shift by register.  Stray trailing
;; ';' after the template likewise removed.
(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)
4423
;; vshll_n

;; Shift left long by immediate.  When the shift equals the element
;; width the dedicated SHLL form is used; otherwise the s/u-prefixed
;; immediate form.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

;; As above but on the upper half of the input vector (shll2 forms).
(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
4458
;; vrshr_n

;; (Rounding) shift right by immediate.
(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

;; (Rounding) shift right and accumulate; operand 1 is the accumulator
;; tied to the destination.
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vs<lr>i_n

;; Shift left/right and insert; operand 1 provides the bits preserved in
;; the destination and is tied to it.
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)
4499
;; vqshl(u)

;; Saturating shift left by immediate (sqshl/uqshl/sqshlu).
(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)


;; vq(r)shr(u)n_n

;; Saturating (rounding) shift right narrow by immediate.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
4526
4527
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

;; Signed vector compares producing an all-ones/all-zeros mask.  The
;; second alternative compares against immediate zero.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)

;; DImode compare: kept as a single pattern until after reload so that
;; it can be split either to a general-register compare-and-cset
;; sequence (clobbering CC) or to the SIMD compare insn below.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

;; SIMD-register-only DImode compare produced by the split above.
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)
4594
4595 ;; cm(hs|hi)
4596
;; Vector unsigned comparison; lanes become 0 or all-ones via the
;; negated comparison.  No immediate-zero alternative exists for the
;; unsigned forms.
4597 (define_insn "aarch64_cm<optab><mode>"
4598 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4599 (neg:<V_INT_EQUIV>
4600 (UCOMPARISONS:<V_INT_EQUIV>
4601 (match_operand:VDQ_I 1 "register_operand" "w")
4602 (match_operand:VDQ_I 2 "register_operand" "w")
4603 )))]
4604 "TARGET_SIMD"
4605 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4606 [(set_attr "type" "neon_compare<q>")]
4607 )
4608
;; Scalar DImode unsigned comparison; split after reload either to a
;; GP-register compare+store (clobbering CC) or to the SIMD form below.
4609 (define_insn_and_split "aarch64_cm<optab>di"
4610 [(set (match_operand:DI 0 "register_operand" "=w,r")
4611 (neg:DI
4612 (UCOMPARISONS:DI
4613 (match_operand:DI 1 "register_operand" "w,r")
4614 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4615 )))
4616 (clobber (reg:CC CC_REGNUM))]
4617 "TARGET_SIMD"
4618 "#"
4619 "&& reload_completed"
4620 [(set (match_operand:DI 0 "register_operand")
4621 (neg:DI
4622 (UCOMPARISONS:DI
4623 (match_operand:DI 1 "register_operand")
4624 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4625 )))]
4626 {
4627 /* If we are in the general purpose register file,
4628 we split to a sequence of comparison and store.  */
4629 if (GP_REGNUM_P (REGNO (operands[0]))
4630 && GP_REGNUM_P (REGNO (operands[1])))
4631 {
4632 machine_mode mode = CCmode;
4633 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4634 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4635 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4636 DONE;
4637 }
4638 /* Otherwise, we expand to a similar pattern which does not
4639 clobber CC_REGNUM.  */
4640 }
4641 [(set_attr "type" "neon_compare,multiple")]
4642 )
4643
;; Post-reload SIMD-register form of the unsigned DImode comparison.
4644 (define_insn "*aarch64_cm<optab>di"
4645 [(set (match_operand:DI 0 "register_operand" "=w")
4646 (neg:DI
4647 (UCOMPARISONS:DI
4648 (match_operand:DI 1 "register_operand" "w")
4649 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4650 )))]
4651 "TARGET_SIMD && reload_completed"
4652 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4653 [(set_attr "type" "neon_compare")]
4654 )
4655
4656 ;; cmtst
4657
4658 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4659 ;; we don't have any insns using ne, and aarch64_vcond outputs
4660 ;; not (neg (eq (and x y) 0))
4661 ;; which is rewritten by simplify_rtx as
4662 ;; plus (eq (and x y) 0) -1.
4663
;; Vector test-bits: matches the canonicalized plus/eq/-1 form described
;; above and emits a single CMTST.
4664 (define_insn "aarch64_cmtst<mode>"
4665 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4666 (plus:<V_INT_EQUIV>
4667 (eq:<V_INT_EQUIV>
4668 (and:VDQ_I
4669 (match_operand:VDQ_I 1 "register_operand" "w")
4670 (match_operand:VDQ_I 2 "register_operand" "w"))
4671 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4672 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4673 ]
4674 "TARGET_SIMD"
4675 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4676 [(set_attr "type" "neon_tst<q>")]
4677 )
4678
;; Scalar DImode cmtst.  Uses the natural neg/ne form (only valid here,
;; see the comment above); split after reload into either a GP-register
;; tst+store sequence or the SIMD pattern below.
4679 (define_insn_and_split "aarch64_cmtstdi"
4680 [(set (match_operand:DI 0 "register_operand" "=w,r")
4681 (neg:DI
4682 (ne:DI
4683 (and:DI
4684 (match_operand:DI 1 "register_operand" "w,r")
4685 (match_operand:DI 2 "register_operand" "w,r"))
4686 (const_int 0))))
4687 (clobber (reg:CC CC_REGNUM))]
4688 "TARGET_SIMD"
4689 "#"
4690 "&& reload_completed"
4691 [(set (match_operand:DI 0 "register_operand")
4692 (neg:DI
4693 (ne:DI
4694 (and:DI
4695 (match_operand:DI 1 "register_operand")
4696 (match_operand:DI 2 "register_operand"))
4697 (const_int 0))))]
4698 {
4699 /* If we are in the general purpose register file,
4700 we split to a sequence of comparison and store.  */
4701 if (GP_REGNUM_P (REGNO (operands[0]))
4702 && GP_REGNUM_P (REGNO (operands[1])))
4703 {
4704 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4705 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4706 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4707 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4708 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4709 DONE;
4710 }
4711 /* Otherwise, we expand to a similar pattern which does not
4712 clobber CC_REGNUM.  */
4713 }
4714 [(set_attr "type" "neon_tst,multiple")]
4715 )
4716
;; SIMD-register form of the DImode cmtst (no CC clobber).
4717 (define_insn "*aarch64_cmtstdi"
4718 [(set (match_operand:DI 0 "register_operand" "=w")
4719 (neg:DI
4720 (ne:DI
4721 (and:DI
4722 (match_operand:DI 1 "register_operand" "w")
4723 (match_operand:DI 2 "register_operand" "w"))
4724 (const_int 0))))]
4725 "TARGET_SIMD"
4726 "cmtst\t%d0, %d1, %d2"
4727 [(set_attr "type" "neon_tst")]
4728 )
4729
4730 ;; fcm(eq|ge|gt|le|lt)
4731
;; Floating-point vector/scalar comparison; lanes become 0 or all-ones.
;; Second alternative compares against floating-point zero.
4732 (define_insn "aarch64_cm<optab><mode>"
4733 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4734 (neg:<V_INT_EQUIV>
4735 (COMPARISONS:<V_INT_EQUIV>
4736 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4737 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4738 )))]
4739 "TARGET_SIMD"
4740 "@
4741 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4742 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4743 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4744 )
4745
4746 ;; fac(ge|gt)
4747 ;; Note we can also handle what would be fac(le|lt) by
4748 ;; generating fac(ge|gt).
4749
;; Absolute compare: compares |op1| against |op2|.
4750 (define_insn "aarch64_fac<optab><mode>"
4751 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4752 (neg:<V_INT_EQUIV>
4753 (FAC_COMPARISONS:<V_INT_EQUIV>
4754 (abs:VHSDF_HSDF
4755 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4756 (abs:VHSDF_HSDF
4757 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4758 )))]
4759 "TARGET_SIMD"
4760 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4761 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4762 )
4763
4764 ;; addp
4765
;; Pairwise integer add on 64-bit vectors.
4766 (define_insn "aarch64_addp<mode>"
4767 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4768 (unspec:VD_BHSI
4769 [(match_operand:VD_BHSI 1 "register_operand" "w")
4770 (match_operand:VD_BHSI 2 "register_operand" "w")]
4771 UNSPEC_ADDP))]
4772 "TARGET_SIMD"
4773 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4774 [(set_attr "type" "neon_reduc_add<q>")]
4775 )
4776
;; Pairwise add of the two elements of a V2DI, producing a scalar DI
;; in a SIMD register.
4777 (define_insn "aarch64_addpdi"
4778 [(set (match_operand:DI 0 "register_operand" "=w")
4779 (unspec:DI
4780 [(match_operand:V2DI 1 "register_operand" "w")]
4781 UNSPEC_ADDP))]
4782 "TARGET_SIMD"
4783 "addp\t%d0, %1.2d"
4784 [(set_attr "type" "neon_reduc_add")]
4785 )
4786
4787 ;; sqrt
4788
;; Vector square root.  The expander may emit an approximating
;; Newton-series sequence instead (aarch64_emit_approx_sqrt decides);
;; otherwise it falls through to the fsqrt insn below.
4789 (define_expand "sqrt<mode>2"
4790 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4791 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4792 "TARGET_SIMD"
4793 {
4794 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4795 DONE;
4796 })
4797
;; The exact (hardware) vector square root.
4798 (define_insn "*sqrt<mode>2"
4799 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4800 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4801 "TARGET_SIMD"
4802 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4803 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4804 )
4805
4806 ;; Patterns for vector struct loads and stores.
4807
;; Two-register structure load (LD2), de-interleaving into an OImode
;; register pair.
4808 (define_insn "aarch64_simd_ld2<mode>"
4809 [(set (match_operand:OI 0 "register_operand" "=w")
4810 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4811 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 UNSPEC_LD2))]
4813 "TARGET_SIMD"
4814 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4815 [(set_attr "type" "neon_load2_2reg<q>")]
4816 )
4817
;; LD2R: load one structure and replicate it to all lanes of both
;; destination registers.
4818 (define_insn "aarch64_simd_ld2r<mode>"
4819 [(set (match_operand:OI 0 "register_operand" "=w")
4820 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4821 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4822 UNSPEC_LD2_DUP))]
4823 "TARGET_SIMD"
4824 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4825 [(set_attr "type" "neon_load2_all_lanes<q>")]
4826 )
4827
;; LD2 to a single lane; operand 2 supplies the untouched lanes and
;; operand 3 is the lane index (flipped for big-endian at output time).
4828 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4829 [(set (match_operand:OI 0 "register_operand" "=w")
4830 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4831 (match_operand:OI 2 "register_operand" "0")
4832 (match_operand:SI 3 "immediate_operand" "i")
4833 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4834 UNSPEC_LD2_LANE))]
4835 "TARGET_SIMD"
4836 {
4837 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4838 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4839 }
4840 [(set_attr "type" "neon_load2_one_lane")]
4841 )
4842
;; vec_load_lanes expander; on big-endian targets the loaded register
;; list is reversed afterwards to match GCC's element numbering.
4843 (define_expand "vec_load_lanesoi<mode>"
4844 [(set (match_operand:OI 0 "register_operand" "=w")
4845 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4846 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4847 UNSPEC_LD2))]
4848 "TARGET_SIMD"
4849 {
4850 if (BYTES_BIG_ENDIAN)
4851 {
4852 rtx tmp = gen_reg_rtx (OImode);
4853 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4854 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4855 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4856 }
4857 else
4858 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4859 DONE;
4860 })
4861
;; Two-register structure store (ST2), interleaving from an OImode pair.
4862 (define_insn "aarch64_simd_st2<mode>"
4863 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4864 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4865 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4866 UNSPEC_ST2))]
4867 "TARGET_SIMD"
4868 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4869 [(set_attr "type" "neon_store2_2reg<q>")]
4870 )
4871
4872 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4873 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4874 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4875 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4876 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4877 (match_operand:SI 2 "immediate_operand" "i")]
4878 UNSPEC_ST2_LANE))]
4879 "TARGET_SIMD"
4880 {
4881 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4882 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4883 }
4884 [(set_attr "type" "neon_store2_one_lane<q>")]
4885 )
4886
;; vec_store_lanes expander; big-endian reverses the register list
;; before the store (mirror of vec_load_lanesoi above).
4887 (define_expand "vec_store_lanesoi<mode>"
4888 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4889 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4890 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4891 UNSPEC_ST2))]
4892 "TARGET_SIMD"
4893 {
4894 if (BYTES_BIG_ENDIAN)
4895 {
4896 rtx tmp = gen_reg_rtx (OImode);
4897 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4898 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4899 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4900 }
4901 else
4902 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4903 DONE;
4904 })
4905
;; Three-register structure load (LD3) into a CImode register triple.
;; These patterns mirror the LD2/ST2 family above, one register wider.
4906 (define_insn "aarch64_simd_ld3<mode>"
4907 [(set (match_operand:CI 0 "register_operand" "=w")
4908 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4909 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4910 UNSPEC_LD3))]
4911 "TARGET_SIMD"
4912 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4913 [(set_attr "type" "neon_load3_3reg<q>")]
4914 )
4915
;; LD3R: load one structure and replicate across all lanes.
4916 (define_insn "aarch64_simd_ld3r<mode>"
4917 [(set (match_operand:CI 0 "register_operand" "=w")
4918 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4919 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4920 UNSPEC_LD3_DUP))]
4921 "TARGET_SIMD"
4922 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4923 [(set_attr "type" "neon_load3_all_lanes<q>")]
4924 )
4925
;; LD3 to a single lane; operand 2 supplies the untouched lanes,
;; operand 3 is the lane index (endian-flipped at output time).
4926 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4927 [(set (match_operand:CI 0 "register_operand" "=w")
4928 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4929 (match_operand:CI 2 "register_operand" "0")
4930 (match_operand:SI 3 "immediate_operand" "i")
4931 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4932 UNSPEC_LD3_LANE))]
4933 "TARGET_SIMD"
4934 {
4935 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4936 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4937 }
4938 [(set_attr "type" "neon_load3_one_lane")]
4939 )
4940
;; vec_load_lanes expander; big-endian reverses the loaded register
;; list to match GCC's element numbering.
4941 (define_expand "vec_load_lanesci<mode>"
4942 [(set (match_operand:CI 0 "register_operand" "=w")
4943 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4944 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945 UNSPEC_LD3))]
4946 "TARGET_SIMD"
4947 {
4948 if (BYTES_BIG_ENDIAN)
4949 {
4950 rtx tmp = gen_reg_rtx (CImode);
4951 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4952 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4953 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4954 }
4955 else
4956 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4957 DONE;
4958 })
4959
;; Three-register structure store (ST3).
4960 (define_insn "aarch64_simd_st3<mode>"
4961 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4962 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4963 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4964 UNSPEC_ST3))]
4965 "TARGET_SIMD"
4966 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4967 [(set_attr "type" "neon_store3_3reg<q>")]
4968 )
4969
4970 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4971 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4972 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4973 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4974 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4975 (match_operand:SI 2 "immediate_operand" "i")]
4976 UNSPEC_ST3_LANE))]
4977 "TARGET_SIMD"
4978 {
4979 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4980 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4981 }
4982 [(set_attr "type" "neon_store3_one_lane<q>")]
4983 )
4984
;; vec_store_lanes expander; big-endian reverses the register list
;; before the store.
4985 (define_expand "vec_store_lanesci<mode>"
4986 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4987 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4988 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4989 UNSPEC_ST3))]
4990 "TARGET_SIMD"
4991 {
4992 if (BYTES_BIG_ENDIAN)
4993 {
4994 rtx tmp = gen_reg_rtx (CImode);
4995 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4996 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4997 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4998 }
4999 else
5000 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5001 DONE;
5002 })
5003
;; Four-register structure load (LD4) into an XImode register quad.
;; These patterns mirror the LD2/ST2 and LD3/ST3 families above.
5004 (define_insn "aarch64_simd_ld4<mode>"
5005 [(set (match_operand:XI 0 "register_operand" "=w")
5006 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5007 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5008 UNSPEC_LD4))]
5009 "TARGET_SIMD"
5010 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5011 [(set_attr "type" "neon_load4_4reg<q>")]
5012 )
5013
;; LD4R: load one structure and replicate across all lanes.
5014 (define_insn "aarch64_simd_ld4r<mode>"
5015 [(set (match_operand:XI 0 "register_operand" "=w")
5016 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5017 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5018 UNSPEC_LD4_DUP))]
5019 "TARGET_SIMD"
5020 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5021 [(set_attr "type" "neon_load4_all_lanes<q>")]
5022 )
5023
;; LD4 to a single lane; operand 2 supplies the untouched lanes,
;; operand 3 is the lane index (endian-flipped at output time).
5024 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5025 [(set (match_operand:XI 0 "register_operand" "=w")
5026 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5027 (match_operand:XI 2 "register_operand" "0")
5028 (match_operand:SI 3 "immediate_operand" "i")
5029 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030 UNSPEC_LD4_LANE))]
5031 "TARGET_SIMD"
5032 {
5033 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5034 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5035 }
5036 [(set_attr "type" "neon_load4_one_lane")]
5037 )
5038
;; vec_load_lanes expander; big-endian reverses the loaded register
;; list to match GCC's element numbering.
5039 (define_expand "vec_load_lanesxi<mode>"
5040 [(set (match_operand:XI 0 "register_operand" "=w")
5041 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5042 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043 UNSPEC_LD4))]
5044 "TARGET_SIMD"
5045 {
5046 if (BYTES_BIG_ENDIAN)
5047 {
5048 rtx tmp = gen_reg_rtx (XImode);
5049 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5050 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5051 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5052 }
5053 else
5054 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5055 DONE;
5056 })
5057
;; Four-register structure store (ST4).
5058 (define_insn "aarch64_simd_st4<mode>"
5059 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5060 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5061 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5062 UNSPEC_ST4))]
5063 "TARGET_SIMD"
5064 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5065 [(set_attr "type" "neon_store4_4reg<q>")]
5066 )
5067
5068 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5069 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5070 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5071 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5072 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5073 (match_operand:SI 2 "immediate_operand" "i")]
5074 UNSPEC_ST4_LANE))]
5075 "TARGET_SIMD"
5076 {
5077 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5078 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5079 }
5080 [(set_attr "type" "neon_store4_one_lane<q>")]
5081 )
5082
;; vec_store_lanes expander; big-endian reverses the register list
;; before the store.
5083 (define_expand "vec_store_lanesxi<mode>"
5084 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5085 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5086 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5087 UNSPEC_ST4))]
5088 "TARGET_SIMD"
5089 {
5090 if (BYTES_BIG_ENDIAN)
5091 {
5092 rtx tmp = gen_reg_rtx (XImode);
5093 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5094 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5095 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5096 }
5097 else
5098 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5099 DONE;
5100 })
5101
;; Reverse the element order of every vector in a structure register
;; list (used by the big-endian vec_load/store_lanes expanders above).
;; After reload this splits into one TBL shuffle per 128-bit register.
5102 (define_insn_and_split "aarch64_rev_reglist<mode>"
5103 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5104 (unspec:VSTRUCT
5105 [(match_operand:VSTRUCT 1 "register_operand" "w")
5106 (match_operand:V16QI 2 "register_operand" "w")]
5107 UNSPEC_REV_REGLIST))]
5108 "TARGET_SIMD"
5109 "#"
5110 "&& reload_completed"
5111 [(const_int 0)]
5112 {
5113 int i;
5114 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5115 for (i = 0; i < nregs; i++)
5116 {
5117 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5118 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5119 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5120 }
5121 DONE;
5122 }
5123 [(set_attr "type" "neon_tbl1_q")
5124 (set_attr "length" "<insn_count>")]
)
5126
5127 ;; Reload patterns for AdvSIMD register list operands.
5128
;; Move expander for structure (register-list) modes.  Before reload,
;; force memory-destination sources into a register; after reload the
;; move patterns below handle the remaining cases.
5129 (define_expand "mov<mode>"
5130 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5131 (match_operand:VSTRUCT 1 "general_operand" ""))]
5132 "TARGET_SIMD"
5133 {
5134 if (can_create_pseudo_p ())
5135 {
5136 if (GET_CODE (operands[0]) != REG)
5137 operands[1] = force_reg (<MODE>mode, operands[1]);
5138 }
5139 })
5140
5141
;; ld1 x3 intrinsic expander: wrap the address (operand 1) in a CImode
;; MEM and emit the three-register LD1 below.
5142 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5143 [(match_operand:CI 0 "register_operand" "=w")
5144 (match_operand:DI 1 "register_operand" "r")
5145 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5146 "TARGET_SIMD"
5147 {
5148 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5149 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5150 DONE;
5151 })
5152
;; Non-interleaving three-register LD1.
5153 (define_insn "aarch64_ld1_x3_<mode>"
5154 [(set (match_operand:CI 0 "register_operand" "=w")
5155 (unspec:CI
5156 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5157 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5158 "TARGET_SIMD"
5159 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5160 [(set_attr "type" "neon_load1_3reg<q>")]
5161 )
5162
;; st1 x2 intrinsic expander: operand 0 is the address, operand 1 the
;; OImode register pair to store.
5163 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5164 [(match_operand:DI 0 "register_operand" "")
5165 (match_operand:OI 1 "register_operand" "")
5166 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5167 "TARGET_SIMD"
5168 {
5169 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5170 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5171 DONE;
5172 })
5173
;; Non-interleaving two-register ST1.
5174 (define_insn "aarch64_st1_x2_<mode>"
5175 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5176 (unspec:OI
5177 [(match_operand:OI 1 "register_operand" "w")
5178 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5179 "TARGET_SIMD"
5180 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5181 [(set_attr "type" "neon_store1_2reg<q>")]
5182 )
5183
;; st1 x3 intrinsic expander: operand 0 is the address, operand 1 the
;; CImode register triple to store.
5184 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5185 [(match_operand:DI 0 "register_operand" "")
5186 (match_operand:CI 1 "register_operand" "")
5187 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5188 "TARGET_SIMD"
5189 {
5190 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5191 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5192 DONE;
5193 })
5194
;; Non-interleaving three-register ST1.
5195 (define_insn "aarch64_st1_x3_<mode>"
5196 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5197 (unspec:CI
5198 [(match_operand:CI 1 "register_operand" "w")
5199 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5200 "TARGET_SIMD"
5201 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5202 [(set_attr "type" "neon_store1_3reg<q>")]
5203 )
5204
;; Little-endian structure-mode move: register-to-register moves are
;; split later (see the define_splits below this family); memory forms
;; use multi-register ST1/LD1.
5205 (define_insn "*aarch64_mov<mode>"
5206 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5207 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5208 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5209 && (register_operand (operands[0], <MODE>mode)
5210 || register_operand (operands[1], <MODE>mode))"
5211 "@
5212 #
5213 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5214 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5215 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5216 neon_load<nregs>_<nregs>reg_q")
5217 (set_attr "length" "<insn_count>,4,4")]
5218 )
5219
;; Element-ordered single-register load; used on big-endian targets
;; where LD1 preserves array element order in memory.
5220 (define_insn "aarch64_be_ld1<mode>"
5221 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5222 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5223 "aarch64_simd_struct_operand" "Utv")]
5224 UNSPEC_LD1))]
5225 "TARGET_SIMD"
5226 "ld1\\t{%0<Vmtype>}, %1"
5227 [(set_attr "type" "neon_load1_1reg<q>")]
5228 )
5229
;; Element-ordered single-register store (ST1); counterpart of
;; aarch64_be_ld1 above.
5230 (define_insn "aarch64_be_st1<mode>"
5231 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5232 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5233 UNSPEC_ST1))]
5234 "TARGET_SIMD"
5235 "st1\\t{%1<Vmtype>}, %0"
5236 [(set_attr "type" "neon_store1_1reg<q>")]
5237 )
5238
;; Big-endian OImode (2x128-bit) move: register pairs via STP/LDP of
;; Q registers; register-to-register form (#) is split later.
5239 (define_insn "*aarch64_be_movoi"
5240 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5241 (match_operand:OI 1 "general_operand" " w,w,m"))]
5242 "TARGET_SIMD && BYTES_BIG_ENDIAN
5243 && (register_operand (operands[0], OImode)
5244 || register_operand (operands[1], OImode))"
5245 "@
5246 #
5247 stp\\t%q1, %R1, %0
5248 ldp\\t%q0, %R0, %1"
5249 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5250 (set_attr "length" "8,4,4")]
5251 )
5252
;; Big-endian CImode (3x128-bit) move; always emitted as "#" and
;; decomposed by the CImode define_split further down.
5253 (define_insn "*aarch64_be_movci"
5254 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5255 (match_operand:CI 1 "general_operand" " w,w,o"))]
5256 "TARGET_SIMD && BYTES_BIG_ENDIAN
5257 && (register_operand (operands[0], CImode)
5258 || register_operand (operands[1], CImode))"
5259 "#"
5260 [(set_attr "type" "multiple")
5261 (set_attr "length" "12,4,4")]
5262 )
5263
;; Big-endian XImode (4x128-bit) move; likewise decomposed by its
;; define_split.
5264 (define_insn "*aarch64_be_movxi"
5265 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5266 (match_operand:XI 1 "general_operand" " w,w,o"))]
5267 "TARGET_SIMD && BYTES_BIG_ENDIAN
5268 && (register_operand (operands[0], XImode)
5269 || register_operand (operands[1], XImode))"
5270 "#"
5271 [(set_attr "type" "multiple")
5272 (set_attr "length" "16,4,4")]
5273 )
5274
;; Split an OImode register-to-register move into two TImode moves.
5275 (define_split
5276 [(set (match_operand:OI 0 "register_operand")
5277 (match_operand:OI 1 "register_operand"))]
5278 "TARGET_SIMD && reload_completed"
5279 [(const_int 0)]
5280 {
5281 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5282 DONE;
5283 })
5284
;; Split a CImode move: reg-reg becomes three TImode moves; on
;; big-endian a memory move becomes an OImode move plus a TImode move
;; of the last 16 bytes (done through V16QI so element order is kept).
5285 (define_split
5286 [(set (match_operand:CI 0 "nonimmediate_operand")
5287 (match_operand:CI 1 "general_operand"))]
5288 "TARGET_SIMD && reload_completed"
5289 [(const_int 0)]
5290 {
5291 if (register_operand (operands[0], CImode)
5292 && register_operand (operands[1], CImode))
5293 {
5294 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5295 DONE;
5296 }
5297 else if (BYTES_BIG_ENDIAN)
5298 {
5299 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5300 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5301 emit_move_insn (gen_lowpart (V16QImode,
5302 simplify_gen_subreg (TImode, operands[0],
5303 CImode, 32)),
5304 gen_lowpart (V16QImode,
5305 simplify_gen_subreg (TImode, operands[1],
5306 CImode, 32)));
5307 DONE;
5308 }
5309 else
5310 FAIL;
5311 })
5312
;; Split an XImode move: reg-reg becomes four TImode moves; on
;; big-endian a memory move becomes two OImode moves.
5313 (define_split
5314 [(set (match_operand:XI 0 "nonimmediate_operand")
5315 (match_operand:XI 1 "general_operand"))]
5316 "TARGET_SIMD && reload_completed"
5317 [(const_int 0)]
5318 {
5319 if (register_operand (operands[0], XImode)
5320 && register_operand (operands[1], XImode))
5321 {
5322 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5323 DONE;
5324 }
5325 else if (BYTES_BIG_ENDIAN)
5326 {
5327 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5328 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5329 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5330 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5331 DONE;
5332 }
5333 else
5334 FAIL;
5335 })
5336
;; ldNr intrinsic expander: build a BLKmode MEM of the right size from
;; the address in operand 1, then emit the matching ldNr insn.
5337 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5338 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5339 (match_operand:DI 1 "register_operand" "w")
5340 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341 "TARGET_SIMD"
5342 {
5343 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5344 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5345 * <VSTRUCT:nregs>);
5346
5347 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5348 mem));
5349 DONE;
5350 })
5351
;; ldN into D-register lists.  The 64-bit vector variants (VD) use the
;; interleaving ldN instruction; the 64-bit scalar variants (DX) use a
;; plain multi-register ld1 since interleaving is meaningless there.
5352 (define_insn "aarch64_ld2<mode>_dreg"
5353 [(set (match_operand:OI 0 "register_operand" "=w")
5354 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5355 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5356 UNSPEC_LD2_DREG))]
5357 "TARGET_SIMD"
5358 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5359 [(set_attr "type" "neon_load2_2reg<q>")]
5360 )
5361
;; DX (64-bit scalar element) variant: plain ld1 of two registers.
5362 (define_insn "aarch64_ld2<mode>_dreg"
5363 [(set (match_operand:OI 0 "register_operand" "=w")
5364 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5365 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5366 UNSPEC_LD2_DREG))]
5367 "TARGET_SIMD"
5368 "ld1\\t{%S0.1d - %T0.1d}, %1"
5369 [(set_attr "type" "neon_load1_2reg<q>")]
5370 )
5371
5372 (define_insn "aarch64_ld3<mode>_dreg"
5373 [(set (match_operand:CI 0 "register_operand" "=w")
5374 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5375 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5376 UNSPEC_LD3_DREG))]
5377 "TARGET_SIMD"
5378 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5379 [(set_attr "type" "neon_load3_3reg<q>")]
5380 )
5381
;; DX variant: plain ld1 of three registers.
5382 (define_insn "aarch64_ld3<mode>_dreg"
5383 [(set (match_operand:CI 0 "register_operand" "=w")
5384 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5385 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5386 UNSPEC_LD3_DREG))]
5387 "TARGET_SIMD"
5388 "ld1\\t{%S0.1d - %U0.1d}, %1"
5389 [(set_attr "type" "neon_load1_3reg<q>")]
5390 )
5391
5392 (define_insn "aarch64_ld4<mode>_dreg"
5393 [(set (match_operand:XI 0 "register_operand" "=w")
5394 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5395 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5396 UNSPEC_LD4_DREG))]
5397 "TARGET_SIMD"
5398 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5399 [(set_attr "type" "neon_load4_4reg<q>")]
5400 )
5401
;; DX variant: plain ld1 of four registers.
5402 (define_insn "aarch64_ld4<mode>_dreg"
5403 [(set (match_operand:XI 0 "register_operand" "=w")
5404 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5405 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5406 UNSPEC_LD4_DREG))]
5407 "TARGET_SIMD"
5408 "ld1\\t{%S0.1d - %V0.1d}, %1"
5409 [(set_attr "type" "neon_load1_4reg<q>")]
5410 )
5411
;; ldN intrinsic expander for 64-bit element vectors: build a sized
;; BLKmode MEM and dispatch to the _dreg patterns above.
5412 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5413 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5414 (match_operand:DI 1 "register_operand" "r")
5415 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5416 "TARGET_SIMD"
5417 {
5418 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5419 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5420
5421 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5422 DONE;
5423 })
5424
;; ld1 intrinsic: a plain move on little-endian; element-ordered LD1 on
;; big-endian (see aarch64_be_ld1 above).
5425 (define_expand "aarch64_ld1<VALL_F16:mode>"
5426 [(match_operand:VALL_F16 0 "register_operand")
5427 (match_operand:DI 1 "register_operand")]
5428 "TARGET_SIMD"
5429 {
5430 machine_mode mode = <VALL_F16:MODE>mode;
5431 rtx mem = gen_rtx_MEM (mode, operands[1]);
5432
5433 if (BYTES_BIG_ENDIAN)
5434 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5435 else
5436 emit_move_insn (operands[0], mem);
5437 DONE;
5438 })
5439
;; ldN intrinsic expander for 128-bit element vectors.
5440 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5441 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5442 (match_operand:DI 1 "register_operand" "r")
5443 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444 "TARGET_SIMD"
5445 {
5446 machine_mode mode = <VSTRUCT:MODE>mode;
5447 rtx mem = gen_rtx_MEM (mode, operands[1]);
5448
5449 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5450 DONE;
5451 })
5452
;; ld1 x2 intrinsic expander, 128-bit vector element modes.
5453 (define_expand "aarch64_ld1x2<VQ:mode>"
5454 [(match_operand:OI 0 "register_operand" "=w")
5455 (match_operand:DI 1 "register_operand" "r")
5456 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5457 "TARGET_SIMD"
5458 {
5459 machine_mode mode = OImode;
5460 rtx mem = gen_rtx_MEM (mode, operands[1]);
5461
5462 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5463 DONE;
5464 })
5465
;; ld1 x2 intrinsic expander, 64-bit vector element modes.
5466 (define_expand "aarch64_ld1x2<VDC:mode>"
5467 [(match_operand:OI 0 "register_operand" "=w")
5468 (match_operand:DI 1 "register_operand" "r")
5469 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5470 "TARGET_SIMD"
5471 {
5472 machine_mode mode = OImode;
5473 rtx mem = gen_rtx_MEM (mode, operands[1]);
5474
5475 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5476 DONE;
5477 })
5478
5479
;; ldN_lane intrinsic expander: bounds-check the lane index (operand 3)
;; and emit the matching vec_load_lanes*_lane pattern.  Operand 2 holds
;; the lanes that are not loaded.
5480 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5481 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5482 (match_operand:DI 1 "register_operand" "w")
5483 (match_operand:VSTRUCT 2 "register_operand" "0")
5484 (match_operand:SI 3 "immediate_operand" "i")
5485 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5486 "TARGET_SIMD"
5487 {
5488 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5489 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5490 * <VSTRUCT:nregs>);
5491
5492 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5493 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5494 operands[0], mem, operands[2], operands[3]));
5495 DONE;
5496 })
5497
5498 ;; Expanders for builtins to extract vector registers from large
5499 ;; opaque integer modes.
5500
5501 ;; D-register list.
5502
;; Extract D-register number <part> from a structure-mode value; goes
;; through a double-width subreg then takes its low half.
5503 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5504 [(match_operand:VDC 0 "register_operand" "=w")
5505 (match_operand:VSTRUCT 1 "register_operand" "w")
5506 (match_operand:SI 2 "immediate_operand" "i")]
5507 "TARGET_SIMD"
5508 {
5509 int part = INTVAL (operands[2]);
5510 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5511 int offset = part * 16;
5512
5513 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5514 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5515 DONE;
5516 })
5517
5518 ;; Q-register list.
5519
;; Extract Q-register number <part> directly via a 16-byte-offset subreg.
5520 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5521 [(match_operand:VQ 0 "register_operand" "=w")
5522 (match_operand:VSTRUCT 1 "register_operand" "w")
5523 (match_operand:SI 2 "immediate_operand" "i")]
5524 "TARGET_SIMD"
5525 {
5526 int part = INTVAL (operands[2]);
5527 int offset = part * 16;
5528
5529 emit_move_insn (operands[0],
5530 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5531 DONE;
5532 })
5533
;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})

;; One source register table lookup (TBL).
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Table lookup extension (TBX): out-of-range indices leave the
;; corresponding lane of operand 1 unchanged, hence the "0" tie.
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.

(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:CI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:XI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Concatenate two V16QI registers into an OI tuple; split into the
;; individual register moves once register allocation is known.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
5656
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
			 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
			 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  /* Scale the lane index into the byte offset the EXT immediate wants.  */
  operands[3] = GEN_INT (INTVAL (operands[3])
	  * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
			 REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
5700
;; Structure stores.  The D-register forms come in pairs: the VD
;; variant uses a true ST<n>, while the DX variant (64-bit scalar
;; element types) is implemented as a multi-register ST1.

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

;; D-register structure store expander: operand 0 is the base address.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  /* <nregs> D registers of 8 bytes each.  */
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Q-register structure store expander.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Single-lane structure store expander; operand 2 is the lane index.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  /* One element per register of the tuple is written.  */
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

;; ST1 expander: a plain move suffices for little-endian, while
;; big-endian needs the dedicated element-ordered store.
(define_expand "aarch64_st1<VALL_F16:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})
5817
;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "+w")
   (match_operand:VSTRUCT 1 "register_operand" "0")
   (match_operand:VQ 2 "register_operand" "w")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  /* Each tuple element occupies a full Q register (16 bytes).  */
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.

(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Load one element from memory and replicate it to all lanes (LD1R).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
5859
;; LD1 multi-register forms used by the ld1x2 expanders above.

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)


;; Floating-point reciprocal (and reciprocal square root) estimate.
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
			FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
)

;; Reciprocal step (used in Newton-Raphson refinement sequences).
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

;; Unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})
5930
;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; AESE/AESD start by XOR-ing data with the round key, so an explicit
;; XOR with a zero third operand folds into the plain instruction.
(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(xor:V16QI
			 (match_operand:V16QI 1 "register_operand" "%0")
			 (match_operand:V16QI 2 "register_operand" "w"))
		       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
		      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
		       (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
				  (match_operand:V16QI 2 "register_operand" "w"))]
		      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
		      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
		     (const_string "yes" )
		     (const_string "no"))
      (const_string "yes")])]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
	  ] UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(match_operand:V16QI 1 "register_operand" "0")
	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
	  ] UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
6024
;; sha1

(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:SI 1
		       "register_operand" "w")]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H on the first element of a V4SI; lane 0 on little-endian
;; corresponds to lane 3 on big-endian (pattern below).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
				   (parallel [(const_int 0)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
				   (parallel [(const_int 3)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
6122
;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
		     CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")]
		     UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
		     UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

;; Three-way exclusive OR.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; Rotate-by-one and XOR.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XOR then rotate right by immediate.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

;; Bit clear and XOR.
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
6206
;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)


(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
		     CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
6265
6266 ;; fp16fml
6267
6268 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6269 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6270 (unspec:VDQSF
6271 [(match_operand:VDQSF 1 "register_operand" "0")
6272 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6273 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6274 VFMLA16_LOW))]
6275 "TARGET_F16FML"
6276 {
6277 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6278 <nunits> * 2, false);
6279 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6280 <nunits> * 2, false);
6281
6282 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6283 operands[1],
6284 operands[2],
6285 operands[3],
6286 p1, p2));
6287 DONE;
6288
6289 })
6290
6291 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6292 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6293 (unspec:VDQSF
6294 [(match_operand:VDQSF 1 "register_operand" "0")
6295 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6296 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6297 VFMLA16_HIGH))]
6298 "TARGET_F16FML"
6299 {
6300 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6301 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6302
6303 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6304 operands[1],
6305 operands[2],
6306 operands[3],
6307 p1, p2));
6308 DONE;
6309 })
6310
6311 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6312 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6313 (fma:VDQSF
6314 (float_extend:VDQSF
6315 (vec_select:<VFMLA_SEL_W>
6316 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6317 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6318 (float_extend:VDQSF
6319 (vec_select:<VFMLA_SEL_W>
6320 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6321 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6322 (match_operand:VDQSF 1 "register_operand" "0")))]
6323 "TARGET_F16FML"
6324 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6325 [(set_attr "type" "neon_fp_mul_s")]
6326 )
6327
6328 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6329 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6330 (fma:VDQSF
6331 (float_extend:VDQSF
6332 (neg:<VFMLA_SEL_W>
6333 (vec_select:<VFMLA_SEL_W>
6334 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6335 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6336 (float_extend:VDQSF
6337 (vec_select:<VFMLA_SEL_W>
6338 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6339 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6340 (match_operand:VDQSF 1 "register_operand" "0")))]
6341 "TARGET_F16FML"
6342 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6343 [(set_attr "type" "neon_fp_mul_s")]
6344 )
6345
6346 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6347 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6348 (fma:VDQSF
6349 (float_extend:VDQSF
6350 (vec_select:<VFMLA_SEL_W>
6351 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6352 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6353 (float_extend:VDQSF
6354 (vec_select:<VFMLA_SEL_W>
6355 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6356 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6357 (match_operand:VDQSF 1 "register_operand" "0")))]
6358 "TARGET_F16FML"
6359 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6360 [(set_attr "type" "neon_fp_mul_s")]
6361 )
6362
6363 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6364 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6365 (fma:VDQSF
6366 (float_extend:VDQSF
6367 (neg:<VFMLA_SEL_W>
6368 (vec_select:<VFMLA_SEL_W>
6369 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6370 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6371 (float_extend:VDQSF
6372 (vec_select:<VFMLA_SEL_W>
6373 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6374 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6375 (match_operand:VDQSF 1 "register_operand" "0")))]
6376 "TARGET_F16FML"
6377 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6378 [(set_attr "type" "neon_fp_mul_s")]
6379 )
6380
6381 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6382 [(set (match_operand:V2SF 0 "register_operand" "")
6383 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6384 (match_operand:V4HF 2 "register_operand" "")
6385 (match_operand:V4HF 3 "register_operand" "")
6386 (match_operand:SI 4 "aarch64_imm2" "")]
6387 VFMLA16_LOW))]
6388 "TARGET_F16FML"
6389 {
6390 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6391 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6392
6393 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6394 operands[1],
6395 operands[2],
6396 operands[3],
6397 p1, lane));
6398 DONE;
6399 }
6400 )
6401
;; As above, but selecting the high half (third argument of
;; aarch64_simd_vect_par_cnst_half is true) for the FMLAL2/FMLSL2 forms.
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	  VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})
6421
;; FMLAL by element, low half: widen the low half of the V4HF operand 2,
;; multiply by lane %5 of operand 3 duplicated across the vector, and
;; accumulate into operand 1 (tied to the destination).
;; NOTE(review): operand 3 uses the "x" constraint rather than "w",
;; presumably restricting the by-element multiplier to the register range
;; the instruction encoding allows — confirm against aarch64 constraints.
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6439
;; FMLSL by element, low half: as the FMLAL pattern above but the selected
;; low half of operand 2 is negated before widening, giving the
;; multiply-subtract sense.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6458
;; FMLAL2 by element: same as the low-half pattern but selecting the high
;; half of operand 2 (vect_par_cnst_hi_half).
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6476
;; FMLSL2 by element: high half of operand 2, negated before widening.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6495
;; Expander for the 128-bit by-lane forms (V8HF inputs, V4SF accumulator).
;; The lane index is 0-7 (aarch64_lane_imm3) since it selects from a full
;; V8HF vector; low half of operand 2 is used.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	  VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
6515
;; As above but selecting the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	  VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})
6535
;; FMLAL by element, Q-form: widen the low half of V8HF operand 2,
;; multiply by lane %5 (0-7) of V8HF operand 3, accumulate into V4SF
;; operand 1 (tied to the destination).
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6553
;; FMLSL by element, Q-form: as above, with the selected half of operand 2
;; negated before widening (multiply-subtract).
(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6572
;; FMLAL2 by element, Q-form: high half of operand 2.
(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6590
;; FMLSL2 by element, Q-form: high half of operand 2, negated.
(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6609
;; Mixed-width by-lane expander: 64-bit multiplicand (V4HF operand 2) with
;; a lane taken from a full 128-bit V8HF vector (operand 3), so the lane
;; index is 0-7 and the lane rtx is computed in V8HFmode while the half
;; selection parallel is in V4HFmode.
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	  VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;

})
6630
;; As above but selecting the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
		      (match_operand:V4HF 2 "register_operand" "")
		      (match_operand:V8HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_lane_imm3" "")]
	  VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;

})
6651
;; FMLAL by element: low half of V4HF operand 2 times lane %5 (0-7) of
;; V8HF operand 3, accumulated into V2SF operand 1.
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6669
;; FMLSL by element: as above with the selected half of operand 2 negated
;; before widening.
(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6688
;; FMLAL2 by element: high half of operand 2.
(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6706
;; FMLSL2 by element: high half of operand 2, negated.
(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6725
;; Mixed-width Q-form expander: 128-bit multiplicand (V8HF operand 2) with
;; a lane from a 64-bit V4HF vector (operand 3), so the lane index is 0-3
;; (aarch64_imm2) and the lane rtx is computed in V4HFmode.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	  VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})
6745
;; As above but selecting the high half of operand 2 (FMLAL2/FMLSL2).
(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
	  VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
6765
;; FMLAL by element, Q-form with V4HF lane source: low half of V8HF
;; operand 2 times lane %5 (0-3) of V4HF operand 3.
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6783
;; FMLSL by element, Q-form with V4HF lane source: as above with the
;; selected half of operand 2 negated before widening.
(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6802
;; FMLAL2 by element, Q-form with V4HF lane source: high half of operand 2.
(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6820
;; FMLSL2 by element, Q-form with V4HF lane source: high half of
;; operand 2, negated before widening.
(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
6839
6840 ;; pmull
6841
;; Polynomial multiply long (PMULL) of two 64-bit inputs producing a
;; 128-bit (TImode) result.  Gated on the AES crypto extension in
;; addition to SIMD, since the 1Q form belongs to that extension.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI  [(match_operand:DI 1 "register_operand" "w")
		     (match_operand:DI 2 "register_operand" "w")]
	  UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)
6851
;; PMULL2: polynomial multiply long of the upper 64-bit elements of two
;; V2DI inputs, producing a 128-bit (TImode) result.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
	  UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)