;; gcc/config/aarch64/aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Standard "mov<mode>" expander for all AdvSIMD vector modes (VALL_F16).
;; Legitimizes the source so the *aarch64_simd_mov insns below can match:
;; in particular it keeps a zero immediate store only when it can really
;; be emitted as STR xzr / STP xzr, xzr.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
    /* Force the operand into a register if it is not an
       immediate whose use can be replaced with xzr.
       If the mode is 16 bytes wide, then we will be doing
       a stp in DI mode, so we check the validity of that.
       If the mode is 8 bytes wide, then we will be doing a
       normal str, so the check need not apply.  */
    if (GET_CODE (operands[0]) == MEM
	&& !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	     && ((GET_MODE_SIZE (<MODE>mode) == 16
		  && aarch64_mem_pair_operand (operands[0], DImode))
		 || GET_MODE_SIZE (<MODE>mode) == 8)))
      operands[1] = force_reg (<MODE>mode, operands[1]);
    "
)
40
;; Misaligned vector move, used by the vectorizer.  AArch64 supports
;; misaligned accesses directly, so this only has to ensure at most one
;; operand is a memory reference.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
53
;; Broadcast a scalar into every lane.  Integer modes accept either a
;; SIMD register (DUP element form) or, at a cost, a GP register
;; (DUP general form).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

;; Floating-point broadcast: only the element form, since FP scalars
;; already live in the SIMD register file.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

;; Broadcast one selected lane of a vector to all lanes of the result.
;; The lane index is remapped at output time for big-endian lane
;; numbering (aarch64_endian_lane_rtx).
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite register width
;; (64 <-> 128 bit), so the lane index is interpreted in the source's
;; mode (<VSWAP_WIDTH>).
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
103
;; 64-bit (D-register) vector move.  Alternatives, in order:
;;   0: load from memory        4: SIMD -> GP register
;;   1: store a zero via xzr    5: GP -> SIMD register
;;   2: store to memory         6: GP -> GP register
;;   3: SIMD reg-reg move       7: valid MOVI-class immediate
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1],
						  <MODE>mode, 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
132
;; 128-bit (Q-register) vector move.  Storing zero uses STP xzr, xzr,
;; hence the Umq constraint (a valid DImode pair address).  The GP
;; alternatives (4-6) need two instructions and are split after reload
;; by the define_splits below, so they emit "#" here.
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umq, m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
167
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

;; Store element 0 of a vector as a scalar.  Only matches when the
;; (endian-adjusted) lane really is the architectural lane 0, which is
;; the low part of the register and therefore directly STR-able.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
180
;; D-register load pair: two adjacent 8-byte loads fused into one LDP.
;; The insn condition checks structurally that the second address is
;; exactly the first plus the mode size.
(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
	(match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

;; D-register store pair: the STP counterpart of load_pair above, with
;; the same adjacent-address requirement.
(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
	(match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
208
;; After reload, split a 128-bit vector move between two GP register
;; pairs into a pair of DImode moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit vector move that crosses the FP/GP
;; register-file boundary (either direction) into half moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
232
;; Split a 128-bit GP<->SIMD move into two 64-bit half moves.
;; GP -> SIMD inserts the low then high half of the source into the
;; destination; SIMD -> GP extracts the low then high half via the
;; mov_from_<mode>low/high patterns below.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	/* Lane-selection masks for the low and high halves.  */
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
267
;; Move the low 64-bit half of a Q register to a GP register.
;; Only used after reload, by aarch64_split_simd_mov above.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; As above, but for the high 64-bit half (d[1]).
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
289
;; Vector OR-NOT.  Note the RTL complements operand 1 but ORN
;; complements its *second* source, so the operands are swapped in the
;; output template.
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector AND-NOT; operands swapped for the same reason as ORN above.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiply.  VDQ_BHSI only: there is no MUL for
;; 64-bit-element vectors.
(define_insn "mul<mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
334
;; Per-element byte swap, implemented with the appropriately sized
;; REV16/REV32/REV64 (<Vrevsuff>).
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Bit reversal within each byte (RBIT); byte vectors only.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

;; Count trailing zeros, synthesized as:
;;   bswap (reverse bytes within each element), then rbit viewed as a
;;   byte vector (net effect: full bit reversal per element), then clz.
;; Operates destructively on operands[0] as scratch.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
365
;; xorsign (x, y) = x with its sign bit XORed by the sign of y.
;; Implemented in the integer domain: isolate y's sign bits with an
;; all-sign-bit mask, then XOR them into x.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  /* Element-wide mask with only the sign bit set.  */
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
394
;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: four-way byte dot product accumulated into 32-bit lanes;
;; operand 1 is the accumulator (tied to the destination).
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return result;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  /* Accumulate into operand 3 in place, then copy to the result.  */
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
441
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; By-element dot product: operand 3 supplies the 32-bit group selected
;; by lane index operand 4 (endian-adjusted at output time).  The
;; _lane form indexes a 64-bit register (V8QI)...
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

;; ...and the _laneq form indexes a 128-bit register (V16QI).
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
473
;; copysign (x, y): magnitude of x with the sign of y, implemented as a
;; bit-select (BSL) with an all-sign-bit mask: sign bits come from
;; operand 2, all other bits from operand 1.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
491
;; Multiply a vector by one broadcast lane of another vector, using the
;; by-element form of (F)MUL.  Lane index endian-adjusted at output.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, but the lane comes from a vector of the opposite register
;; width, so the index is interpreted in <VSWAP_WIDTH> mode.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

;; Multiply a vector by a broadcast scalar register; always uses
;; element 0, so no endian adjustment is needed.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
534
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar forms.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step (FRSQRTS), used to refine
;; the FRSQRTE estimate.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; Expand 1/sqrt(x) via the iterative estimate/step sequence
;; (aarch64_emit_approx_sqrt in reciprocal mode).
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
561
;; Scalar DF multiply by a selected lane of a V2DF register, using the
;; by-element vector FMUL.  NOTE(review): the template writes the whole
;; destination vector (%0.2d); only lane 0 carries the DF result the
;; pattern claims, the upper lane appears to be clobbered scratch —
;; confirm against the output-modifier semantics before relying on it.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
576
;; Vector integer negate.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value (RTL abs form; may combine freely).
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.  Hence the unspec wrapper, which combine cannot see through.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
605
;; Signed absolute difference: abs (a - b) matched to SABD.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Absolute difference and accumulate: acc + abs (a - b), matched to
;; SABA (operand 3 tied to the destination).
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Floating-point absolute difference (FABD), vector and scalar forms.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
637
;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 1 matches immediates expressible as a BIC of the tied
;; input (constraint Db) and emits the immediate form.
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2],
	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
;; Mirror of and<mode>3 above, with the Do (ORR-immediate) constraint.
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2],
	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector exclusive-OR (EOR).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
696
;; Insert a scalar into one lane of a vector.  Operand 2 is a one-hot
;; vec_merge mask; %p prints its set-bit position as the lane number.
;; Sources: GP register (INS general), SIMD register lane 0 (INS
;; element), or memory (LD1 single structure).
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
	    (vec_duplicate:VDQ_BHSI
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   /* Endian-adjust the one-hot lane mask.  */
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %w1";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)

;; Copy lane 4 of operand 3 into the lane selected by the one-hot mask
;; operand 2, leaving all other lanes of operand 1 untouched (INS
;; element form).  Both indices are endian-adjusted at output time.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source lane comes from a vector of the opposite
;; register width, so its index is interpreted in <VSWAP_WIDTH> mode.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
765
;; Logical right shift by an immediate (USHR).
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic right shift by an immediate (SSHR).
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Left shift by an immediate (SHL).
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Left shift by a per-lane register amount (SSHL with non-negative
;; amounts).
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Register shift, unsigned semantics: USHL shifts left for positive
;; amounts and logically right for negative ones; kept as an unspec so
;; the RTL does not mis-model that behavior.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; Register shift, signed semantics: SSHL shifts arithmetically right
;; for negative amounts; unspec for the same reason as above.
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
821
;; Vector left shift by a scalar amount.  Constant amounts in
;; [0, bit_width) use the immediate SHL; anything else is broadcast to
;; a vector and expanded through the register-shift SSHL.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* Left shifts accept amounts 0 .. bit_width - 1 (contrast the
	 right-shift expanders below, which accept 1 .. bit_width).  */
      if (shift_amount >= 0 && shift_amount < bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
	  DONE;
	}
      else
	{
	  operands[2] = force_reg (SImode, operands[2]);
	}
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
					     convert_to_mode (<VEL>mode,
							      operands[2],
							      0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
						  tmp));
      DONE;
    }
  else
    FAIL;
}
)
868
;; Vector logical right shift by a scalar amount.  Constant amounts in
;; (0, bit_width] use the immediate USHR; otherwise the amount is
;; negated, broadcast, and expanded through USHL (which shifts right
;; for negative amounts).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
							  operands[1],
							  tmp1));
      DONE;
    }
  else
    FAIL;
}
)
915
;; Vector arithmetic right shift by a scalar amount.  Same structure
;; as lshr<mode>3 above, using SSHR for immediates and the signed SSHL
;; path (negated amount) for register amounts.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
	{
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
      else
	operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					     convert_to_mode (<VEL>mode,
							      tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
							operands[1],
							tmp1));
      DONE;
    }
  else
    FAIL;
}
)
962
;; Vector-by-vector shift left: each lane of operand 2 supplies the
;; shift count for the corresponding lane of operand 1 (SSHL form).
963 (define_expand "vashl<mode>3"
964 [(match_operand:VDQ_I 0 "register_operand" "")
965 (match_operand:VDQ_I 1 "register_operand" "")
966 (match_operand:VDQ_I 2 "register_operand" "")]
967 "TARGET_SIMD"
968 {
969 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
970 operands[2]));
971 DONE;
972 })
973
974 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
975 ;; Negating individual lanes most certainly offsets the
976 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane
;; counts and use the signed left-shift-by-register pattern.
977 (define_expand "vashr<mode>3"
978 [(match_operand:VDQ_BHSI 0 "register_operand" "")
979 (match_operand:VDQ_BHSI 1 "register_operand" "")
980 (match_operand:VDQ_BHSI 2 "register_operand" "")]
981 "TARGET_SIMD"
982 {
983 rtx neg = gen_reg_rtx (<MODE>mode);
984 emit (gen_neg<mode>2 (neg, operands[2]));
985 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
986 neg));
987 DONE;
988 })
989
990 ;; DI vector shift
991 (define_expand "aarch64_ashr_simddi"
992 [(match_operand:DI 0 "register_operand" "=w")
993 (match_operand:DI 1 "register_operand" "w")
994 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
995 "TARGET_SIMD"
996 {
997 /* An arithmetic shift right by 64 fills the result with copies of the sign
998 bit, just like asr by 63 - however the standard pattern does not handle
999 a shift by 64. */
1000 if (INTVAL (operands[2]) == 64)
1001 operands[2] = GEN_INT (63);
1002 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1003 DONE;
1004 }
1005 )
1006
;; Vector-by-vector logical shift right: negate the per-lane counts
;; and use the unsigned left-shift-by-register pattern (USHL).
1007 (define_expand "vlshr<mode>3"
1008 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1009 (match_operand:VDQ_BHSI 1 "register_operand" "")
1010 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1011 "TARGET_SIMD"
1012 {
1013 rtx neg = gen_reg_rtx (<MODE>mode);
1014 emit (gen_neg<mode>2 (neg, operands[2]));
1015 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1016 neg));
1017 DONE;
1018 })
1019
;; Logical shift right of a DI value.  A shift by 64 (allowed by the
;; aarch64_shift_imm64_di predicate) yields zero, which the standard
;; lshrdi3 pattern cannot express, so emit a move of zero instead.
1020 (define_expand "aarch64_lshr_simddi"
1021 [(match_operand:DI 0 "register_operand" "=w")
1022 (match_operand:DI 1 "register_operand" "w")
1023 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1024 "TARGET_SIMD"
1025 {
1026 if (INTVAL (operands[2]) == 64)
1027 emit_move_insn (operands[0], const0_rtx)
1028 else
1029 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1030 DONE;
1031 }
1032 )
1033
;; Standard vec_set: operands[2] is an element index, converted here
;; to the one-hot vec_merge mask (1 << index) that the insn pattern
;; expects.  The destination doubles as the vec_merge "old value".
1034 (define_expand "vec_set<mode>"
1035 [(match_operand:VDQ_BHSI 0 "register_operand")
1036 (match_operand:<VEL> 1 "register_operand")
1037 (match_operand:SI 2 "immediate_operand")]
1038 "TARGET_SIMD"
1039 {
1040 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1041 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1042 GEN_INT (elem), operands[0]));
1043 DONE;
1044 }
1045 )
1046
1047 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; On big-endian the lanes are reversed in the register, so the
;; whole-register shift goes the other way (shl instead of ushr).
1048 (define_insn "vec_shr_<mode>"
1049 [(set (match_operand:VD 0 "register_operand" "=w")
1050 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1051 (match_operand:SI 2 "immediate_operand" "i")]
1052 UNSPEC_VEC_SHR))]
1053 "TARGET_SIMD"
1054 {
1055 if (BYTES_BIG_ENDIAN)
1056 return "shl %d0, %d1, %2";
1057 else
1058 return "ushr %d0, %d1, %2";
1059 }
1060 [(set_attr "type" "neon_shift_imm")]
1061 )
1062
;; Insert a DI element into a V2DI vector.  operands[2] arrives as a
;; one-hot mask; it is remapped through ENDIAN_LANE_N so the INS lane
;; number matches the architectural lane on big-endian.  Alternative 0
;; inserts from a general register, alternative 1 from a SIMD register.
1063 (define_insn "aarch64_simd_vec_setv2di"
1064 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1065 (vec_merge:V2DI
1066 (vec_duplicate:V2DI
1067 (match_operand:DI 1 "register_operand" "r,w"))
1068 (match_operand:V2DI 3 "register_operand" "0,0")
1069 (match_operand:SI 2 "immediate_operand" "i,i")))]
1070 "TARGET_SIMD"
1071 {
1072 int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1073 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1074 switch (which_alternative)
1075 {
1076 case 0:
1077 return "ins\\t%0.d[%p2], %1";
1078 case 1:
1079 return "ins\\t%0.d[%p2], %1.d[0]";
1080 default:
1081 gcc_unreachable ();
1082 }
1083 }
1084 [(set_attr "type" "neon_from_gp, neon_ins_q")]
1085 )
1086
;; vec_set for V2DI: turn the element index into the one-hot mask
;; required by aarch64_simd_vec_setv2di above.
1087 (define_expand "vec_setv2di"
1088 [(match_operand:V2DI 0 "register_operand")
1089 (match_operand:DI 1 "register_operand")
1090 (match_operand:SI 2 "immediate_operand")]
1091 "TARGET_SIMD"
1092 {
1093 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1094 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1095 GEN_INT (elem), operands[0]));
1096 DONE;
1097 }
1098 )
1099
;; Insert a floating-point element (F16/F32/F64 vectors); the scalar
;; source lives in lane 0 of a SIMD register.
1100 (define_insn "aarch64_simd_vec_set<mode>"
1101 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1102 (vec_merge:VDQF_F16
1103 (vec_duplicate:VDQF_F16
1104 (match_operand:<VEL> 1 "register_operand" "w"))
1105 (match_operand:VDQF_F16 3 "register_operand" "0")
1106 (match_operand:SI 2 "immediate_operand" "i")))]
1107 "TARGET_SIMD"
1108 {
1109 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1110
1111 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1112 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1113 }
1114 [(set_attr "type" "neon_ins<q>")]
1115 )
1116
;; vec_set for FP vectors.  NOTE(review): the "+w"/"w" constraint
;; strings on a define_expand are ignored by recog — presumably
;; left over from an insn; harmless but could be dropped.
1117 (define_expand "vec_set<mode>"
1118 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1119 (match_operand:<VEL> 1 "register_operand" "w")
1120 (match_operand:SI 2 "immediate_operand" "")]
1121 "TARGET_SIMD"
1122 {
1123 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1124 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1125 GEN_INT (elem), operands[0]));
1126 DONE;
1127 }
1128 )
1129
1130
;; Integer multiply-accumulate (MLA) and multiply-subtract (MLS),
;; including by-lane forms where one multiplicand is a single element
;; selected from a vector (the <vswap_width_name> variants take the
;; lane from a vector of the other width, e.g. a V4HI lane into a
;; V8HI operation) and the _merge forms where the scalar has already
;; been broadcast with vec_duplicate.
1131 (define_insn "aarch64_mla<mode>"
1132 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1133 (plus:VDQ_BHSI (mult:VDQ_BHSI
1134 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1135 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1136 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1137 "TARGET_SIMD"
1138 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1139 [(set_attr "type" "neon_mla_<Vetype><q>")]
1140 )
1141
;; MLA with a selected lane as the multiplier; the lane number is
;; remapped for endianness before printing.
1142 (define_insn "*aarch64_mla_elt<mode>"
1143 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1144 (plus:VDQHS
1145 (mult:VDQHS
1146 (vec_duplicate:VDQHS
1147 (vec_select:<VEL>
1148 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1149 (parallel [(match_operand:SI 2 "immediate_operand")])))
1150 (match_operand:VDQHS 3 "register_operand" "w"))
1151 (match_operand:VDQHS 4 "register_operand" "0")))]
1152 "TARGET_SIMD"
1153 {
1154 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1155 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1156 }
1157 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1158 )
1159
1160 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1161 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1162 (plus:VDQHS
1163 (mult:VDQHS
1164 (vec_duplicate:VDQHS
1165 (vec_select:<VEL>
1166 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1167 (parallel [(match_operand:SI 2 "immediate_operand")])))
1168 (match_operand:VDQHS 3 "register_operand" "w"))
1169 (match_operand:VDQHS 4 "register_operand" "0")))]
1170 "TARGET_SIMD"
1171 {
1172 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1173 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1174 }
1175 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1176 )
1177
;; MLA where the scalar multiplier was broadcast by vec_duplicate;
;; always uses lane 0 of the scalar's register.
1178 (define_insn "*aarch64_mla_elt_merge<mode>"
1179 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1180 (plus:VDQHS
1181 (mult:VDQHS (vec_duplicate:VDQHS
1182 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1183 (match_operand:VDQHS 2 "register_operand" "w"))
1184 (match_operand:VDQHS 3 "register_operand" "0")))]
1185 "TARGET_SIMD"
1186 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1187 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1188 )
1189
;; Multiply-subtract: acc - a * b.
1190 (define_insn "aarch64_mls<mode>"
1191 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1192 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1193 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1194 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1195 "TARGET_SIMD"
1196 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1197 [(set_attr "type" "neon_mla_<Vetype><q>")]
1198 )
1199
1200 (define_insn "*aarch64_mls_elt<mode>"
1201 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202 (minus:VDQHS
1203 (match_operand:VDQHS 4 "register_operand" "0")
1204 (mult:VDQHS
1205 (vec_duplicate:VDQHS
1206 (vec_select:<VEL>
1207 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1208 (parallel [(match_operand:SI 2 "immediate_operand")])))
1209 (match_operand:VDQHS 3 "register_operand" "w"))))]
1210 "TARGET_SIMD"
1211 {
1212 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1213 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1214 }
1215 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1216 )
1217
1218 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1219 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1220 (minus:VDQHS
1221 (match_operand:VDQHS 4 "register_operand" "0")
1222 (mult:VDQHS
1223 (vec_duplicate:VDQHS
1224 (vec_select:<VEL>
1225 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1226 (parallel [(match_operand:SI 2 "immediate_operand")])))
1227 (match_operand:VDQHS 3 "register_operand" "w"))))]
1228 "TARGET_SIMD"
1229 {
1230 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1231 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1232 }
1233 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1234 )
1235
1236 (define_insn "*aarch64_mls_elt_merge<mode>"
1237 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1238 (minus:VDQHS
1239 (match_operand:VDQHS 1 "register_operand" "0")
1240 (mult:VDQHS (vec_duplicate:VDQHS
1241 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1242 (match_operand:VDQHS 3 "register_operand" "w"))))]
1243 "TARGET_SIMD"
1244 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1245 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1246 )
1247
1248 ;; Max/Min operations.
1249 (define_insn "<su><maxmin><mode>3"
1250 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1251 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1252 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1253 "TARGET_SIMD"
1254 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1255 [(set_attr "type" "neon_minmax<q>")]
1256 )
1257
;; There is no 64-bit-element integer SMAX/SMIN/UMAX/UMIN instruction,
;; so V2DI min/max is synthesised as a compare + vcond select.
1258 (define_expand "<su><maxmin>v2di3"
1259 [(set (match_operand:V2DI 0 "register_operand" "")
1260 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1261 (match_operand:V2DI 2 "register_operand" "")))]
1262 "TARGET_SIMD"
1263 {
1264 enum rtx_code cmp_operator;
1265 rtx cmp_fmt;
1266
1267 switch (<CODE>)
1268 {
1269 case UMIN:
1270 cmp_operator = LTU;
1271 break;
1272 case SMIN:
1273 cmp_operator = LT;
1274 break;
1275 case UMAX:
1276 cmp_operator = GTU;
1277 break;
1278 case SMAX:
1279 cmp_operator = GT;
1280 break;
1281 default:
1282 gcc_unreachable ();
1283 }
1284
1285 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1286 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1287 operands[2], cmp_fmt, operands[1], operands[2]));
1288 DONE;
1289 })
1290
1291 ;; Pairwise Integer Max/Min operations.
1292 (define_insn "aarch64_<maxmin_uns>p<mode>"
1293 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1294 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1295 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1296 MAXMINV))]
1297 "TARGET_SIMD"
1298 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1299 [(set_attr "type" "neon_minmax<q>")]
1300 )
1301
1302 ;; Pairwise FP Max/Min operations.
1303 (define_insn "aarch64_<maxmin_uns>p<mode>"
1304 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1305 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1306 (match_operand:VHSDF 2 "register_operand" "w")]
1307 FMAXMINV))]
1308 "TARGET_SIMD"
1309 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1310 [(set_attr "type" "neon_minmax<q>")]
1311 )
1312
1313 ;; vec_concat gives a new vector with the low elements from operand 1, and
1314 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1315 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1316 ;; What that means, is that the RTL descriptions of the below patterns
1317 ;; need to change depending on endianness.
1318
1319 ;; Move to the low architectural bits of the register.
1320 ;; On little-endian this is { operand, zeroes }
1321 ;; On big-endian this is { zeroes, operand }
1322
;; VQ_NO2E modes: the half-mode is itself a vector, so the zeroed
;; half is written (vec_duplicate (const_int 0)).
1323 (define_insn "move_lo_quad_internal_<mode>"
1324 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1325 (vec_concat:VQ_NO2E
1326 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1327 (vec_duplicate:<VHALF> (const_int 0))))]
1328 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1329 "@
1330 dup\\t%d0, %1.d[0]
1331 fmov\\t%d0, %1
1332 dup\\t%d0, %1"
1333 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1334 (set_attr "simd" "yes,*,yes")
1335 (set_attr "fp" "*,yes,*")
1336 (set_attr "length" "4")]
1337 )
1338
;; VQ_2E modes: the half-mode is a scalar, so the zeroed half is a
;; plain (const_int 0).  Otherwise identical to the pattern above.
1339 (define_insn "move_lo_quad_internal_<mode>"
1340 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1341 (vec_concat:VQ_2E
1342 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1343 (const_int 0)))]
1344 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1345 "@
1346 dup\\t%d0, %1.d[0]
1347 fmov\\t%d0, %1
1348 dup\\t%d0, %1"
1349 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1350 (set_attr "simd" "yes,*,yes")
1351 (set_attr "fp" "*,yes,*")
1352 (set_attr "length" "4")]
1353 )
1354
;; Big-endian variants: the operand halves swap position in the
;; vec_concat (see the endianness note above), but the emitted code
;; is the same.
1355 (define_insn "move_lo_quad_internal_be_<mode>"
1356 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1357 (vec_concat:VQ_NO2E
1358 (vec_duplicate:<VHALF> (const_int 0))
1359 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1360 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1361 "@
1362 dup\\t%d0, %1.d[0]
1363 fmov\\t%d0, %1
1364 dup\\t%d0, %1"
1365 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1366 (set_attr "simd" "yes,*,yes")
1367 (set_attr "fp" "*,yes,*")
1368 (set_attr "length" "4")]
1369 )
1370
1371 (define_insn "move_lo_quad_internal_be_<mode>"
1372 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1373 (vec_concat:VQ_2E
1374 (const_int 0)
1375 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1376 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1377 "@
1378 dup\\t%d0, %1.d[0]
1379 fmov\\t%d0, %1
1380 dup\\t%d0, %1"
1381 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1382 (set_attr "simd" "yes,*,yes")
1383 (set_attr "fp" "*,yes,*")
1384 (set_attr "length" "4")]
1385 )
1386
;; Dispatch to the endianness-appropriate internal pattern.
1387 (define_expand "move_lo_quad_<mode>"
1388 [(match_operand:VQ 0 "register_operand")
1389 (match_operand:VQ 1 "register_operand")]
1390 "TARGET_SIMD"
1391 {
1392 if (BYTES_BIG_ENDIAN)
1393 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1394 else
1395 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1396 DONE;
1397 }
1398 )
1399
1400 ;; Move operand1 to the high architectural bits of the register, keeping
1401 ;; the low architectural bits of operand2.
1402 ;; For little-endian this is { operand2, operand1 }
1403 ;; For big-endian this is { operand1, operand2 }
1404
;; operand 2 is a vect_par_cnst_lo_half parallel selecting the
;; low half of the destination, which is preserved (match_dup 0).
1405 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1406 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1407 (vec_concat:VQ
1408 (vec_select:<VHALF>
1409 (match_dup 0)
1410 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1411 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1412 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1413 "@
1414 ins\\t%0.d[1], %1.d[0]
1415 ins\\t%0.d[1], %1"
1416 [(set_attr "type" "neon_ins")]
1417 )
1418
1419 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1420 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1421 (vec_concat:VQ
1422 (match_operand:<VHALF> 1 "register_operand" "w,r")
1423 (vec_select:<VHALF>
1424 (match_dup 0)
1425 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1426 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1427 "@
1428 ins\\t%0.d[1], %1.d[0]
1429 ins\\t%0.d[1], %1"
1430 [(set_attr "type" "neon_ins")]
1431 )
1432
;; Build the lo-half selector parallel and dispatch on endianness.
1433 (define_expand "move_hi_quad_<mode>"
1434 [(match_operand:VQ 0 "register_operand" "")
1435 (match_operand:<VHALF> 1 "register_operand" "")]
1436 "TARGET_SIMD"
1437 {
1438 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1439 if (BYTES_BIG_ENDIAN)
1440 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1441 operands[1], p));
1442 else
1443 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1444 operands[1], p));
1445 DONE;
1446 })
1447
1448 ;; Narrowing operations.
1449
1450 ;; For doubles.
1451 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1452 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1453 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1454 "TARGET_SIMD"
1455 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1456 [(set_attr "type" "neon_shift_imm_narrow_q")]
1457 )
1458
;; Pack two D-register vectors: assemble them into one Q register
;; (lo/hi swapped on big-endian to keep the lane order right), then
;; narrow the whole Q register with a single XTN.
1459 (define_expand "vec_pack_trunc_<mode>"
1460 [(match_operand:<VNARROWD> 0 "register_operand" "")
1461 (match_operand:VDN 1 "register_operand" "")
1462 (match_operand:VDN 2 "register_operand" "")]
1463 "TARGET_SIMD"
1464 {
1465 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1466 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1467 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1468
1469 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1470 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1471 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1472 DONE;
1473 })
1474
1475 ;; For quads.
1476
;; XTN then XTN2 into the same destination; "=&w" (early-clobber)
;; prevents the destination overlapping an input, since the first
;; XTN clobbers it before the second input is read.
1477 (define_insn "vec_pack_trunc_<mode>"
1478 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1479 (vec_concat:<VNARROWQ2>
1480 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1481 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1482 "TARGET_SIMD"
1483 {
1484 if (BYTES_BIG_ENDIAN)
1485 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1486 else
1487 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1488 }
1489 [(set_attr "type" "multiple")
1490 (set_attr "length" "8")]
1491 )
1492
1493 ;; Widening operations.
1494
;; Sign/zero-extend the low half of a Q register via SSHLL/USHLL #0.
1495 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1496 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1497 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1498 (match_operand:VQW 1 "register_operand" "w")
1499 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1500 )))]
1501 "TARGET_SIMD"
1502 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1503 [(set_attr "type" "neon_shift_imm_long")]
1504 )
1505
;; As above for the high half, using the "2" form of the instruction.
1506 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1507 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1508 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1509 (match_operand:VQW 1 "register_operand" "w")
1510 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1511 )))]
1512 "TARGET_SIMD"
1513 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1514 [(set_attr "type" "neon_shift_imm_long")]
1515 )
1516
;; Standard-name expanders: build the half-selector parallel and
;; emit the matching insn above.
1517 (define_expand "vec_unpack<su>_hi_<mode>"
1518 [(match_operand:<VWIDE> 0 "register_operand" "")
1519 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1520 "TARGET_SIMD"
1521 {
1522 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1523 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1524 operands[1], p));
1525 DONE;
1526 }
1527 )
1528
1529 (define_expand "vec_unpack<su>_lo_<mode>"
1530 [(match_operand:<VWIDE> 0 "register_operand" "")
1531 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1532 "TARGET_SIMD"
1533 {
1534 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1535 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1536 operands[1], p));
1537 DONE;
1538 }
1539 )
1540
1541 ;; Widening arithmetic.
1542
;; Widening multiply-accumulate on the low halves: acc + ext(lo(a)) * ext(lo(b)).
1543 (define_insn "*aarch64_<su>mlal_lo<mode>"
1544 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1545 (plus:<VWIDE>
1546 (mult:<VWIDE>
1547 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1548 (match_operand:VQW 2 "register_operand" "w")
1549 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1550 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1551 (match_operand:VQW 4 "register_operand" "w")
1552 (match_dup 3))))
1553 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1554 "TARGET_SIMD"
1555 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1556 [(set_attr "type" "neon_mla_<Vetype>_long")]
1557 )
1558
;; As above on the high halves (SMLAL2/UMLAL2).
1559 (define_insn "*aarch64_<su>mlal_hi<mode>"
1560 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1561 (plus:<VWIDE>
1562 (mult:<VWIDE>
1563 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1564 (match_operand:VQW 2 "register_operand" "w")
1565 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1566 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1567 (match_operand:VQW 4 "register_operand" "w")
1568 (match_dup 3))))
1569 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1570 "TARGET_SIMD"
1571 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1572 [(set_attr "type" "neon_mla_<Vetype>_long")]
1573 )
1574
;; Widening multiply-subtract on the low halves: acc - ext(lo(a)) * ext(lo(b)).
1575 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1576 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1577 (minus:<VWIDE>
1578 (match_operand:<VWIDE> 1 "register_operand" "0")
1579 (mult:<VWIDE>
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1585 (match_dup 3))))))]
1586 "TARGET_SIMD"
1587 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1588 [(set_attr "type" "neon_mla_<Vetype>_long")]
1589 )
1590
1591 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1592 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1593 (minus:<VWIDE>
1594 (match_operand:<VWIDE> 1 "register_operand" "0")
1595 (mult:<VWIDE>
1596 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1597 (match_operand:VQW 2 "register_operand" "w")
1598 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1599 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1600 (match_operand:VQW 4 "register_operand" "w")
1601 (match_dup 3))))))]
1602 "TARGET_SIMD"
1603 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1604 [(set_attr "type" "neon_mla_<Vetype>_long")]
1605 )
1606
;; Whole-D-register forms: the inputs are 64-bit vectors, so no half
;; selection is needed.
1607 (define_insn "*aarch64_<su>mlal<mode>"
1608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609 (plus:<VWIDE>
1610 (mult:<VWIDE>
1611 (ANY_EXTEND:<VWIDE>
1612 (match_operand:VD_BHSI 1 "register_operand" "w"))
1613 (ANY_EXTEND:<VWIDE>
1614 (match_operand:VD_BHSI 2 "register_operand" "w")))
1615 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1616 "TARGET_SIMD"
1617 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1618 [(set_attr "type" "neon_mla_<Vetype>_long")]
1619 )
1620
1621 (define_insn "*aarch64_<su>mlsl<mode>"
1622 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1623 (minus:<VWIDE>
1624 (match_operand:<VWIDE> 1 "register_operand" "0")
1625 (mult:<VWIDE>
1626 (ANY_EXTEND:<VWIDE>
1627 (match_operand:VD_BHSI 2 "register_operand" "w"))
1628 (ANY_EXTEND:<VWIDE>
1629 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1630 "TARGET_SIMD"
1631 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1632 [(set_attr "type" "neon_mla_<Vetype>_long")]
1633 )
1634
;; Widening multiply of the low halves (SMULL/UMULL).
1635 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1637 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1638 (match_operand:VQW 1 "register_operand" "w")
1639 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1640 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1641 (match_operand:VQW 2 "register_operand" "w")
1642 (match_dup 3)))))]
1643 "TARGET_SIMD"
1644 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1645 [(set_attr "type" "neon_mul_<Vetype>_long")]
1646 )
1647
;; Standard name: build the lo-half selector and emit the insn above.
1648 (define_expand "vec_widen_<su>mult_lo_<mode>"
1649 [(match_operand:<VWIDE> 0 "register_operand" "")
1650 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1651 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1652 "TARGET_SIMD"
1653 {
1654 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1655 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1656 operands[1],
1657 operands[2], p));
1658 DONE;
1659 }
1660 )
1661
;; Widening multiply of the high halves (SMULL2/UMULL2).
1662 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1663 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665 (match_operand:VQW 1 "register_operand" "w")
1666 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668 (match_operand:VQW 2 "register_operand" "w")
1669 (match_dup 3)))))]
1670 "TARGET_SIMD"
1671 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1672 [(set_attr "type" "neon_mul_<Vetype>_long")]
1673 )
1674
1675 (define_expand "vec_widen_<su>mult_hi_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand" "")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1678 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1679 "TARGET_SIMD"
1680 {
1681 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1682 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1683 operands[1],
1684 operands[2], p));
1685 DONE;
1686
1687 }
1688 )
1689
1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1695 ;;
1696 ;; Floating-point operations can raise an exception. Vectorizing such
1697 ;; operations is safe for the reasons explained below.
1698 ;;
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling, however this is an optional feature. In the
1701 ;; event of a floating-point exception being raised by vectorised
1702 ;; code then:
1703 ;; 1. If trapped floating-point exceptions are available, then a trap
1704 ;; will be taken when any lane raises an enabled exception. A trap
1705 ;; handler may determine which lane raised the exception.
1706 ;; 2. Alternatively a sticky exception flag is set in the
1707 ;; floating-point status register (FPSR). Software may explicitly
1708 ;; test the exception flags, in which case the tests will either
1709 ;; prevent vectorisation, allowing precise identification of the
1710 ;; failing operation, or if tested outside of vectorisable regions
1711 ;; then the specific operation and lane are not of interest.
1712
1713 ;; FP arithmetic operations.
1714
1715 (define_insn "add<mode>3"
1716 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718 (match_operand:VHSDF 2 "register_operand" "w")))]
1719 "TARGET_SIMD"
1720 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1722 )
1723
1724 (define_insn "sub<mode>3"
1725 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727 (match_operand:VHSDF 2 "register_operand" "w")))]
1728 "TARGET_SIMD"
1729 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1731 )
1732
1733 (define_insn "mul<mode>3"
1734 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736 (match_operand:VHSDF 2 "register_operand" "w")))]
1737 "TARGET_SIMD"
1738 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1740 )
1741
;; FP division: first try an approximate reciprocal sequence
;; (aarch64_emit_approx_div — presumably gated on -ffast-math-style
;; flags; confirm in aarch64.c); otherwise fall through to the
;; *div<mode>3 FDIV insn below.
1742 (define_expand "div<mode>3"
1743 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1745 (match_operand:VHSDF 2 "register_operand" "w")))]
1746 "TARGET_SIMD"
1747 {
1748 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1749 DONE;
1750
1751 operands[1] = force_reg (<MODE>mode, operands[1]);
1752 })
1753
;; The actual FDIV instruction, used when the expander above does not
;; emit an approximation sequence.
1754 (define_insn "*div<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1758 "TARGET_SIMD"
1759 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_div_<stype><q>")]
1761 )
1762
1763 (define_insn "neg<mode>2"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1766 "TARGET_SIMD"
1767 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1769 )
1770
1771 (define_insn "abs<mode>2"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1774 "TARGET_SIMD"
1775 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1777 )
1778
;; Fused multiply-add; the accumulator ties to the destination ("0").
1779 (define_insn "fma<mode>4"
1780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782 (match_operand:VHSDF 2 "register_operand" "w")
1783 (match_operand:VHSDF 3 "register_operand" "0")))]
1784 "TARGET_SIMD"
1785 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1787 )
1788
;; FMLA with a selected lane as one multiplicand; the lane index is
;; endian-remapped before printing.
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790 [(set (match_operand:VDQF 0 "register_operand" "=w")
1791 (fma:VDQF
1792 (vec_duplicate:VDQF
1793 (vec_select:<VEL>
1794 (match_operand:VDQF 1 "register_operand" "<h_con>")
1795 (parallel [(match_operand:SI 2 "immediate_operand")])))
1796 (match_operand:VDQF 3 "register_operand" "w")
1797 (match_operand:VDQF 4 "register_operand" "0")))]
1798 "TARGET_SIMD"
1799 {
1800 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1802 }
1803 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1804 )
1805
;; As above, lane taken from the swapped-width vector mode.
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1808 (fma:VDQSF
1809 (vec_duplicate:VDQSF
1810 (vec_select:<VEL>
1811 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQSF 3 "register_operand" "w")
1814 (match_operand:VDQSF 4 "register_operand" "0")))]
1815 "TARGET_SIMD"
1816 {
1817 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819 }
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
;; FMLA by a scalar already broadcast with vec_duplicate: lane 0.
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824 [(set (match_operand:VMUL 0 "register_operand" "=w")
1825 (fma:VMUL
1826 (vec_duplicate:VMUL
1827 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828 (match_operand:VMUL 2 "register_operand" "w")
1829 (match_operand:VMUL 3 "register_operand" "0")))]
1830 "TARGET_SIMD"
1831 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1833 )
1834
;; Scalar DF fma with the multiplicand taken from a V2DF lane.
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836 [(set (match_operand:DF 0 "register_operand" "=w")
1837 (fma:DF
1838 (vec_select:DF
1839 (match_operand:V2DF 1 "register_operand" "w")
1840 (parallel [(match_operand:SI 2 "immediate_operand")]))
1841 (match_operand:DF 3 "register_operand" "w")
1842 (match_operand:DF 4 "register_operand" "0")))]
1843 "TARGET_SIMD"
1844 {
1845 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1847 }
1848 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1849 )
1850
1851 (define_insn "fnma<mode>4"
1852 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1853 (fma:VHSDF
1854 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855 (match_operand:VHSDF 2 "register_operand" "w")
1856 (match_operand:VHSDF 3 "register_operand" "0")))]
1857 "TARGET_SIMD"
1858 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1860 )
1861
;; Fused multiply-subtract by element, same width:
;; op0 = (-op3) * dup (op1[lane op2]) + op4 (FMLS by element).
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863 [(set (match_operand:VDQF 0 "register_operand" "=w")
1864 (fma:VDQF
1865 (neg:VDQF
1866 (match_operand:VDQF 3 "register_operand" "w"))
1867 (vec_duplicate:VDQF
1868 (vec_select:<VEL>
1869 (match_operand:VDQF 1 "register_operand" "<h_con>")
1870 (parallel [(match_operand:SI 2 "immediate_operand")])))
1871 (match_operand:VDQF 4 "register_operand" "0")))]
1872 "TARGET_SIMD"
1873 {
/* Remap the architectural lane number for big-endian targets.  */
1874 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1876 }
1877 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1878 )
1879
;; As above, but the element is taken from a vector of the other width
;; (e.g. V4SF result with V2SF element source, and vice versa).
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1882 (fma:VDQSF
1883 (neg:VDQSF
1884 (match_operand:VDQSF 3 "register_operand" "w"))
1885 (vec_duplicate:VDQSF
1886 (vec_select:<VEL>
1887 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888 (parallel [(match_operand:SI 2 "immediate_operand")])))
1889 (match_operand:VDQSF 4 "register_operand" "0")))]
1890 "TARGET_SIMD"
1891 {
/* Remap the architectural lane number for big-endian targets.  */
1892 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894 }
1895 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1896 )
1897
;; Fused multiply-subtract with the multiplier broadcast from a scalar
;; register: op0 = (-op2) * dup (op1) + op3.
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899 [(set (match_operand:VMUL 0 "register_operand" "=w")
1900 (fma:VMUL
1901 (neg:VMUL
1902 (match_operand:VMUL 2 "register_operand" "w"))
1903 (vec_duplicate:VMUL
1904 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905 (match_operand:VMUL 3 "register_operand" "0")))]
1906 "TARGET_SIMD"
1907 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1909 )
1910
;; Scalar DF fused multiply-subtract, multiplier taken from a V2DF lane:
;; op0 = op1[lane op2] * (-op3) + op4.
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912 [(set (match_operand:DF 0 "register_operand" "=w")
1913 (fma:DF
1914 (vec_select:DF
1915 (match_operand:V2DF 1 "register_operand" "w")
1916 (parallel [(match_operand:SI 2 "immediate_operand")]))
1917 (neg:DF
1918 (match_operand:DF 3 "register_operand" "w"))
1919 (match_operand:DF 4 "register_operand" "0")))]
1920 "TARGET_SIMD"
1921 {
/* Remap the architectural lane number for big-endian targets.  */
1922 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1924 }
1925 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1926 )
1927
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One pattern per rounding mode; FRINT maps each standard name
;; (btrunc, ceil, floor, ...) to the matching FRINT* instruction.
1930 (define_insn "<frint_pattern><mode>2"
1931 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1933 FRINT))]
1934 "TARGET_SIMD"
1935 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936 [(set_attr "type" "neon_fp_round_<stype><q>")]
1937 )
1938
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT) then convert to signed/unsigned integer in one insn.
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944 [(match_operand:VHSDF 1 "register_operand" "w")]
1945 FCVT)))]
1946 "TARGET_SIMD"
1947 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1949 )
1950
1951 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI rounding conversions; require the F16 extension.
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953 [(set (match_operand:HI 0 "register_operand" "=w")
1954 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1955 FCVT)))]
1956 "TARGET_SIMD_F16INST"
1957 "fcvt<frint_suffix><su>\t%h0, %h1"
1958 [(set_attr "type" "neon_fp_to_int_s")]
1959 )
1960
;; Truncating scalar HF -> HI conversion (FCVTZS/FCVTZU).
1961 (define_insn "<optab>_trunchfhi2"
1962 [(set (match_operand:HI 0 "register_operand" "=w")
1963 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964 "TARGET_SIMD_F16INST"
1965 "fcvtz<su>\t%h0, %h1"
1966 [(set_attr "type" "neon_fp_to_int_s")]
1967 )
1968
;; Scalar HI -> HF conversion (SCVTF/UCVTF).
1969 (define_insn "<optab>hihf2"
1970 [(set (match_operand:HF 0 "register_operand" "=w")
1971 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972 "TARGET_SIMD_F16INST"
1973 "<su_optab>cvtf\t%h0, %h1"
1974 [(set_attr "type" "neon_int_to_fp_s")]
1975 )
1976
;; Combine (x * 2^N) followed by truncating float->int into one
;; fixed-point FCVTZ[SU] with #N, when N is within the element width.
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1980 [(mult:VDQF
1981 (match_operand:VDQF 1 "register_operand" "w")
1982 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1983 UNSPEC_FRINTZ)))]
1984 "TARGET_SIMD
1985 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1987 {
/* Emit the fixed-point conversion with the #fbits immediate derived
   from the power-of-two multiplier.  */
1988 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1989 char buf[64];
1990 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991 output_asm_insn (buf, operands);
1992 return "";
1993 }
1994 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1995 )
1996
;; Standard fix/fixuns names; matched by the insn patterns above.
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000 [(match_operand:VHSDF 1 "register_operand")]
2001 UNSPEC_FRINTZ)))]
2002 "TARGET_SIMD"
2003 {})
2004
;; Same RTL shape under the fix_trunc/fixuns_trunc standard names.
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008 [(match_operand:VHSDF 1 "register_operand")]
2009 UNSPEC_FRINTZ)))]
2010 "TARGET_SIMD"
2011 {})
2012
;; Round toward zero (FRINTZ) under the ftrunc standard name.
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand")
2015 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2016 UNSPEC_FRINTZ))]
2017 "TARGET_SIMD"
2018 {})
2019
;; Signed/unsigned int -> float conversion (SCVTF/UCVTF).
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2022 (FLOATUORS:VHSDF
2023 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2024 "TARGET_SIMD"
2025 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2027 )
2028
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2032
2033 ;; Float widening operations.
2034
;; Widen the low half of a vector of floats (FCVTL).  Operand 2 is the
;; parallel selecting the low-numbered lanes.
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037 (float_extend:<VWIDE> (vec_select:<VHALF>
2038 (match_operand:VQ_HSF 1 "register_operand" "w")
2039 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2040 )))]
2041 "TARGET_SIMD"
2042 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043 [(set_attr "type" "neon_fp_cvt_widen_s")]
2044 )
2045
2046 ;; Convert between fixed-point and floating-point (vector modes)
2047
;; Float -> fixed-point with #fbits immediate (FCVTZS/FCVTZU).
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050 (unspec:<VHSDF:FCVT_TARGET>
2051 [(match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:SI 2 "immediate_operand" "i")]
2053 FCVT_F2FIXED))]
2054 "TARGET_SIMD"
2055 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2057 )
2058
;; Fixed-point -> float with #fbits immediate (SCVTF/UCVTF).
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061 (unspec:<VDQ_HSDI:FCVT_TARGET>
2062 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063 (match_operand:SI 2 "immediate_operand" "i")]
2064 FCVT_FIXED2F))]
2065 "TARGET_SIMD"
2066 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2068 )
2069
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns their behavior is as required.
2078
;; Standard-name expander: widen the architecturally-low half.
;; Builds the lo-half lane selector, then emits the FCVTL insn above.
2079 (define_expand "vec_unpacks_lo_<mode>"
2080 [(match_operand:<VWIDE> 0 "register_operand" "")
2081 (match_operand:VQ_HSF 1 "register_operand" "")]
2082 "TARGET_SIMD"
2083 {
2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2086 operands[1], p));
2087 DONE;
2088 }
2089 )
2090
;; Widen the high half of a vector of floats (FCVTL2).  Operand 2 is
;; the parallel selecting the high-numbered lanes.
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (float_extend:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQ_HSF 1 "register_operand" "w")
2095 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2096 )))]
2097 "TARGET_SIMD"
2098 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099 [(set_attr "type" "neon_fp_cvt_widen_s")]
2100 )
2101
;; Standard-name expander: widen the architecturally-high half.
;; Builds the hi-half lane selector (third argument TRUE) and emits
;; the aarch64_simd_vec_unpacks_hi_<mode> insn (FCVTL2).
;; Fix: this previously called gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; which pairs a hi-half selector with the FCVTL (lo-half) pattern, so
;; vec_unpacks_hi converted the wrong half of the input vector.
2102 (define_expand "vec_unpacks_hi_<mode>"
2103 [(match_operand:<VWIDE> 0 "register_operand" "")
2104 (match_operand:VQ_HSF 1 "register_operand" "")]
2105 "TARGET_SIMD"
2106 {
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2109 operands[1], p));
2110 DONE;
2111 }
2112 )
;; Widen a whole 64-bit vector of floats to 128 bits (FCVTL).
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115 (float_extend:<VWIDE>
2116 (match_operand:VDF 1 "register_operand" "w")))]
2117 "TARGET_SIMD"
2118 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119 [(set_attr "type" "neon_fp_cvt_widen_s")]
2120 )
2121
2122 ;; Float narrowing operations.
2123
;; Narrow a 128-bit vector of floats into a 64-bit result (FCVTN).
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125 [(set (match_operand:VDF 0 "register_operand" "=w")
2126 (float_truncate:VDF
2127 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2128 "TARGET_SIMD"
2129 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2131 )
2132
;; FCVTN2: narrow operand 2 into the high half of op0, keeping the low
;; half (tied operand 1).  Little-endian lane order.
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2135 (vec_concat:<VDBL>
2136 (match_operand:VDF 1 "register_operand" "0")
2137 (float_truncate:VDF
2138 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2142 )
2143
;; Big-endian variant: the vec_concat halves are swapped because GCC's
;; lane numbering is reversed relative to the architectural order.
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2146 (vec_concat:<VDBL>
2147 (float_truncate:VDF
2148 (match_operand:<VWIDE> 2 "register_operand" "w"))
2149 (match_operand:VDF 1 "register_operand" "0")))]
2150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2153 )
2154
;; Dispatch to the _le or _be pattern above based on endianness.
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156 [(match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2158 (match_operand:<VWIDE> 2 "register_operand" "w")]
2159 "TARGET_SIMD"
2160 {
2161 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164 emit_insn (gen (operands[0], operands[1], operands[2]));
2165 DONE;
2166 }
2167 )
2168
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half, then
;; FCVTN2 into the high half.  Which source feeds which half depends on
;; endianness, hence the lo/hi operand swap.
2169 (define_expand "vec_pack_trunc_v2df"
2170 [(set (match_operand:V4SF 0 "register_operand")
2171 (vec_concat:V4SF
2172 (float_truncate:V2SF
2173 (match_operand:V2DF 1 "register_operand"))
2174 (float_truncate:V2SF
2175 (match_operand:V2DF 2 "register_operand"))
2176 ))]
2177 "TARGET_SIMD"
2178 {
2179 rtx tmp = gen_reg_rtx (V2SFmode);
2180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2182
2183 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185 tmp, operands[hi]));
2186 DONE;
2187 }
2188 )
2189
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two
;; scalars (endian-aware), then narrow it with FCVTN.
2190 (define_expand "vec_pack_trunc_df"
2191 [(set (match_operand:V2SF 0 "register_operand")
2192 (vec_concat:V2SF
2193 (float_truncate:SF
2194 (match_operand:DF 1 "register_operand"))
2195 (float_truncate:SF
2196 (match_operand:DF 2 "register_operand"))
2197 ))]
2198 "TARGET_SIMD"
2199 {
2200 rtx tmp = gen_reg_rtx (V2SFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2207 DONE;
2208 }
2209 )
2210
2211 ;; FP Max/Min
2212 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2213 ;; expression like:
2214 ;; a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2217 ;;
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2223 ;; NaNs.
2224
;; RTL smax/smin standard names; emitted as FMAXNM/FMINNM since the
;; standard names need not honour NaNs or signed zeros (see above).
2225 (define_insn "<su><maxmin><mode>3"
2226 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228 (match_operand:VHSDF 2 "register_operand" "w")))]
2229 "TARGET_SIMD"
2230 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2232 )
2233
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
2237 (define_insn "<maxmin_uns><mode>3"
2238 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240 (match_operand:VHSDF 2 "register_operand" "w")]
2241 FMAXMIN_UNS))]
2242 "TARGET_SIMD"
2243 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2245 )
2246
2247 ;; 'across lanes' add.
2248
;; Integer add-reduction to scalar: reduce into lane 0 of a scratch
;; vector with ADDV/ADDP, then extract that lane (endian-remapped).
2249 (define_expand "reduc_plus_scal_<mode>"
2250 [(match_operand:<VEL> 0 "register_operand" "=w")
2251 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2252 UNSPEC_ADDV)]
2253 "TARGET_SIMD"
2254 {
2255 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256 rtx scratch = gen_reg_rtx (<MODE>mode);
2257 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2259 DONE;
2260 }
2261 )
2262
;; Pairwise FP add of two vectors (FADDP).
2263 (define_insn "aarch64_faddp<mode>"
2264 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266 (match_operand:VHSDF 2 "register_operand" "w")]
2267 UNSPEC_FADDV))]
2268 "TARGET_SIMD"
2269 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2271 )
2272
;; Integer across-lanes add (ADDV / ADDP depending on mode).
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274 [(set (match_operand:VDQV 0 "register_operand" "=w")
2275 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2276 UNSPEC_ADDV))]
2277 "TARGET_SIMD"
2278 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279 [(set_attr "type" "neon_reduc_add<q>")]
2280 )
2281
;; V2SI has no ADDV form; a single pairwise add reduces both lanes.
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283 [(set (match_operand:V2SI 0 "register_operand" "=w")
2284 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2285 UNSPEC_ADDV))]
2286 "TARGET_SIMD"
2287 "addp\\t%0.2s, %1.2s, %1.2s"
2288 [(set_attr "type" "neon_reduc_add")]
2289 )
2290
;; Two-element FP add-reduction straight to a scalar (FADDP scalar form).
2291 (define_insn "reduc_plus_scal_<mode>"
2292 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2294 UNSPEC_FADDV))]
2295 "TARGET_SIMD"
2296 "faddp\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2298 )
2299
;; V4SF add-reduction: two rounds of pairwise FADDP, then extract
;; lane 0 (endian-remapped).
2300 (define_expand "reduc_plus_scal_v4sf"
2301 [(set (match_operand:SF 0 "register_operand")
2302 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2303 UNSPEC_FADDV))]
2304 "TARGET_SIMD"
2305 {
2306 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307 rtx scratch = gen_reg_rtx (V4SFmode);
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2311 DONE;
2312 })
2313
;; Count leading sign bits per element (CLS).
2314 (define_insn "clrsb<mode>2"
2315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2317 "TARGET_SIMD"
2318 "cls\\t%0.<Vtype>, %1.<Vtype>"
2319 [(set_attr "type" "neon_cls<q>")]
2320 )
2321
;; Count leading zeros per element (CLZ).
2322 (define_insn "clz<mode>2"
2323 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2325 "TARGET_SIMD"
2326 "clz\\t%0.<Vtype>, %1.<Vtype>"
2327 [(set_attr "type" "neon_cls<q>")]
2328 )
2329
;; Population count per byte element (CNT).
2330 (define_insn "popcount<mode>2"
2331 [(set (match_operand:VB 0 "register_operand" "=w")
2332 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2333 "TARGET_SIMD"
2334 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335 [(set_attr "type" "neon_cnt<q>")]
2336 )
2337
2338 ;; 'across lanes' max and min ops.
2339
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP max/min reduction to scalar: reduce into a scratch vector, then
;; extract lane 0 (endian-remapped).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343 [(match_operand:<VEL> 0 "register_operand")
2344 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2345 FMAXMINV)]
2346 "TARGET_SIMD"
2347 {
2348 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349 rtx scratch = gen_reg_rtx (<MODE>mode);
2350 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2351 operands[1]));
2352 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2353 DONE;
2354 }
2355 )
2356
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359 [(match_operand:<VEL> 0 "register_operand")
2360 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2361 MAXMINV)]
2362 "TARGET_SIMD"
2363 {
2364 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365 rtx scratch = gen_reg_rtx (<MODE>mode);
2366 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2367 operands[1]));
2368 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2369 DONE;
2370 }
2371 )
2372
;; Integer across-lanes max/min ([SU]MAXV / [SU]MINV).
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2376 MAXMINV))]
2377 "TARGET_SIMD"
2378 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379 [(set_attr "type" "neon_reduc_minmax<q>")]
2380 )
2381
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383 [(set (match_operand:V2SI 0 "register_operand" "=w")
2384 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2385 MAXMINV))]
2386 "TARGET_SIMD"
2387 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388 [(set_attr "type" "neon_reduc_minmax")]
2389 )
2390
;; FP across-lanes max/min (FMAX[NM]V / FMIN[NM]V, or pairwise form).
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2394 FMAXMINV))]
2395 "TARGET_SIMD"
2396 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2398 )
2399
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; allocation.
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2403 ;; to select.
2404 ;;
2405 ;; Thus our BSL is of the form:
2406 ;; op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2408 ;;
2409 ;; if (op0 = mask)
2410 ;; bsl mask, op2, op3
2411 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else from op0)
2412 ;; bit op0, op2, mask
2413 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else from op0)
2414 ;; bif op0, op3, mask
2415 ;;
2416 ;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
2419
;; BSL expressed as ((op2 ^ op3) & mask) ^ op3: selects op2 bits where
;; the mask is 1, op3 bits where it is 0.  The three alternatives tie
;; op0 to the mask, op3 or op2 respectively, choosing BSL/BIT/BIF.
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2422 (xor:VDQ_I
2423 (and:VDQ_I
2424 (xor:VDQ_I
2425 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428 (match_dup:<V_INT_EQUIV> 3)
2429 ))]
2430 "TARGET_SIMD"
2431 "@
2432 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435 [(set_attr "type" "neon_bsl<q>")]
2436 )
2437
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first. The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same selection semantics with the roles of operands 2 and 3 swapped
;; in the XOR nesting, hence the swapped assembly operands.
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2446 (xor:VDQ_I
2447 (and:VDQ_I
2448 (xor:VDQ_I
2449 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452 (match_dup:<V_INT_EQUIV> 2)))]
2453 "TARGET_SIMD"
2454 "@
2455 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458 [(set_attr "type" "neon_bsl<q>")]
2459 )
2460
2461 ;; DImode is special, we want to avoid computing operations which are
2462 ;; more naturally computed in general purpose registers in the vector
2463 ;; registers. If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again. However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2467 ;;
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
2470
;; DImode BSL.  A fourth all-GP alternative exists so that values that
;; live in general registers are not forced through the vector side;
;; the splitter turns that alternative back into XOR/AND/XOR.
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2473 (xor:DI
2474 (and:DI
2475 (xor:DI
2476 (match_operand:DI 3 "register_operand" "w,0,w,r")
2477 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2479 (match_dup:DI 3)
2480 ))]
2481 "TARGET_SIMD"
2482 "@
2483 bsl\\t%0.8b, %2.8b, %3.8b
2484 bit\\t%0.8b, %2.8b, %1.8b
2485 bif\\t%0.8b, %3.8b, %1.8b
2486 #"
2487 "&& GP_REGNUM_P (REGNO (operands[0]))"
2488 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2489 {
2490 /* Split back to individual operations. If we're before reload, and
2491 able to create a temporary register, do so. If we're after reload,
2492 we've got an early-clobber destination register, so use that.
2493 Otherwise, we can't create pseudos and we can't yet guarantee that
2494 operands[0] is safe to write, so FAIL to split. */
2495
2496 rtx scratch;
2497 if (reload_completed)
2498 scratch = operands[0];
2499 else if (can_create_pseudo_p ())
2500 scratch = gen_reg_rtx (DImode);
2501 else
2502 FAIL;
2503
2504 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2507 DONE;
2508 }
2509 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510 (set_attr "length" "4,4,4,12")]
2511 )
2512
;; As above, for the alternate XOR-operand association produced by
;; combine (final XOR folds with operand 2 instead of operand 3).
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2515 (xor:DI
2516 (and:DI
2517 (xor:DI
2518 (match_operand:DI 3 "register_operand" "w,w,0,r")
2519 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2521 (match_dup:DI 2)
2522 ))]
2523 "TARGET_SIMD"
2524 "@
2525 bsl\\t%0.8b, %3.8b, %2.8b
2526 bit\\t%0.8b, %3.8b, %1.8b
2527 bif\\t%0.8b, %2.8b, %1.8b
2528 #"
2529 "&& GP_REGNUM_P (REGNO (operands[0]))"
2530 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2531 {
2532 /* Split back to individual operations. If we're before reload, and
2533 able to create a temporary register, do so. If we're after reload,
2534 we've got an early-clobber destination register, so use that.
2535 Otherwise, we can't create pseudos and we can't yet guarantee that
2536 operands[0] is safe to write, so FAIL to split. */
2537
2538 rtx scratch;
2539 if (reload_completed)
2540 scratch = operands[0];
2541 else if (can_create_pseudo_p ())
2542 scratch = gen_reg_rtx (DImode);
2543 else
2544 FAIL;
2545
2546 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2549 DONE;
2550 }
2551 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552 (set_attr "length" "4,4,4,12")]
2553 )
2554
;; BSL expander for all vector modes (and DI).  FP modes are punned to
;; the equivalent integer mode so the integer BSL pattern can be used,
;; with the result punned back afterwards.
2555 (define_expand "aarch64_simd_bsl<mode>"
2556 [(match_operand:VALLDIF 0 "register_operand")
2557 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558 (match_operand:VALLDIF 2 "register_operand")
2559 (match_operand:VALLDIF 3 "register_operand")]
2560 "TARGET_SIMD"
2561 {
2562 /* We can't alias operands together if they have different modes. */
2563 rtx tmp = operands[0];
2564 if (FLOAT_MODE_P (<MODE>mode))
2565 {
2566 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2569 }
2570 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2572 operands[1],
2573 operands[2],
2574 operands[3]));
2575 if (tmp != operands[0])
2576 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2577
2578 DONE;
2579 })
2580
;; Select between operands 1 and 2 under the per-lane mask operand 3.
;; The all-ones/all-zeros cases reduce to a move or a bitwise NOT of
;; the mask; the general case becomes a BSL.
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582 [(match_operand:VALLDI 0 "register_operand")
2583 (match_operand:VALLDI 1 "nonmemory_operand")
2584 (match_operand:VALLDI 2 "nonmemory_operand")
2585 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2586 "TARGET_SIMD"
2587 {
2588 /* If we have (a = (P) ? -1 : 0);
2589 Then we can simply move the generated mask (result must be int). */
2590 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591 && operands[2] == CONST0_RTX (<MODE>mode))
2592 emit_move_insn (operands[0], operands[3]);
2593 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2594 else if (operands[1] == CONST0_RTX (<MODE>mode)
2595 && operands[2] == CONSTM1_RTX (<MODE>mode))
2596 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]))
;
2597 else
2598 {
2599 if (!REG_P (operands[1]))
2600 operands[1] = force_reg (<MODE>mode, operands[1]);
2601 if (!REG_P (operands[2]))
2602 operands[2] = force_reg (<MODE>mode, operands[2]);
2603 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604 operands[1], operands[2]));
2605 }
2606
2607 DONE;
2608 })
2609
2610 ;; Patterns comparing two vectors to produce a mask.
2611
;; Integer vector comparison producing an all-ones/all-zeros lane mask.
;; The first switch only forces operand 3 into a register when the
;; compare-against-zero instruction forms cannot be used; the second
;; emits the CM* instruction (swapping operands where AArch64 only has
;; one direction of the unsigned comparison).
2612 (define_expand "vec_cmp<mode><mode>"
2613 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614 (match_operator 1 "comparison_operator"
2615 [(match_operand:VSDQ_I_DI 2 "register_operand")
2616 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2617 "TARGET_SIMD"
2618 {
2619 rtx mask = operands[0];
2620 enum rtx_code code = GET_CODE (operands[1]);
2621
2622 switch (code)
2623 {
2624 case NE:
2625 case LE:
2626 case LT:
2627 case GE:
2628 case GT:
2629 case EQ:
2630 if (operands[3] == CONST0_RTX (<MODE>mode))
2631 break;
2632
2633 /* Fall through. */
2634 default:
2635 if (!REG_P (operands[3]))
2636 operands[3] = force_reg (<MODE>mode, operands[3]);
2637
2638 break;
2639 }
2640
2641 switch (code)
2642 {
2643 case LT:
2644 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2645 break;
2646
2647 case GE:
2648 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2649 break;
2650
2651 case LE:
2652 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2653 break;
2654
2655 case GT:
2656 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2657 break;
2658
2659 case LTU:
2660 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2661 break;
2662
2663 case GEU:
2664 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2665 break;
2666
2667 case LEU:
2668 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2669 break;
2670
2671 case GTU:
2672 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2673 break;
2674
2675 case NE:
2676 /* Handle NE as !EQ. */
2677 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2679 break;
2680
2681 case EQ:
2682 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2683 break;
2684
2685 default:
2686 gcc_unreachable ();
2687 }
2688
2689 DONE;
2690 })
2691
;; Floating-point vector comparison producing a lane mask.  Unordered
;; comparisons (UN*, NE, UNEQ, ORDERED, UNORDERED) are synthesised from
;; the ordered FCM* instructions plus NOT/ORR, exploiting the fact that
;; FCM* returns false for unordered lanes.
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694 (match_operator 1 "comparison_operator"
2695 [(match_operand:VDQF 2 "register_operand")
2696 (match_operand:VDQF 3 "nonmemory_operand")]))]
2697 "TARGET_SIMD"
2698 {
2699 int use_zero_form = 0;
2700 enum rtx_code code = GET_CODE (operands[1]);
2701 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2702
2703 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2704
/* Use the compare-against-zero instruction forms where possible;
   otherwise force operand 3 into a register.  */
2705 switch (code)
2706 {
2707 case LE:
2708 case LT:
2709 case GE:
2710 case GT:
2711 case EQ:
2712 if (operands[3] == CONST0_RTX (<MODE>mode))
2713 {
2714 use_zero_form = 1;
2715 break;
2716 }
2717 /* Fall through. */
2718 default:
2719 if (!REG_P (operands[3]))
2720 operands[3] = force_reg (<MODE>mode, operands[3]);
2721
2722 break;
2723 }
2724
/* Pick the FCM generator; LE/LT and the UN* forms map onto GE/GT with
   the operands swapped where needed.  */
2725 switch (code)
2726 {
2727 case LT:
2728 if (use_zero_form)
2729 {
2730 comparison = gen_aarch64_cmlt<mode>;
2731 break;
2732 }
2733 /* Fall through. */
2734 case UNGE:
2735 std::swap (operands[2], operands[3]);
2736 /* Fall through. */
2737 case UNLE:
2738 case GT:
2739 comparison = gen_aarch64_cmgt<mode>;
2740 break;
2741 case LE:
2742 if (use_zero_form)
2743 {
2744 comparison = gen_aarch64_cmle<mode>;
2745 break;
2746 }
2747 /* Fall through. */
2748 case UNGT:
2749 std::swap (operands[2], operands[3]);
2750 /* Fall through. */
2751 case UNLT:
2752 case GE:
2753 comparison = gen_aarch64_cmge<mode>;
2754 break;
2755 case NE:
2756 case EQ:
2757 comparison = gen_aarch64_cmeq<mode>;
2758 break;
2759 case UNEQ:
2760 case ORDERED:
2761 case UNORDERED:
2762 break;
2763 default:
2764 gcc_unreachable ();
2765 }
2766
2767 switch (code)
2768 {
2769 case UNGE:
2770 case UNGT:
2771 case UNLE:
2772 case UNLT:
2773 case NE:
2774 /* FCM returns false for lanes which are unordered, so if we use
2775 the inverse of the comparison we actually want to emit, then
2776 invert the result, we will end up with the correct result.
2777 Note that a NE NaN and NaN NE b are true for all a, b.
2778
2779 Our transformations are:
2780 a UNGE b -> !(b GT a)
2781 a UNGT b -> !(b GE a)
2782 a UNLE b -> !(a GT b)
2783 a UNLT b -> !(a GE b)
2784 a NE b -> !(a EQ b) */
2785 gcc_assert (comparison != NULL);
2786 emit_insn (comparison (operands[0], operands[2], operands[3]));
2787 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2788 break;
2789
2790 case LT:
2791 case LE:
2792 case GT:
2793 case GE:
2794 case EQ:
2795 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2796 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2797 a GE b -> a GE b
2798 a GT b -> a GT b
2799 a LE b -> b GE a
2800 a LT b -> b GT a
2801 a EQ b -> a EQ b */
2802 gcc_assert (comparison != NULL);
2803 emit_insn (comparison (operands[0], operands[2], operands[3]));
2804 break;
2805
2806 case UNEQ:
2807 /* We first check (a > b || b > a) which is !UNEQ, inverting
2808 this result will then give us (a == b || a UNORDERED b). */
2809 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2810 operands[2], operands[3]));
2811 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2812 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2813 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2814 break;
2815
2816 case UNORDERED:
2817 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2818 UNORDERED as !ORDERED. */
2819 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2820 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2821 operands[3], operands[2]));
2822 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2823 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2824 break;
2825
2826 case ORDERED:
2827 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2828 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2829 operands[3], operands[2]));
2830 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2832 break;
2833
2834 default:
2835 gcc_unreachable ();
2836 }
2837
2838 DONE;
2839 })
2839
;; Expand an unsigned vector comparison.  Integer vector compares do not
;; distinguish signedness at this level, so simply forward to vec_cmp.
2840 (define_expand "vec_cmpu<mode><mode>"
2841 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2842 (match_operator 1 "comparison_operator"
2843 [(match_operand:VSDQ_I_DI 2 "register_operand")
2844 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2845 "TARGET_SIMD"
2846 {
2847 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2848 operands[2], operands[3]));
2849 DONE;
2850 })
2851
;; Expand a vector conditional select: compute a comparison mask with
;; vec_cmp, then choose between operands 1 and 2 with vcond_mask.
;; NE is rewritten as EQ with the select arms swapped, saving a NOT.
2852 (define_expand "vcond<mode><mode>"
2853 [(set (match_operand:VALLDI 0 "register_operand")
2854 (if_then_else:VALLDI
2855 (match_operator 3 "comparison_operator"
2856 [(match_operand:VALLDI 4 "register_operand")
2857 (match_operand:VALLDI 5 "nonmemory_operand")])
2858 (match_operand:VALLDI 1 "nonmemory_operand")
2859 (match_operand:VALLDI 2 "nonmemory_operand")))]
2860 "TARGET_SIMD"
2861 {
2862 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2863 enum rtx_code code = GET_CODE (operands[3]);
2864
2865 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2866 it as well as switch operands 1/2 in order to avoid the additional
2867 NOT instruction. */
2868 if (code == NE)
2869 {
2870 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2871 operands[4], operands[5]);
2872 std::swap (operands[1], operands[2]);
2873 }
2874 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2875 operands[4], operands[5]));
2876 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877 operands[2], mask));
2878
2879 DONE;
2880 })
2881
;; As vcond above, but the comparison is done in mode VDQF_COND while the
;; selected values are of the equal-width mode <V_cmp_mixed>.
2882 (define_expand "vcond<v_cmp_mixed><mode>"
2883 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2884 (if_then_else:<V_cmp_mixed>
2885 (match_operator 3 "comparison_operator"
2886 [(match_operand:VDQF_COND 4 "register_operand")
2887 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2888 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2889 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2890 "TARGET_SIMD"
2891 {
2892 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2893 enum rtx_code code = GET_CODE (operands[3]);
2894
2895 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2896 it as well as switch operands 1/2 in order to avoid the additional
2897 NOT instruction. */
2898 if (code == NE)
2899 {
2900 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2901 operands[4], operands[5]);
2902 std::swap (operands[1], operands[2]);
2903 }
2904 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2905 operands[4], operands[5]));
2906 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2907 operands[0], operands[1],
2908 operands[2], mask));
2909
2910 DONE;
2911 })
2912
;; Unsigned vcond.  The modes are already integer vectors, so the mask is
;; allocated directly in <MODE>mode and vec_cmp<mode><mode> is used.
2913 (define_expand "vcondu<mode><mode>"
2914 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2915 (if_then_else:VSDQ_I_DI
2916 (match_operator 3 "comparison_operator"
2917 [(match_operand:VSDQ_I_DI 4 "register_operand")
2918 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2919 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2920 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2921 "TARGET_SIMD"
2922 {
2923 rtx mask = gen_reg_rtx (<MODE>mode);
2924 enum rtx_code code = GET_CODE (operands[3]);
2925
2926 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2927 it as well as switch operands 1/2 in order to avoid the additional
2928 NOT instruction. */
2929 if (code == NE)
2930 {
2931 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2932 operands[4], operands[5]);
2933 std::swap (operands[1], operands[2]);
2934 }
2935 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2936 operands[4], operands[5]));
2937 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2938 operands[2], mask));
2939 DONE;
2940 })
2941
;; Unsigned integer comparison steering a float select: the compare runs in
;; the integer mode <V_cmp_mixed>, the selected values are VDQF.
2942 (define_expand "vcondu<mode><v_cmp_mixed>"
2943 [(set (match_operand:VDQF 0 "register_operand")
2944 (if_then_else:VDQF
2945 (match_operator 3 "comparison_operator"
2946 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2947 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2948 (match_operand:VDQF 1 "nonmemory_operand")
2949 (match_operand:VDQF 2 "nonmemory_operand")))]
2950 "TARGET_SIMD"
2951 {
2952 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2953 enum rtx_code code = GET_CODE (operands[3]);
2954
2955 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2956 it as well as switch operands 1/2 in order to avoid the additional
2957 NOT instruction. */
2958 if (code == NE)
2959 {
2960 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2961 operands[4], operands[5]);
2962 std::swap (operands[1], operands[2]);
2963 }
2964 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2965 mask, operands[3],
2966 operands[4], operands[5]));
2967 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2968 operands[2], mask));
2969 DONE;
2970 })
2971
2972 ;; Patterns for AArch64 SIMD Intrinsics.
2973
2974 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract lane 2 of vector operand 1 and sign-extend it into a
;; general-purpose register.  The lane index is remapped for big-endian
;; at output time via aarch64_endian_lane_rtx.
2975 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2976 [(set (match_operand:GPI 0 "register_operand" "=r")
2977 (sign_extend:GPI
2978 (vec_select:<VEL>
2979 (match_operand:VDQQH 1 "register_operand" "w")
2980 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2981 "TARGET_SIMD"
2982 {
2983 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2984 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2985 }
2986 [(set_attr "type" "neon_to_gp<q>")]
2987 )
2988
;; UMOV: extract lane 2 of vector operand 1 and zero-extend it to SImode
;; in a general-purpose register.  Lane index remapped for big-endian.
2989 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2990 [(set (match_operand:SI 0 "register_operand" "=r")
2991 (zero_extend:SI
2992 (vec_select:<VEL>
2993 (match_operand:VDQQH 1 "register_operand" "w")
2994 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2995 "TARGET_SIMD"
2996 {
2997 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2998 return "umov\\t%w0, %1.<Vetype>[%2]";
2999 }
3000 [(set_attr "type" "neon_to_gp<q>")]
3001 )
3002
3003 ;; Lane extraction of a value, neither sign nor zero extension
3004 ;; is guaranteed so upper bits should be considered undefined.
3005 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives, chosen by the destination: umov to a GP register,
;; dup to a SIMD scalar register, or st1 of the single lane to memory.
3006 (define_insn "aarch64_get_lane<mode>"
3007 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3008 (vec_select:<VEL>
3009 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3010 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3011 "TARGET_SIMD"
3012 {
3013 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3014 switch (which_alternative)
3015 {
3016 case 0:
3017 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3018 case 1:
3019 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3020 case 2:
3021 return "st1\\t{%1.<Vetype>}[%2], %0";
3022 default:
3023 gcc_unreachable ();
3024 }
3025 }
3026 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3027 )
3028
;; Combine two adjacent 64-bit loads into a single Q-register LDR.  The
;; insn condition requires operand 2's address to be exactly operand 1's
;; address plus the 8-byte mode size, and !STRICT_ALIGNMENT since the
;; single 128-bit access may be only 64-bit aligned.
3029 (define_insn "load_pair_lanes<mode>"
3030 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3031 (vec_concat:<VDBL>
3032 (match_operand:VDC 1 "memory_operand" "Utq")
3033 (match_operand:VDC 2 "memory_operand" "m")))]
3034 "TARGET_SIMD && !STRICT_ALIGNMENT
3035 && rtx_equal_p (XEXP (operands[2], 0),
3036 plus_constant (Pmode,
3037 XEXP (operands[1], 0),
3038 GET_MODE_SIZE (<MODE>mode)))"
3039 "ldr\\t%q0, %1"
3040 [(set_attr "type" "neon_load1_1reg_q")]
3041 )
3042
;; Store a 128-bit concatenation of two 64-bit values with a single STP,
;; from either SIMD (d) or general-purpose (x) registers.  The destination
;; predicate accepts addresses suitable for a store pair of the doubled
;; mode (NOTE(review): presumably this is what distinguishes it from a
;; plain memory_operand — confirm against aarch64_mem_pair_lanes_operand).
3043 (define_insn "store_pair_lanes<mode>"
3044 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3045 (vec_concat:<VDBL>
3046 (match_operand:VDC 1 "register_operand" "w, r")
3047 (match_operand:VDC 2 "register_operand" "w, r")))]
3048 "TARGET_SIMD"
3049 "@
3050 stp\\t%d1, %d2, %y0
3051 stp\\t%x1, %x2, %y0"
3052 [(set_attr "type" "neon_stp, store_16")]
3054
3055 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3056 ;; dest vector.
3057
;; Little-endian form: operand 1 fills the low half, the high half is
;; zero.  The vec_concat with an all-zero operand models the upper-half
;; zeroing of a 64-bit mov/fmov/ldr into a SIMD register.
3058 (define_insn "*aarch64_combinez<mode>"
3059 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3060 (vec_concat:<VDBL>
3061 (match_operand:VDC 1 "general_operand" "w,?r,m")
3062 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3063 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3064 "@
3065 mov\\t%0.8b, %1.8b
3066 fmov\t%d0, %1
3067 ldr\\t%d0, %1"
3068 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3069 (set_attr "simd" "yes,*,yes")
3070 (set_attr "fp" "*,yes,*")]
3071 )
3072
;; Big-endian twin of *aarch64_combinez: the vec_concat operand order is
;; reversed (zero first) but the generated instructions are identical.
3073 (define_insn "*aarch64_combinez_be<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3075 (vec_concat:<VDBL>
3076 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3077 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3078 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3079 "@
3080 mov\\t%0.8b, %1.8b
3081 fmov\t%d0, %1
3082 ldr\\t%d0, %1"
3083 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3084 (set_attr "simd" "yes,*,yes")
3085 (set_attr "fp" "*,yes,*")]
3086 )
3087
;; Combine two 64-bit vectors into one 128-bit vector; the actual RTL is
;; produced by aarch64_split_simd_combine (handles endianness there).
3088 (define_expand "aarch64_combine<mode>"
3089 [(match_operand:<VDBL> 0 "register_operand")
3090 (match_operand:VDC 1 "register_operand")
3091 (match_operand:VDC 2 "register_operand")]
3092 "TARGET_SIMD"
3093 {
3094 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3095
3096 DONE;
3097 }
3098 )
3099
;; Build a 128-bit vector by writing operand 1 to the low quad-half and
;; operand 2 to the high quad-half of operand 0, in two moves.
;; NOTE(review): a set_attr on a define_expand appears to have no effect
;; (attributes apply to define_insn) — confirm and consider removing.
3100 (define_expand "aarch64_simd_combine<mode>"
3101 [(match_operand:<VDBL> 0 "register_operand")
3102 (match_operand:VDC 1 "register_operand")
3103 (match_operand:VDC 2 "register_operand")]
3104 "TARGET_SIMD"
3105 {
3106 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3107 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3108 DONE;
3109 }
3110 [(set_attr "type" "multiple")]
3111 )
3112
3113 ;; <su><addsub>l<q>.
3114
;; Widening add/sub of the HIGH halves of two Q registers
;; ([us]addl2/[us]subl2): both inputs are narrowed through the same
;; hi-half vec_select (match_dup 3) and then sign/zero extended.
3115 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3117 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3118 (match_operand:VQW 1 "register_operand" "w")
3119 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3120 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3121 (match_operand:VQW 2 "register_operand" "w")
3122 (match_dup 3)))))]
3123 "TARGET_SIMD"
3124 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3125 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3126 )
3127
;; Same operation on the LOW halves ([us]addl/[us]subl with the narrow
;; register names in the assembly template).
3128 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3129 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3130 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131 (match_operand:VQW 1 "register_operand" "w")
3132 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3133 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3134 (match_operand:VQW 2 "register_operand" "w")
3135 (match_dup 3)))))]
3136 "TARGET_SIMD"
3137 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3138 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3139 )
3140
3141
;; The four *l2 intrinsic expanders below all build the hi-half lane
;; selector with aarch64_simd_vect_par_cnst_half (high = true) and
;; forward to the corresponding _hi_internal insn.
3142 (define_expand "aarch64_saddl2<mode>"
3143 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3144 (match_operand:VQW 1 "register_operand" "w")
3145 (match_operand:VQW 2 "register_operand" "w")]
3146 "TARGET_SIMD"
3147 {
3148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3149 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3150 operands[2], p));
3151 DONE;
3152 })
3153
3154 (define_expand "aarch64_uaddl2<mode>"
3155 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3156 (match_operand:VQW 1 "register_operand" "w")
3157 (match_operand:VQW 2 "register_operand" "w")]
3158 "TARGET_SIMD"
3159 {
3160 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3161 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3162 operands[2], p));
3163 DONE;
3164 })
3165
3166 (define_expand "aarch64_ssubl2<mode>"
3167 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3168 (match_operand:VQW 1 "register_operand" "w")
3169 (match_operand:VQW 2 "register_operand" "w")]
3170 "TARGET_SIMD"
3171 {
3172 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3173 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3174 operands[2], p));
3175 DONE;
3176 })
3177
3178 (define_expand "aarch64_usubl2<mode>"
3179 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3180 (match_operand:VQW 1 "register_operand" "w")
3181 (match_operand:VQW 2 "register_operand" "w")]
3182 "TARGET_SIMD"
3183 {
3184 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3185 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3186 operands[2], p));
3187 DONE;
3188 })
3189
;; Widening add/sub of whole 64-bit vectors ([us]addl/[us]subl): both
;; inputs are sign/zero-extended to the double-width mode.
3190 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3191 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3192 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3193 (match_operand:VD_BHSI 1 "register_operand" "w"))
3194 (ANY_EXTEND:<VWIDE>
3195 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3196 "TARGET_SIMD"
3197 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3198 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3199 )
3200
3201 ;; <su><addsub>w<q>.
3202
;; Widening sums.  For Q-register inputs (VQW) the reduction is split in
;; two steps: accumulate the low half via the saddw/uaddw _internal insn
;; into a temporary, then the high half via saddw2/uaddw2.  The two
;; expanders per signedness share one name — the iterators (VQW vs
;; VD_BHSI) generate disjoint mode instantiations.
3203 (define_expand "widen_ssum<mode>3"
3204 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3205 (plus:<VDBLW> (sign_extend:<VDBLW>
3206 (match_operand:VQW 1 "register_operand" ""))
3207 (match_operand:<VDBLW> 2 "register_operand" "")))]
3208 "TARGET_SIMD"
3209 {
3210 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3211 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3212
3213 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3214 operands[1], p));
3215 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3216 DONE;
3217 }
3218 )
3219
;; 64-bit input: a single saddw suffices.
3220 (define_expand "widen_ssum<mode>3"
3221 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3222 (plus:<VWIDE> (sign_extend:<VWIDE>
3223 (match_operand:VD_BHSI 1 "register_operand" ""))
3224 (match_operand:<VWIDE> 2 "register_operand" "")))]
3225 "TARGET_SIMD"
3226 {
3227 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3228 DONE;
3229 })
3230
3231 (define_expand "widen_usum<mode>3"
3232 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3233 (plus:<VDBLW> (zero_extend:<VDBLW>
3234 (match_operand:VQW 1 "register_operand" ""))
3235 (match_operand:<VDBLW> 2 "register_operand" "")))]
3236 "TARGET_SIMD"
3237 {
3238 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3239 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3240
3241 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3242 operands[1], p));
3243 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3244 DONE;
3245 }
3246 )
3247
;; 64-bit input: a single uaddw suffices.
3248 (define_expand "widen_usum<mode>3"
3249 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3250 (plus:<VWIDE> (zero_extend:<VWIDE>
3251 (match_operand:VD_BHSI 1 "register_operand" ""))
3252 (match_operand:<VWIDE> 2 "register_operand" "")))]
3253 "TARGET_SIMD"
3254 {
3255 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3256 DONE;
3257 })
3258
;; [us]addw/[us]subw: wide operand 1 combined with the extension of a
;; narrow 64-bit operand 2.
3259 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3261 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3262 (ANY_EXTEND:<VWIDE>
3263 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3264 "TARGET_SIMD"
3265 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3266 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3267 )
3268
;; As above but the narrow operand is the LOW half of a Q register.
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3272 (ANY_EXTEND:<VWIDE>
3273 (vec_select:<VHALF>
3274 (match_operand:VQW 2 "register_operand" "w")
3275 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3276 "TARGET_SIMD"
3277 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3278 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3279 )
3280
;; As above but on the HIGH half ([us]addw2/[us]subw2).
3281 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3282 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3283 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3284 (ANY_EXTEND:<VWIDE>
3285 (vec_select:<VHALF>
3286 (match_operand:VQW 2 "register_operand" "w")
3287 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3288 "TARGET_SIMD"
3289 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3291 )
3292
;; The four *w2 intrinsic expanders below build the hi-half selector
;; (high = true) and forward to the matching w2_internal insn.
3293 (define_expand "aarch64_saddw2<mode>"
3294 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3295 (match_operand:<VWIDE> 1 "register_operand" "w")
3296 (match_operand:VQW 2 "register_operand" "w")]
3297 "TARGET_SIMD"
3298 {
3299 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3301 operands[2], p));
3302 DONE;
3303 })
3304
3305 (define_expand "aarch64_uaddw2<mode>"
3306 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (match_operand:<VWIDE> 1 "register_operand" "w")
3308 (match_operand:VQW 2 "register_operand" "w")]
3309 "TARGET_SIMD"
3310 {
3311 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3312 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3313 operands[2], p));
3314 DONE;
3315 })
3316
3317
3318 (define_expand "aarch64_ssubw2<mode>"
3319 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3320 (match_operand:<VWIDE> 1 "register_operand" "w")
3321 (match_operand:VQW 2 "register_operand" "w")]
3322 "TARGET_SIMD"
3323 {
3324 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3325 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3326 operands[2], p));
3327 DONE;
3328 })
3329
3330 (define_expand "aarch64_usubw2<mode>"
3331 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3332 (match_operand:<VWIDE> 1 "register_operand" "w")
3333 (match_operand:VQW 2 "register_operand" "w")]
3334 "TARGET_SIMD"
3335 {
3336 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3337 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3338 operands[2], p));
3339 DONE;
3340 })
3341
3342 ;; <su><r>h<addsub>.
3343
;; Halving add/sub, optionally rounding ([su]hadd, [su]rhadd, [su]hsub);
;; the exact operation is selected by the HADDSUB unspec iterator.
3344 (define_insn "aarch64_<sur>h<addsub><mode>"
3345 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3346 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3347 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3348 HADDSUB))]
3349 "TARGET_SIMD"
3350 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3351 [(set_attr "type" "neon_<addsub>_halve<q>")]
3352 )
3353
3354 ;; <r><addsub>hn<q>.
3355
;; Narrowing high-half add/sub ([r]addhn/[r]subhn): result is the
;; narrow-mode upper halves of the wide element sums/differences.
3356 (define_insn "aarch64_<sur><addsub>hn<mode>"
3357 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3358 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3359 (match_operand:VQN 2 "register_operand" "w")]
3360 ADDSUBHN))]
3361 "TARGET_SIMD"
3362 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3363 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3364 )
3365
;; The *hn2 form: operand 1 ties the low half of the destination ("0")
;; while the narrowed result is written to the high half.
3366 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3367 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3368 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3369 (match_operand:VQN 2 "register_operand" "w")
3370 (match_operand:VQN 3 "register_operand" "w")]
3371 ADDSUBHN2))]
3372 "TARGET_SIMD"
3373 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3374 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3375 )
3376
3377 ;; pmul.
3378
;; Polynomial (carryless) multiply on byte vectors.
3379 (define_insn "aarch64_pmul<mode>"
3380 [(set (match_operand:VB 0 "register_operand" "=w")
3381 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3382 (match_operand:VB 2 "register_operand" "w")]
3383 UNSPEC_PMUL))]
3384 "TARGET_SIMD"
3385 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3386 [(set_attr "type" "neon_mul_<Vetype><q>")]
3387 )
3388
3389 ;; fmulx.
3390
;; FMULX on vectors and FP scalars; kept as an unspec since its special
;; handling of 0 * inf differs from a plain multiply.
3391 (define_insn "aarch64_fmulx<mode>"
3392 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3393 (unspec:VHSDF_HSDF
3394 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3395 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3396 UNSPEC_FMULX))]
3397 "TARGET_SIMD"
3398 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3399 [(set_attr "type" "neon_fp_mul_<stype>")]
3400 )
3401
3402 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3403
;; FMULX by one element selected from a vector of the OTHER width
;; (<VSWAP_WIDTH>), duplicated across all lanes.
3404 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3405 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3406 (unspec:VDQSF
3407 [(match_operand:VDQSF 1 "register_operand" "w")
3408 (vec_duplicate:VDQSF
3409 (vec_select:<VEL>
3410 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3411 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3412 UNSPEC_FMULX))]
3413 "TARGET_SIMD"
3414 {
3415 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3416 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3417 }
3418 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3419 )
3420
3421 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3422
;; Same as above, but the element comes from a vector of the SAME mode.
3423 (define_insn "*aarch64_mulx_elt<mode>"
3424 [(set (match_operand:VDQF 0 "register_operand" "=w")
3425 (unspec:VDQF
3426 [(match_operand:VDQF 1 "register_operand" "w")
3427 (vec_duplicate:VDQF
3428 (vec_select:<VEL>
3429 (match_operand:VDQF 2 "register_operand" "w")
3430 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3431 UNSPEC_FMULX))]
3432 "TARGET_SIMD"
3433 {
3434 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3435 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3436 }
3437 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3438 )
3439
3440 ;; vmulxq_lane
3441
;; FMULX of a vector by a broadcast scalar register; element [0] in the
;; assembly syntax addresses the scalar held in operand 2's register.
;; (A stray ';' after the output-template string — which md syntax parses
;; as an accidental empty end-of-line comment — has been removed.)
3442 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3443 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3444 (unspec:VHSDF
3445 [(match_operand:VHSDF 1 "register_operand" "w")
3446 (vec_duplicate:VHSDF
3447 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3448 UNSPEC_FMULX))]
3449 "TARGET_SIMD"
3450 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
3451 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3452 )
3453
3454 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3455 ;; vmulxd_lane_f64 == vmulx_lane_f64
3456 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3457
;; Scalar FMULX of operand 1 by element 3 of vector operand 2; the lane
;; index is remapped for big-endian at output time.
3458 (define_insn "*aarch64_vgetfmulx<mode>"
3459 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3460 (unspec:<VEL>
3461 [(match_operand:<VEL> 1 "register_operand" "w")
3462 (vec_select:<VEL>
3463 (match_operand:VDQF 2 "register_operand" "w")
3464 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3465 UNSPEC_FMULX))]
3466 "TARGET_SIMD"
3467 {
3468 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3469 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3470 }
3471 [(set_attr "type" "fmul<Vetype>")]
3472 )
3473 ;; <su>q<addsub>
3474
;; Saturating add/sub ([su]qadd/[su]qsub), selected by BINQOPS.
3475 (define_insn "aarch64_<su_optab><optab><mode>"
3476 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3477 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3478 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3479 "TARGET_SIMD"
3480 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3481 [(set_attr "type" "neon_<optab><q>")]
3482 )
3483
3484 ;; suqadd and usqadd
3485
;; Accumulating forms: operand 1 is tied to the destination ("0") since
;; the instruction adds operand 2 into it in place.
3486 (define_insn "aarch64_<sur>qadd<mode>"
3487 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3488 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3489 (match_operand:VSDQ_I 2 "register_operand" "w")]
3490 USSUQADD))]
3491 "TARGET_SIMD"
3492 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3493 [(set_attr "type" "neon_qadd<q>")]
3494 )
3495
3496 ;; sqmovun
3497
;; SQXTUN: saturating extract-narrow, signed input to unsigned result.
3498 (define_insn "aarch64_sqmovun<mode>"
3499 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3500 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3501 UNSPEC_SQXTUN))]
3502 "TARGET_SIMD"
3503 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3504 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3505 )
3506
3507 ;; sqmovn and uqmovn
3508
;; [SU]QXTN: saturating extract-narrow keeping the signedness (SUQMOVN
;; iterator selects the signed or unsigned form).
3509 (define_insn "aarch64_<sur>qmovn<mode>"
3510 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3511 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3512 SUQMOVN))]
3513 "TARGET_SIMD"
3514 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3515 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3516 )
3517
3518 ;; <su>q<absneg>
3519
;; Saturating unary ops (sqabs/sqneg), selected by UNQOPS.
3520 (define_insn "aarch64_s<optab><mode>"
3521 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3522 (UNQOPS:VSDQ_I
3523 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3524 "TARGET_SIMD"
3525 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3526 [(set_attr "type" "neon_<optab><q>")]
3527 )
3528
3529 ;; sq<r>dmulh.
3530
;; Saturating (rounding) doubling multiply returning high half
;; (sqdmulh/sqrdmulh), selected by the VQDMULH unspec iterator.
3531 (define_insn "aarch64_sq<r>dmulh<mode>"
3532 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3533 (unspec:VSDQ_HSI
3534 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3535 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3536 VQDMULH))]
3537 "TARGET_SIMD"
3538 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3539 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3540 )
3541
3542 ;; sq<r>dmulh_lane
3543
;; Vector-by-element form; the element comes from a 64-bit-wide vector
;; (<VCOND>) and the index is endian-remapped in the C template.
3544 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3545 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3546 (unspec:VDQHS
3547 [(match_operand:VDQHS 1 "register_operand" "w")
3548 (vec_select:<VEL>
3549 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3550 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3551 VQDMULH))]
3552 "TARGET_SIMD"
3553 "*
3554 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3555 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3556 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3557 )
3558
;; As above with the element taken from a 128-bit-wide vector (<VCONQ>).
3559 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3560 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3561 (unspec:VDQHS
3562 [(match_operand:VDQHS 1 "register_operand" "w")
3563 (vec_select:<VEL>
3564 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3565 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3566 VQDMULH))]
3567 "TARGET_SIMD"
3568 "*
3569 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3570 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3571 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3572 )
3573
;; Scalar (SD_HSI) by-element variants of the two patterns above.
3574 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3575 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3576 (unspec:SD_HSI
3577 [(match_operand:SD_HSI 1 "register_operand" "w")
3578 (vec_select:<VEL>
3579 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3580 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3581 VQDMULH))]
3582 "TARGET_SIMD"
3583 "*
3584 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3585 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3586 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3587 )
3588
3589 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3590 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3591 (unspec:SD_HSI
3592 [(match_operand:SD_HSI 1 "register_operand" "w")
3593 (vec_select:<VEL>
3594 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3595 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3596 VQDMULH))]
3597 "TARGET_SIMD"
3598 "*
3599 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3600 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3601 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3602 )
3603
3604 ;; sqrdml[as]h.
3605
;; SQRDMLAH/SQRDMLSH (ARMv8.1 RDMA): rounding doubling multiply
;; accumulate/subtract, high half.  Operand 1 is the accumulator, tied
;; to the destination ("0").
3606 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3607 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3608 (unspec:VSDQ_HSI
3609 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3610 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3611 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3612 SQRDMLH_AS))]
3613 "TARGET_SIMD_RDMA"
3614 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3615 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3616 )
3617
3618 ;; sqrdml[as]h_lane.
3619
;; By-element form, element taken from a 64-bit-wide vector (<VCOND>);
;; lane index endian-remapped before output.
3620 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3621 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3622 (unspec:VDQHS
3623 [(match_operand:VDQHS 1 "register_operand" "0")
3624 (match_operand:VDQHS 2 "register_operand" "w")
3625 (vec_select:<VEL>
3626 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3627 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3628 SQRDMLH_AS))]
3629 "TARGET_SIMD_RDMA"
3630 {
3631 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3632 return
3633 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3634 }
3635 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3636 )
3637
;; Scalar (SD_HSI) by-element variant of the _lane pattern.
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3640 (unspec:SD_HSI
3641 [(match_operand:SD_HSI 1 "register_operand" "0")
3642 (match_operand:SD_HSI 2 "register_operand" "w")
3643 (vec_select:<VEL>
3644 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646 SQRDMLH_AS))]
3647 "TARGET_SIMD_RDMA"
3648 {
3649 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650 return
3651 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3652 }
3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3654 )
3655
3656 ;; sqrdml[as]h_laneq.
3657
;; As the _lane patterns above, but the element is selected from a
;; 128-bit-wide vector (<VCONQ>).
3658 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3659 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3660 (unspec:VDQHS
3661 [(match_operand:VDQHS 1 "register_operand" "0")
3662 (match_operand:VDQHS 2 "register_operand" "w")
3663 (vec_select:<VEL>
3664 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3665 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3666 SQRDMLH_AS))]
3667 "TARGET_SIMD_RDMA"
3668 {
3669 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3670 return
3671 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3672 }
3673 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 )
3675
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3678 (unspec:SD_HSI
3679 [(match_operand:SD_HSI 1 "register_operand" "0")
3680 (match_operand:SD_HSI 2 "register_operand" "w")
3681 (vec_select:<VEL>
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684 SQRDMLH_AS))]
3685 "TARGET_SIMD_RDMA"
3686 {
3687 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688 return
3689 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3690 }
3691 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3692 )
3693
3694 ;; vqdml[sa]l
3695
;; Widening signed saturating doubling multiply-accumulate/subtract:
;; op0 = op1 ss_plus/ss_minus sat ((sext (op2) * sext (op3)) << 1),
;; with the add/sub chosen by the SBINQOPS code iterator.
3696 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3697 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3698 (SBINQOPS:<VWIDE>
3699 (match_operand:<VWIDE> 1 "register_operand" "0")
3700 (ss_ashift:<VWIDE>
3701 (mult:<VWIDE>
3702 (sign_extend:<VWIDE>
3703 (match_operand:VSD_HSI 2 "register_operand" "w"))
3704 (sign_extend:<VWIDE>
3705 (match_operand:VSD_HSI 3 "register_operand" "w")))
3706 (const_int 1))))]
3707 "TARGET_SIMD"
3708 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3710 )
3711
3712 ;; vqdml[sa]l_lane
3713
;; As above, but the multiplier is lane %4 of a 64-bit vector (operand 3),
;; duplicated across the vector; lane index remapped for big-endian.
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3715 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716 (SBINQOPS:<VWIDE>
3717 (match_operand:<VWIDE> 1 "register_operand" "0")
3718 (ss_ashift:<VWIDE>
3719 (mult:<VWIDE>
3720 (sign_extend:<VWIDE>
3721 (match_operand:VD_HSI 2 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (vec_duplicate:VD_HSI
3724 (vec_select:<VEL>
3725 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3726 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3727 ))
3728 (const_int 1))))]
3729 "TARGET_SIMD"
3730 {
3731 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3732 return
3733 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3734 }
3735 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3736 )
3737
;; _laneq variant: identical shape but the lane comes from a 128-bit
;; vector (mode <VCONQ> instead of <VCOND>).
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3739 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3740 (SBINQOPS:<VWIDE>
3741 (match_operand:<VWIDE> 1 "register_operand" "0")
3742 (ss_ashift:<VWIDE>
3743 (mult:<VWIDE>
3744 (sign_extend:<VWIDE>
3745 (match_operand:VD_HSI 2 "register_operand" "w"))
3746 (sign_extend:<VWIDE>
3747 (vec_duplicate:VD_HSI
3748 (vec_select:<VEL>
3749 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3750 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3751 ))
3752 (const_int 1))))]
3753 "TARGET_SIMD"
3754 {
3755 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3756 return
3757 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3758 }
3759 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3760 )
3761
;; Scalar (HI/SI) forms of the widening saturating doubling
;; multiply-accumulate/subtract with a lane multiplier: no
;; vec_duplicate is needed since the product is a single element.
;; Lane from a 64-bit vector (<VCOND>).
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3763 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3764 (SBINQOPS:<VWIDE>
3765 (match_operand:<VWIDE> 1 "register_operand" "0")
3766 (ss_ashift:<VWIDE>
3767 (mult:<VWIDE>
3768 (sign_extend:<VWIDE>
3769 (match_operand:SD_HSI 2 "register_operand" "w"))
3770 (sign_extend:<VWIDE>
3771 (vec_select:<VEL>
3772 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3773 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3774 )
3775 (const_int 1))))]
3776 "TARGET_SIMD"
3777 {
3778 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3779 return
3780 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3781 }
3782 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3783 )
3784
;; Scalar form with the lane taken from a 128-bit vector (<VCONQ>).
3785 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3786 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3787 (SBINQOPS:<VWIDE>
3788 (match_operand:<VWIDE> 1 "register_operand" "0")
3789 (ss_ashift:<VWIDE>
3790 (mult:<VWIDE>
3791 (sign_extend:<VWIDE>
3792 (match_operand:SD_HSI 2 "register_operand" "w"))
3793 (sign_extend:<VWIDE>
3794 (vec_select:<VEL>
3795 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3796 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3797 )
3798 (const_int 1))))]
3799 "TARGET_SIMD"
3800 {
3801 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3802 return
3803 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3804 }
3805 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3806 )
3807
3808 ;; vqdml[sa]l_n
3809
;; _n variant: multiplier is a scalar register (operand 3) duplicated
;; across all lanes; emitted as lane 0 of that register.
3810 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3811 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3812 (SBINQOPS:<VWIDE>
3813 (match_operand:<VWIDE> 1 "register_operand" "0")
3814 (ss_ashift:<VWIDE>
3815 (mult:<VWIDE>
3816 (sign_extend:<VWIDE>
3817 (match_operand:VD_HSI 2 "register_operand" "w"))
3818 (sign_extend:<VWIDE>
3819 (vec_duplicate:VD_HSI
3820 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3821 (const_int 1))))]
3822 "TARGET_SIMD"
3823 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3824 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3825 )
3826
3827 ;; sqdml[as]l2
3828
;; "2" (high-half) variant: both multiplicands are the high halves of
;; 128-bit vectors, selected via a vect_par_cnst_hi_half parallel
;; (operand 4) that the expanders below supply.
3829 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3830 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3831 (SBINQOPS:<VWIDE>
3832 (match_operand:<VWIDE> 1 "register_operand" "0")
3833 (ss_ashift:<VWIDE>
3834 (mult:<VWIDE>
3835 (sign_extend:<VWIDE>
3836 (vec_select:<VHALF>
3837 (match_operand:VQ_HSI 2 "register_operand" "w")
3838 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3839 (sign_extend:<VWIDE>
3840 (vec_select:<VHALF>
3841 (match_operand:VQ_HSI 3 "register_operand" "w")
3842 (match_dup 4))))
3843 (const_int 1))))]
3844 "TARGET_SIMD"
3845 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3846 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3847 )
3848
;; Expander: builds the hi-half selector parallel and forwards to the
;; _internal insn above (accumulate form).
3849 (define_expand "aarch64_sqdmlal2<mode>"
3850 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3851 (match_operand:<VWIDE> 1 "register_operand" "w")
3852 (match_operand:VQ_HSI 2 "register_operand" "w")
3853 (match_operand:VQ_HSI 3 "register_operand" "w")]
3854 "TARGET_SIMD"
3855 {
3856 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3857 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3858 operands[2], operands[3], p));
3859 DONE;
3860 })
3861
;; Same, subtract form.
3862 (define_expand "aarch64_sqdmlsl2<mode>"
3863 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3864 (match_operand:<VWIDE> 1 "register_operand" "w")
3865 (match_operand:VQ_HSI 2 "register_operand" "w")
3866 (match_operand:VQ_HSI 3 "register_operand" "w")]
3867 "TARGET_SIMD"
3868 {
3869 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3870 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3871 operands[2], operands[3], p));
3872 DONE;
3873 })
3874
3875 ;; vqdml[sa]l2_lane
3876
;; High-half multiply-accumulate/subtract with a lane multiplier:
;; operand 2's high half times lane %4 of operand 3 (64-bit vector
;; <VCOND> here, 128-bit <VCONQ> in the _laneq twin below).
3877 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3878 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3879 (SBINQOPS:<VWIDE>
3880 (match_operand:<VWIDE> 1 "register_operand" "0")
3881 (ss_ashift:<VWIDE>
3882 (mult:<VWIDE>
3883 (sign_extend:<VWIDE>
3884 (vec_select:<VHALF>
3885 (match_operand:VQ_HSI 2 "register_operand" "w")
3886 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3887 (sign_extend:<VWIDE>
3888 (vec_duplicate:<VHALF>
3889 (vec_select:<VEL>
3890 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3891 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3892 ))))
3893 (const_int 1))))]
3894 "TARGET_SIMD"
3895 {
3896 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3897 return
3898 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3899 }
3900 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3901 )
3902
;; _laneq: lane taken from a 128-bit vector.
3903 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3904 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3905 (SBINQOPS:<VWIDE>
3906 (match_operand:<VWIDE> 1 "register_operand" "0")
3907 (ss_ashift:<VWIDE>
3908 (mult:<VWIDE>
3909 (sign_extend:<VWIDE>
3910 (vec_select:<VHALF>
3911 (match_operand:VQ_HSI 2 "register_operand" "w")
3912 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3913 (sign_extend:<VWIDE>
3914 (vec_duplicate:<VHALF>
3915 (vec_select:<VEL>
3916 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3917 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3918 ))))
3919 (const_int 1))))]
3920 "TARGET_SIMD"
3921 {
3922 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3923 return
3924 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3925 }
3926 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3927 )
3928
;; Four expanders (sqdmlal2/sqdmlsl2 x lane/laneq): each builds the
;; hi-half selector parallel and forwards to the matching _internal insn.
3929 (define_expand "aarch64_sqdmlal2_lane<mode>"
3930 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3931 (match_operand:<VWIDE> 1 "register_operand" "w")
3932 (match_operand:VQ_HSI 2 "register_operand" "w")
3933 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3934 (match_operand:SI 4 "immediate_operand" "i")]
3935 "TARGET_SIMD"
3936 {
3937 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3938 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3939 operands[2], operands[3],
3940 operands[4], p));
3941 DONE;
3942 })
3943
3944 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3945 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3946 (match_operand:<VWIDE> 1 "register_operand" "w")
3947 (match_operand:VQ_HSI 2 "register_operand" "w")
3948 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3949 (match_operand:SI 4 "immediate_operand" "i")]
3950 "TARGET_SIMD"
3951 {
3952 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3953 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3954 operands[2], operands[3],
3955 operands[4], p));
3956 DONE;
3957 })
3958
3959 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3960 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3961 (match_operand:<VWIDE> 1 "register_operand" "w")
3962 (match_operand:VQ_HSI 2 "register_operand" "w")
3963 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3964 (match_operand:SI 4 "immediate_operand" "i")]
3965 "TARGET_SIMD"
3966 {
3967 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3968 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3969 operands[2], operands[3],
3970 operands[4], p));
3971 DONE;
3972 })
3973
3974 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3975 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3976 (match_operand:<VWIDE> 1 "register_operand" "w")
3977 (match_operand:VQ_HSI 2 "register_operand" "w")
3978 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3979 (match_operand:SI 4 "immediate_operand" "i")]
3980 "TARGET_SIMD"
3981 {
3982 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3983 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3984 operands[2], operands[3],
3985 operands[4], p));
3986 DONE;
3987 })
3988
;; High-half multiply-accumulate/subtract by a duplicated scalar
;; (operand 3); printed as lane 0 of that register.
3989 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3991 (SBINQOPS:<VWIDE>
3992 (match_operand:<VWIDE> 1 "register_operand" "0")
3993 (ss_ashift:<VWIDE>
3994 (mult:<VWIDE>
3995 (sign_extend:<VWIDE>
3996 (vec_select:<VHALF>
3997 (match_operand:VQ_HSI 2 "register_operand" "w")
3998 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3999 (sign_extend:<VWIDE>
4000 (vec_duplicate:<VHALF>
4001 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4002 (const_int 1))))]
4003 "TARGET_SIMD"
4004 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4005 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4006 )
4007
;; Expanders supplying the hi-half selector for the _n form.
4008 (define_expand "aarch64_sqdmlal2_n<mode>"
4009 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4010 (match_operand:<VWIDE> 1 "register_operand" "w")
4011 (match_operand:VQ_HSI 2 "register_operand" "w")
4012 (match_operand:<VEL> 3 "register_operand" "w")]
4013 "TARGET_SIMD"
4014 {
4015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4017 operands[2], operands[3],
4018 p));
4019 DONE;
4020 })
4021
4022 (define_expand "aarch64_sqdmlsl2_n<mode>"
4023 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (match_operand:<VWIDE> 1 "register_operand" "w")
4025 (match_operand:VQ_HSI 2 "register_operand" "w")
4026 (match_operand:<VEL> 3 "register_operand" "w")]
4027 "TARGET_SIMD"
4028 {
4029 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4030 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4031 operands[2], operands[3],
4032 p));
4033 DONE;
4034 })
4035
4036 ;; vqdmull
4037
;; Widening signed saturating doubling multiply (no accumulator):
;; op0 = sat ((sext (op1) * sext (op2)) << 1).
4038 (define_insn "aarch64_sqdmull<mode>"
4039 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4040 (ss_ashift:<VWIDE>
4041 (mult:<VWIDE>
4042 (sign_extend:<VWIDE>
4043 (match_operand:VSD_HSI 1 "register_operand" "w"))
4044 (sign_extend:<VWIDE>
4045 (match_operand:VSD_HSI 2 "register_operand" "w")))
4046 (const_int 1)))]
4047 "TARGET_SIMD"
4048 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4049 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4050 )
4051
4052 ;; vqdmull_lane
4053
;; Multiplier taken from lane %3 of a 64-bit vector (operand 2),
;; duplicated across the vector; lane remapped for big-endian.
4054 (define_insn "aarch64_sqdmull_lane<mode>"
4055 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4056 (ss_ashift:<VWIDE>
4057 (mult:<VWIDE>
4058 (sign_extend:<VWIDE>
4059 (match_operand:VD_HSI 1 "register_operand" "w"))
4060 (sign_extend:<VWIDE>
4061 (vec_duplicate:VD_HSI
4062 (vec_select:<VEL>
4063 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4064 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4065 ))
4066 (const_int 1)))]
4067 "TARGET_SIMD"
4068 {
4069 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4070 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4071 }
4072 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4073 )
4074
;; _laneq: lane from a 128-bit vector.
4075 (define_insn "aarch64_sqdmull_laneq<mode>"
4076 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4077 (ss_ashift:<VWIDE>
4078 (mult:<VWIDE>
4079 (sign_extend:<VWIDE>
4080 (match_operand:VD_HSI 1 "register_operand" "w"))
4081 (sign_extend:<VWIDE>
4082 (vec_duplicate:VD_HSI
4083 (vec_select:<VEL>
4084 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4085 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4086 ))
4087 (const_int 1)))]
4088 "TARGET_SIMD"
4089 {
4090 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4091 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4092 }
4093 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4094 )
4095
;; Scalar (HI/SI) sqdmull with lane multiplier: single-element product,
;; so no vec_duplicate.  Lane from a 64-bit vector (<VCOND>).
4096 (define_insn "aarch64_sqdmull_lane<mode>"
4097 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4098 (ss_ashift:<VWIDE>
4099 (mult:<VWIDE>
4100 (sign_extend:<VWIDE>
4101 (match_operand:SD_HSI 1 "register_operand" "w"))
4102 (sign_extend:<VWIDE>
4103 (vec_select:<VEL>
4104 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4105 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4106 ))
4107 (const_int 1)))]
4108 "TARGET_SIMD"
4109 {
4110 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4111 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4112 }
4113 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4114 )
4115
;; Scalar _laneq: lane from a 128-bit vector.
4116 (define_insn "aarch64_sqdmull_laneq<mode>"
4117 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (ss_ashift:<VWIDE>
4119 (mult:<VWIDE>
4120 (sign_extend:<VWIDE>
4121 (match_operand:SD_HSI 1 "register_operand" "w"))
4122 (sign_extend:<VWIDE>
4123 (vec_select:<VEL>
4124 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4125 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4126 ))
4127 (const_int 1)))]
4128 "TARGET_SIMD"
4129 {
4130 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4131 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4132 }
4133 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4134 )
4135
4136 ;; vqdmull_n
4137
;; _n: multiplier is a scalar register duplicated; printed as lane 0.
4138 (define_insn "aarch64_sqdmull_n<mode>"
4139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140 (ss_ashift:<VWIDE>
4141 (mult:<VWIDE>
4142 (sign_extend:<VWIDE>
4143 (match_operand:VD_HSI 1 "register_operand" "w"))
4144 (sign_extend:<VWIDE>
4145 (vec_duplicate:VD_HSI
4146 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4147 )
4148 (const_int 1)))]
4149 "TARGET_SIMD"
4150 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4151 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4152 )
4153
4154 ;; vqdmull2
4155
4156
4157
;; sqdmull2: widening saturating doubling multiply of the high halves
;; of two 128-bit vectors; the hi-half selector (operand 3) comes from
;; the expander below.
4158 (define_insn "aarch64_sqdmull2<mode>_internal"
4159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160 (ss_ashift:<VWIDE>
4161 (mult:<VWIDE>
4162 (sign_extend:<VWIDE>
4163 (vec_select:<VHALF>
4164 (match_operand:VQ_HSI 1 "register_operand" "w")
4165 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4166 (sign_extend:<VWIDE>
4167 (vec_select:<VHALF>
4168 (match_operand:VQ_HSI 2 "register_operand" "w")
4169 (match_dup 3)))
4170 )
4171 (const_int 1)))]
4172 "TARGET_SIMD"
4173 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4174 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4175 )
4176
4177 (define_expand "aarch64_sqdmull2<mode>"
4178 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4179 (match_operand:VQ_HSI 1 "register_operand" "w")
4180 (match_operand:VQ_HSI 2 "register_operand" "w")]
4181 "TARGET_SIMD"
4182 {
4183 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4184 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4185 operands[2], p));
4186 DONE;
4187 })
4188
4189 ;; vqdmull2_lane
4190
;; sqdmull2 with a lane multiplier: high half of operand 1 times lane
;; %3 of operand 2 (64-bit <VCOND> here, 128-bit <VCONQ> in _laneq).
4191 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4192 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4193 (ss_ashift:<VWIDE>
4194 (mult:<VWIDE>
4195 (sign_extend:<VWIDE>
4196 (vec_select:<VHALF>
4197 (match_operand:VQ_HSI 1 "register_operand" "w")
4198 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4199 (sign_extend:<VWIDE>
4200 (vec_duplicate:<VHALF>
4201 (vec_select:<VEL>
4202 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4203 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4204 ))
4205 (const_int 1)))]
4206 "TARGET_SIMD"
4207 {
4208 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4209 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4210 }
4211 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4212 )
4213
4214 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4216 (ss_ashift:<VWIDE>
4217 (mult:<VWIDE>
4218 (sign_extend:<VWIDE>
4219 (vec_select:<VHALF>
4220 (match_operand:VQ_HSI 1 "register_operand" "w")
4221 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4222 (sign_extend:<VWIDE>
4223 (vec_duplicate:<VHALF>
4224 (vec_select:<VEL>
4225 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4226 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4227 ))
4228 (const_int 1)))]
4229 "TARGET_SIMD"
4230 {
4231 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4232 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4233 }
4234 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4235 )
4236
;; Expanders building the hi-half selector for the lane/laneq insns.
4237 (define_expand "aarch64_sqdmull2_lane<mode>"
4238 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4239 (match_operand:VQ_HSI 1 "register_operand" "w")
4240 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4241 (match_operand:SI 3 "immediate_operand" "i")]
4242 "TARGET_SIMD"
4243 {
4244 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4245 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4246 operands[2], operands[3],
4247 p));
4248 DONE;
4249 })
4250
4251 (define_expand "aarch64_sqdmull2_laneq<mode>"
4252 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4253 (match_operand:VQ_HSI 1 "register_operand" "w")
4254 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4255 (match_operand:SI 3 "immediate_operand" "i")]
4256 "TARGET_SIMD"
4257 {
4258 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4259 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4260 operands[2], operands[3],
4261 p));
4262 DONE;
4263 })
4264
4265 ;; vqdmull2_n
4266
;; sqdmull2 by a duplicated scalar (operand 2); printed as lane 0.
4267 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4268 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4269 (ss_ashift:<VWIDE>
4270 (mult:<VWIDE>
4271 (sign_extend:<VWIDE>
4272 (vec_select:<VHALF>
4273 (match_operand:VQ_HSI 1 "register_operand" "w")
4274 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4275 (sign_extend:<VWIDE>
4276 (vec_duplicate:<VHALF>
4277 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4278 )
4279 (const_int 1)))]
4280 "TARGET_SIMD"
4281 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4282 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4283 )
4284
4285 (define_expand "aarch64_sqdmull2_n<mode>"
4286 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (match_operand:VQ_HSI 1 "register_operand" "w")
4288 (match_operand:<VEL> 2 "register_operand" "w")]
4289 "TARGET_SIMD"
4290 {
4291 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4292 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4293 operands[2], p));
4294 DONE;
4295 })
4296
4297 ;; vshl
4298
;; Shift by a (signed) register amount; the exact mnemonic comes from
;; the <sur> iterator over the VSHL unspec variants.
;; Fixed: dropped the stray ';' after the output template string.  In
;; an .md file ';' starts a comment so it was inert, but no other
;; template in this file carries one.
4299 (define_insn "aarch64_<sur>shl<mode>"
4300 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4301 (unspec:VSDQ_I_DI
4302 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4303 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4304 VSHL))]
4305 "TARGET_SIMD"
4306 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4307 [(set_attr "type" "neon_shift_reg<q>")]
4308 )
4309
4310
4311 ;; vqshl
4312
;; Saturating (optionally rounding) shift by register; mnemonic chosen
;; by the <sur>/<r> iterators over the VQSHL unspec variants.
;; Fixed: dropped the stray ';' after the output template string
;; (inert md comment starter; inconsistent with the rest of the file).
4313 (define_insn "aarch64_<sur>q<r>shl<mode>"
4314 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4315 (unspec:VSDQ_I
4316 [(match_operand:VSDQ_I 1 "register_operand" "w")
4317 (match_operand:VSDQ_I 2 "register_operand" "w")]
4318 VQSHL))]
4319 "TARGET_SIMD"
4320 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4321 [(set_attr "type" "neon_sat_shift_reg<q>")]
4322 )
4323
4324 ;; vshll_n
4325
;; Widening shift-left by immediate.  When the shift equals the element
;; bit-width the plain SHLL form is emitted; otherwise the
;; iterator-selected (s/u, rounding) variant.
4326 (define_insn "aarch64_<sur>shll_n<mode>"
4327 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4328 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4329 (match_operand:SI 2
4330 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4331 VSHLL))]
4332 "TARGET_SIMD"
4333 {
4334 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4335 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4336 else
4337 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4338 }
4339 [(set_attr "type" "neon_shift_imm_long")]
4340 )
4341
4342 ;; vshll_high_n
4343
;; High-half variant.  NOTE(review): operand 2 uses the loose
;; "immediate_operand" predicate, unlike the
;; aarch64_simd_shift_imm_bitsize_<ve_mode> predicate of the shll_n
;; pattern above — looks like it should match; confirm intent.
4344 (define_insn "aarch64_<sur>shll2_n<mode>"
4345 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4347 (match_operand:SI 2 "immediate_operand" "i")]
4348 VSHLL))]
4349 "TARGET_SIMD"
4350 {
4351 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4352 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4353 else
4354 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4355 }
4356 [(set_attr "type" "neon_shift_imm_long")]
4357 )
4358
4359 ;; vrshr_n
4360
;; Shift-right by immediate (plain or rounding, per VRSHR_N iterator).
4361 (define_insn "aarch64_<sur>shr_n<mode>"
4362 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4363 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4364 (match_operand:SI 2
4365 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4366 VRSHR_N))]
4367 "TARGET_SIMD"
4368 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4369 [(set_attr "type" "neon_sat_shift_imm<q>")]
4370 )
4371
4372 ;; v(r)sra_n
4373
;; Shift-right and accumulate: operand 1 is the accumulator (tied to
;; the destination), operand 2 the shifted value.
4374 (define_insn "aarch64_<sur>sra_n<mode>"
4375 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4376 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4377 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4378 (match_operand:SI 3
4379 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4380 VSRA))]
4381 "TARGET_SIMD"
4382 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4383 [(set_attr "type" "neon_shift_acc<q>")]
4384 )
4385
4386 ;; vs<lr>i_n
4387
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted
;; field are preserved, hence operand 1 tied to the output.
4388 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4389 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4390 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4391 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4392 (match_operand:SI 3
4393 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4394 VSLRI))]
4395 "TARGET_SIMD"
4396 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4397 [(set_attr "type" "neon_shift_imm<q>")]
4398 )
4399
4400 ;; vqshl(u)
4401
;; Saturating shift-left by immediate (signed/unsigned/unsigned-from-
;; signed per the VQSHL_N iterator).
4402 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4403 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4404 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4405 (match_operand:SI 2
4406 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4407 VQSHL_N))]
4408 "TARGET_SIMD"
4409 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4410 [(set_attr "type" "neon_sat_shift_imm<q>")]
4411 )
4412
4413
4414 ;; vq(r)shr(u)n_n
4415
;; Saturating (rounding) shift-right and narrow: result has half-width
;; elements (<VNARROWQ>), shift amount limited by the offset predicate.
4416 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4417 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4418 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4419 (match_operand:SI 2
4420 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4421 VQSHRN_N))]
4422 "TARGET_SIMD"
4423 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4424 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4425 )
4426
4427
4428 ;; cm(eq|ge|gt|lt|le)
4429 ;; Note, we have constraints for Dz and Z as different expanders
4430 ;; have different ideas of what should be passed to this pattern.
4431
;; Integer compare producing an all-ones/all-zeros mask, written as
;; neg (comparison).  Alternative 2 compares against immediate zero.
4432 (define_insn "aarch64_cm<optab><mode>"
4433 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4434 (neg:<V_INT_EQUIV>
4435 (COMPARISONS:<V_INT_EQUIV>
4436 (match_operand:VDQ_I 1 "register_operand" "w,w")
4437 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4438 )))]
4439 "TARGET_SIMD"
4440 "@
4441 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4442 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4443 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4444 )
4445
;; DImode compare: kept as "#" until after reload, then either split to
;; a GP-register compare + conditional store (when allocated to GP
;; regs) or to the no-CC-clobber pattern below.
4446 (define_insn_and_split "aarch64_cm<optab>di"
4447 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4448 (neg:DI
4449 (COMPARISONS:DI
4450 (match_operand:DI 1 "register_operand" "w,w,r")
4451 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4452 )))
4453 (clobber (reg:CC CC_REGNUM))]
4454 "TARGET_SIMD"
4455 "#"
4456 "reload_completed"
4457 [(set (match_operand:DI 0 "register_operand")
4458 (neg:DI
4459 (COMPARISONS:DI
4460 (match_operand:DI 1 "register_operand")
4461 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4462 )))]
4463 {
4464 /* If we are in the general purpose register file,
4465 we split to a sequence of comparison and store. */
4466 if (GP_REGNUM_P (REGNO (operands[0]))
4467 && GP_REGNUM_P (REGNO (operands[1])))
4468 {
4469 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4470 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4471 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4472 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4473 DONE;
4474 }
4475 /* Otherwise, we expand to a similar pattern which does not
4476 clobber CC_REGNUM. */
4477 }
4478 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4479 )
4480
;; Post-reload DImode compare in the FP/SIMD register file (no CC
;; clobber); target of the split above when operands stay in "w" regs.
4481 (define_insn "*aarch64_cm<optab>di"
4482 [(set (match_operand:DI 0 "register_operand" "=w,w")
4483 (neg:DI
4484 (COMPARISONS:DI
4485 (match_operand:DI 1 "register_operand" "w,w")
4486 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4487 )))]
4488 "TARGET_SIMD && reload_completed"
4489 "@
4490 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4491 cm<optab>\t%d0, %d1, #0"
4492 [(set_attr "type" "neon_compare, neon_compare_zero")]
4493 )
4494
4495 ;; cm(hs|hi)
4496
;; Unsigned integer compare (CMHS/CMHI class via UCOMPARISONS) — no
;; compare-against-zero alternative here.
4497 (define_insn "aarch64_cm<optab><mode>"
4498 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4499 (neg:<V_INT_EQUIV>
4500 (UCOMPARISONS:<V_INT_EQUIV>
4501 (match_operand:VDQ_I 1 "register_operand" "w")
4502 (match_operand:VDQ_I 2 "register_operand" "w")
4503 )))]
4504 "TARGET_SIMD"
4505 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4506 [(set_attr "type" "neon_compare<q>")]
4507 )
4508
;; DImode unsigned compare, split after reload: GP registers get a
;; compare + conditional store, SIMD registers fall through to the
;; no-CC-clobber pattern below.
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510 [(set (match_operand:DI 0 "register_operand" "=w,r")
4511 (neg:DI
4512 (UCOMPARISONS:DI
4513 (match_operand:DI 1 "register_operand" "w,r")
4514 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4515 )))
4516 (clobber (reg:CC CC_REGNUM))]
4517 "TARGET_SIMD"
4518 "#"
4519 "reload_completed"
4520 [(set (match_operand:DI 0 "register_operand")
4521 (neg:DI
4522 (UCOMPARISONS:DI
4523 (match_operand:DI 1 "register_operand")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4525 )))]
4526 {
4527 /* If we are in the general purpose register file,
4528 we split to a sequence of comparison and store. */
4529 if (GP_REGNUM_P (REGNO (operands[0]))
4530 && GP_REGNUM_P (REGNO (operands[1])))
4531 {
4532 machine_mode mode = CCmode;
4533 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4536 DONE;
4537 }
4538 /* Otherwise, we expand to a similar pattern which does not
4539 clobber CC_REGNUM. */
4540 }
4541 [(set_attr "type" "neon_compare,multiple")]
4542 )
4543
;; Post-reload DImode unsigned compare in SIMD registers.
4544 (define_insn "*aarch64_cm<optab>di"
4545 [(set (match_operand:DI 0 "register_operand" "=w")
4546 (neg:DI
4547 (UCOMPARISONS:DI
4548 (match_operand:DI 1 "register_operand" "w")
4549 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4550 )))]
4551 "TARGET_SIMD && reload_completed"
4552 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4553 [(set_attr "type" "neon_compare")]
4554 )
4555
4556 ;; cmtst
4557
4558 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4559 ;; we don't have any insns using ne, and aarch64_vcond outputs
4560 ;; not (neg (eq (and x y) 0))
4561 ;; which is rewritten by simplify_rtx as
4562 ;; plus (eq (and x y) 0) -1.
4563
4564 (define_insn "aarch64_cmtst<mode>"
4565 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4566 (plus:<V_INT_EQUIV>
4567 (eq:<V_INT_EQUIV>
4568 (and:VDQ_I
4569 (match_operand:VDQ_I 1 "register_operand" "w")
4570 (match_operand:VDQ_I 2 "register_operand" "w"))
4571 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4572 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4573 ]
4574 "TARGET_SIMD"
4575 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4576 [(set_attr "type" "neon_tst<q>")]
4577 )
4578
;; Scalar DImode CMTST.  Two alternatives: FP/SIMD registers ("w") or
;; general registers ("r").  We emit "#" and split after reload, because
;; only then is the chosen register class known: for general registers we
;; split to a compare + conditional-store sequence (which clobbers CC),
;; otherwise we fall through to the CC-free pattern below.
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)
4616
;; CC-free variant of the DImode CMTST used when both operands end up in
;; FP/SIMD registers; this is the pattern the split above falls through to.
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)
4629
;; fcm(eq|ge|gt|le|lt)

;; Floating-point compares producing all-ones/all-zeros lane masks.
;; Second alternative compares against #0.0 directly ("YDz" constraint);
;; <cmp_1>/<cmp_2> swap the operand order so that le/lt are emitted as
;; ge/gt with the operands reversed.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4645
;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

;; Absolute compares: |x| cmp |y|, producing a lane mask.  le/lt are
;; handled by emitting the ge/gt mnemonic with operands swapped
;; (via <cmp_1>/<cmp_2>).
(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
	  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)
4663
;; addp

;; Pairwise integer addition on 64-bit vectors (unspec: adjacent element
;; pairs from the concatenation of operands 1 and 2 are summed).
(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
	(unspec:VD_BHSI
	  [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Scalar form: sums the two DImode lanes of a V2DI input into %d0.
(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI
	  [(match_operand:V2DI 1 "register_operand" "w")]
	  UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)
4686
;; sqrt

;; Vector square root.  The expander first offers the operation to
;; aarch64_emit_approx_sqrt (which may emit an approximating sequence and
;; return true); otherwise expansion falls through to the fsqrt insn below.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

;; The exact (IEEE) vector square root instruction.
(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)
4705
;; Patterns for vector struct loads and stores.

;; Two-register (OImode) interleaved load of Q-register vectors.
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; LD2R: load one element pair and replicate it across all lanes of the
;; two destination registers.  The memory operand is BLKmode; its size is
;; set by the expander that builds it.
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

;; LD2 to a single lane; operand 2 ("0") carries the untouched lanes, and
;; operand 3 (the lane index) is flipped for big-endian at output time.
(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

;; Standard-name expander.  On big-endian the loaded register list is
;; permuted back to GCC's element order via a TBL on each register.
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
4761
;; Two-register (OImode) interleaved store of Q-register vectors.
(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

;; Standard-name expander; mirrors vec_load_lanesoi: on big-endian the
;; register list is permuted into memory order before the ST2.
(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})
4805
;; Three-register (CImode) interleaved load; structure parallels the
;; ld2 patterns above.
(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

;; LD3R: load one element triple and replicate across all lanes.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; LD3 to a single lane; lane index flipped for big-endian at output time.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-name expander; big-endian permutes the result list.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})
4859
;; Three-register (CImode) interleaved store; parallels the st2 patterns.
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander; big-endian permutes the list before the ST3.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})
4903
;; Four-register (XImode) interleaved load; parallels ld2/ld3 above.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; LD4R: load one element quadruple and replicate across all lanes.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

;; LD4 to a single lane; lane index flipped for big-endian at output time.
(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

;; Standard-name expander; big-endian permutes the result list.
(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})
4957
;; Four-register (XImode) interleaved store; parallels st2/st3 above.
(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Standard-name expander; big-endian permutes the list before the ST4.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})
5001
;; Permute each 128-bit register of a struct register list through the
;; TBL mask in operand 2 (built by aarch64_reverse_mask); used on
;; big-endian to convert between GCC element order and LDn/STn memory
;; order.  Splits after reload into one TBL per constituent register;
;; the "=&w" earlyclobber keeps the destination list disjoint from the
;; inputs while the per-register TBLs are emitted.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  /* One TBL per 128-bit vector register in the list.  */
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)
5026
;; Reload patterns for AdvSIMD register list operands.

;; Move expander for struct modes (OI/CI/XI).  Before reload, legitimise
;; mem-to-mem and constant sources by forcing the source into a register
;; when the destination is not a plain REG.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
	(match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})
5040
;; Little-endian struct moves: reg-reg moves are split elsewhere ("#"),
;; stores/loads use multi-register ST1/LD1.  Big-endian uses the
;; *aarch64_be_mov patterns below instead.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)
5055
;; Element-ordered single-vector load/store, kept as unspecs so they are
;; usable on big-endian where a plain move would imply the wrong lane
;; numbering.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
5074
;; Big-endian struct moves.  OImode fits an STP/LDP of Q registers; CI
;; and XI modes always split ("#") into OI/TI-sized pieces via the
;; define_splits below (hence the "o" offsettable-memory constraint).
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)
5110
;; Post-reload splitters for struct moves.  Reg-reg moves become a
;; sequence of TImode register moves (aarch64_simd_emit_reg_reg_move
;; orders them to cope with overlap).  On big-endian, CI/XI memory moves
;; are decomposed into OI/TI-sized sub-moves; CImode's odd tail uses a
;; V16QI lowpart so the 16-byte piece is moved with a vector pattern.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      /* First 32 bytes as one OImode move, remaining 16 bytes via a
	 V16QI view of the TImode subreg.  */
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      /* 64 bytes as two OImode moves.  */
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})
5172
;; Builtin expander for LDnR (load and replicate).  Wraps the pointer in
;; operand 1 in a BLKmode MEM sized to nregs elements, then defers to the
;; matching aarch64_simd_ld<n>r insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})
5187
;; 64-bit-vector struct loads into D registers.  For true vector element
;; modes (VD) we use the interleaving LDn; for DImode/DFmode (DX, one
;; element per vector) interleaving is a no-op, so a plain LD1 of .1d
;; registers is used instead.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)
5247
;; Builtin expanders for structure loads.  Each takes a pointer in
;; operand 1, wraps it in a suitably-typed MEM, and defers to the
;; matching insn pattern.

;; LDn of 64-bit vectors: BLKmode MEM of nregs * 8 bytes.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

;; LD1: an element-ordered load on big-endian, otherwise a plain move.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; LDn of 128-bit vectors: MEM in the struct mode itself.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "=w")
  (match_operand:DI 1 "register_operand" "r")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; LDn to one lane: bounds-check the constant lane index before deferring
;; to the *_lane insn; operand 2 supplies the untouched lanes.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
	(match_operand:DI 1 "register_operand" "w")
	(match_operand:VSTRUCT 2 "register_operand" "0")
	(match_operand:SI 3 "immediate_operand" "i")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0,
			    GET_MODE_NUNITS (<VALLDIF:MODE>mode),
			    NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})
5308
;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

;; Extract D-register `part' of a struct value: take the 128-bit slice at
;; byte offset part * 16 via a SUBREG, then its low 64 bits.  The VDBL
;; temporary keeps the SUBREG at a 16-byte boundary.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

;; Extract Q-register `part' of a struct value as a direct 16-byte SUBREG.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand" "=w")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})
5344
;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

;; Constant permute: try the specialised permute insns (trn/zip/uzp/rev/
;; ext/dup/tbl) via aarch64_expand_vec_perm_const; FAIL lets the
;; middle-end fall back to generic lowering.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:VALL_F16 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3)]
  "TARGET_SIMD"
{
  if (aarch64_expand_vec_perm_const (operands[0], operands[1],
				     operands[2], operands[3], <nunits>))
    DONE;
  else
    FAIL;
})

;; Variable permute (byte vectors only): lowered via TBL by
;; aarch64_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})
5376
;; Table lookups.  TBL writes zero for out-of-range indices; TBX leaves
;; the corresponding destination byte unchanged (hence the "0" tie on
;; operand 1 of the tbx patterns).

;; One source register.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5419
;; Three source registers.

;; Three- and four-register table lookups over CImode/XImode register
;; lists; same TBL/TBX semantics as the patterns above.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:CI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:XI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)
5465
;; Concatenate two V16QI registers into an OImode register pair for use
;; as a TBL source list.  Kept as one insn until after reload, then split
;; by aarch64_split_combinev16qi into the individual register moves.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
5481
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 permutes.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
;; The lane index is scaled to a byte offset at output time, since the
;; EXT instruction takes a byte position.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; REV16/REV32/REV64 element reversals.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
	 REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
5525
;; ST2 of a pair of D registers (OImode tuple) for the 64-bit vector
;; modes in VD; the dummy VD unspec operand only carries the mode.
5526 (define_insn "aarch64_st2<mode>_dreg"
5527 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5528 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5529 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530 UNSPEC_ST2))]
5531 "TARGET_SIMD"
5532 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5533 [(set_attr "type" "neon_store2_2reg")]
5534 )
5535
;; As above, but for the 64-bit scalar modes in DX; uses the
;; multi-register ST1 .1d form rather than ST2 for these modes.
5536 (define_insn "aarch64_st2<mode>_dreg"
5537 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5538 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5539 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540 UNSPEC_ST2))]
5541 "TARGET_SIMD"
5542 "st1\\t{%S1.1d - %T1.1d}, %0"
5543 [(set_attr "type" "neon_store1_2reg")]
5544 )
5545
;; ST3 of three D registers (CImode tuple) for VD modes.
5546 (define_insn "aarch64_st3<mode>_dreg"
5547 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5548 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5549 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5550 UNSPEC_ST3))]
5551 "TARGET_SIMD"
5552 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5553 [(set_attr "type" "neon_store3_3reg")]
5554 )
5555
;; As above for DX modes, using the three-register ST1 .1d form.
5556 (define_insn "aarch64_st3<mode>_dreg"
5557 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5558 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5559 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5560 UNSPEC_ST3))]
5561 "TARGET_SIMD"
5562 "st1\\t{%S1.1d - %U1.1d}, %0"
5563 [(set_attr "type" "neon_store1_3reg")]
5564 )
5565
;; ST4 of four D registers (XImode tuple) for VD modes.
5566 (define_insn "aarch64_st4<mode>_dreg"
5567 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5568 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5569 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5570 UNSPEC_ST4))]
5571 "TARGET_SIMD"
5572 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5573 [(set_attr "type" "neon_store4_4reg")]
5574 )
5575
;; As above for DX modes, using the four-register ST1 .1d form.
5576 (define_insn "aarch64_st4<mode>_dreg"
5577 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5578 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5579 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5580 UNSPEC_ST4))]
5581 "TARGET_SIMD"
5582 "st1\\t{%S1.1d - %V1.1d}, %0"
5583 [(set_attr "type" "neon_store1_4reg")]
5584 )
5585
;; Builtin expander for stN of D-register structures: wraps the address
;; in a BLKmode MEM sized to <nregs> 8-byte registers and defers to the
;; matching *_dreg insn above.
5586 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5587 [(match_operand:DI 0 "register_operand" "r")
5588 (match_operand:VSTRUCT 1 "register_operand" "w")
5589 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590 "TARGET_SIMD"
5591 {
5592 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5593 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5594
5595 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5596 DONE;
5597 })
5598
;; Builtin expander for stN of Q-register structures: builds a MEM in
;; the structure mode and defers to the aarch64_simd_stN insn.
5599 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5600 [(match_operand:DI 0 "register_operand" "r")
5601 (match_operand:VSTRUCT 1 "register_operand" "w")
5602 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5603 "TARGET_SIMD"
5604 {
5605 machine_mode mode = <VSTRUCT:MODE>mode;
5606 rtx mem = gen_rtx_MEM (mode, operands[0]);
5607
5608 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5609 DONE;
5610 })
5611
;; Builtin expander for stN_lane: stores one lane (operand 2) from each
;; of the <nregs> structure registers, so the MEM is sized to
;; element-size * nregs bytes.
5612 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5613 [(match_operand:DI 0 "register_operand" "r")
5614 (match_operand:VSTRUCT 1 "register_operand" "w")
5615 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5616 (match_operand:SI 2 "immediate_operand")]
5617 "TARGET_SIMD"
5618 {
5619 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5620 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5621 * <VSTRUCT:nregs>);
5622
5623 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5624 mem, operands[1], operands[2]));
5625 DONE;
5626 })
5627
;; Builtin expander for st1: a plain store.  On big-endian a dedicated
;; insn is used to keep the in-register lane numbering; on little-endian
;; an ordinary move suffices.
5628 (define_expand "aarch64_st1<VALL_F16:mode>"
5629 [(match_operand:DI 0 "register_operand")
5630 (match_operand:VALL_F16 1 "register_operand")]
5631 "TARGET_SIMD"
5632 {
5633 machine_mode mode = <VALL_F16:MODE>mode;
5634 rtx mem = gen_rtx_MEM (mode, operands[0]);
5635
5636 if (BYTES_BIG_ENDIAN)
5637 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5638 else
5639 emit_move_insn (mem, operands[1]);
5640 DONE;
5641 })
5642
5643 ;; Expander for builtins to insert vector registers into large
5644 ;; opaque integer modes.
5645
5646 ;; Q-register list. We don't need a D-reg inserter as we zero
5647 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5648
;; Copies the whole tuple (operand 1) to the destination, then
;; overwrites the selected 16-byte slot via a subreg at byte offset
;; part * 16.
5649 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5650 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5651 (match_operand:VSTRUCT 1 "register_operand" "0")
5652 (match_operand:VQ 2 "register_operand" "w")
5653 (match_operand:SI 3 "immediate_operand" "i")]
5654 "TARGET_SIMD"
5655 {
5656 int part = INTVAL (operands[3]);
5657 int offset = part * 16;
5658
5659 emit_move_insn (operands[0], operands[1]);
5660 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5661 operands[2]);
5662 DONE;
5663 })
5664
5665 ;; Standard pattern name vec_init<mode><Vel>.
5666
;; Delegates all element-initialisation strategies to
;; aarch64_expand_vector_init in aarch64.c.
5667 (define_expand "vec_init<mode><Vel>"
5668 [(match_operand:VALL_F16 0 "register_operand" "")
5669 (match_operand 1 "" "")]
5670 "TARGET_SIMD"
5671 {
5672 aarch64_expand_vector_init (operands[0], operands[1]);
5673 DONE;
5674 })
5675
;; LD1R: load one element from memory and replicate it to every lane
;; (matched from a vec_duplicate of a memory scalar).
5676 (define_insn "*aarch64_simd_ld1r<mode>"
5677 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5678 (vec_duplicate:VALL_F16
5679 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5680 "TARGET_SIMD"
5681 "ld1r\\t{%0.<Vtype>}, %1"
5682 [(set_attr "type" "neon_load1_all_lanes")]
5683 )
5684
;; FRECPE: floating-point reciprocal estimate (vector).
5685 (define_insn "aarch64_frecpe<mode>"
5686 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5687 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5688 UNSPEC_FRECPE))]
5689 "TARGET_SIMD"
5690 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5691 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5692 )
5693
;; Scalar FRECP<suffix>; the FRECP iterator (see iterators.md) supplies
;; the unspec and the mnemonic suffix.
5694 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5695 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5696 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5697 FRECP))]
5698 "TARGET_SIMD"
5699 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5700 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5701 )
5702
;; FRECPS: floating-point reciprocal step (vector and scalar forms via
;; VHSDF_HSDF).
5703 (define_insn "aarch64_frecps<mode>"
5704 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5705 (unspec:VHSDF_HSDF
5706 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5707 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5708 UNSPEC_FRECPS))]
5709 "TARGET_SIMD"
5710 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5711 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5712 )
5713
;; URECPE: unsigned reciprocal estimate on 32-bit integer vectors.
5714 (define_insn "aarch64_urecpe<mode>"
5715 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5716 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5717 UNSPEC_URECPE))]
5718 "TARGET_SIMD"
5719 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5720 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5721
5722 ;; Standard pattern name vec_extract<mode><Vel>.
5723
;; Extracts lane <operand 2> of operand 1 by deferring to the
;; aarch64_get_lane insn.
5724 (define_expand "vec_extract<mode><Vel>"
5725 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5726 (match_operand:VALL_F16 1 "register_operand" "")
5727 (match_operand:SI 2 "immediate_operand" "")]
5728 "TARGET_SIMD"
5729 {
5730 emit_insn
5731 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5732 DONE;
5733 })
5734
5735 ;; aes
5736
;; AESE/AESD round; operand 1 is tied to the destination ("0"
;; constraint), so only operand 2 appears in the asm template.
5737 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5738 [(set (match_operand:V16QI 0 "register_operand" "=w")
5739 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5740 (match_operand:V16QI 2 "register_operand" "w")]
5741 CRYPTO_AES))]
5742 "TARGET_SIMD && TARGET_CRYPTO"
5743 "aes<aes_op>\\t%0.16b, %2.16b"
5744 [(set_attr "type" "crypto_aese")]
5745 )
5746
5747 ;; When AES/AESMC fusion is enabled we want the register allocation to
5748 ;; look like:
5749 ;; AESE Vn, _
5750 ;; AESMC Vn, Vn
5751 ;; So prefer to tie operand 1 to operand 0 when fusing.
5752
;; The first alternative (tied "0") is only enabled when the tune flags
;; request AESE/AESMC fusion; otherwise the untied "w" alternative is
;; used.
5753 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5754 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5755 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5756 CRYPTO_AESMC))]
5757 "TARGET_SIMD && TARGET_CRYPTO"
5758 "aes<aesmc_op>\\t%0.16b, %1.16b"
5759 [(set_attr "type" "crypto_aesmc")
5760 (set_attr_alternative "enabled"
5761 [(if_then_else (match_test
5762 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5763 (const_string "yes" )
5764 (const_string "no"))
5765 (const_string "yes")])]
5766 )
5767
5768 ;; sha1
5769
;; SHA1H on a scalar SImode value held in a SIMD register.
5770 (define_insn "aarch64_crypto_sha1hsi"
5771 [(set (match_operand:SI 0 "register_operand" "=w")
5772 (unspec:SI [(match_operand:SI 1
5773 "register_operand" "w")]
5774 UNSPEC_SHA1H))]
5775 "TARGET_SIMD && TARGET_CRYPTO"
5776 "sha1h\\t%s0, %s1"
5777 [(set_attr "type" "crypto_sha1_fast")]
5778 )
5779
;; SHA1H applied to lane 0 of a V4SI source — the little-endian form
;; (see the big-endian variant below, which selects lane 3).
5780 (define_insn "aarch64_crypto_sha1hv4si"
5781 [(set (match_operand:SI 0 "register_operand" "=w")
5782 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5783 (parallel [(const_int 0)]))]
5784 UNSPEC_SHA1H))]
5785 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5786 "sha1h\\t%s0, %s1"
5787 [(set_attr "type" "crypto_sha1_fast")]
5788 )
5789
;; Big-endian counterpart of the pattern above: the same architectural
;; lane is lane 3 in RTL numbering on big-endian.
5790 (define_insn "aarch64_be_crypto_sha1hv4si"
5791 [(set (match_operand:SI 0 "register_operand" "=w")
5792 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5793 (parallel [(const_int 3)]))]
5794 UNSPEC_SHA1H))]
5795 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5796 "sha1h\\t%s0, %s1"
5797 [(set_attr "type" "crypto_sha1_fast")]
5798 )
5799
;; SHA1SU1 schedule update; operand 1 is tied to the destination.
5800 (define_insn "aarch64_crypto_sha1su1v4si"
5801 [(set (match_operand:V4SI 0 "register_operand" "=w")
5802 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5803 (match_operand:V4SI 2 "register_operand" "w")]
5804 UNSPEC_SHA1SU1))]
5805 "TARGET_SIMD && TARGET_CRYPTO"
5806 "sha1su1\\t%0.4s, %2.4s"
5807 [(set_attr "type" "crypto_sha1_fast")]
5808 )
5809
;; SHA1 hash-update family (CRYPTO_SHA1 selects the variant via
;; sha1_op); the hash state in operand 1 is tied to the destination.
5810 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5811 [(set (match_operand:V4SI 0 "register_operand" "=w")
5812 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5813 (match_operand:SI 2 "register_operand" "w")
5814 (match_operand:V4SI 3 "register_operand" "w")]
5815 CRYPTO_SHA1))]
5816 "TARGET_SIMD && TARGET_CRYPTO"
5817 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5818 [(set_attr "type" "crypto_sha1_slow")]
5819 )
5820
;; SHA1SU0 schedule update; operand 1 is tied to the destination.
5821 (define_insn "aarch64_crypto_sha1su0v4si"
5822 [(set (match_operand:V4SI 0 "register_operand" "=w")
5823 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5824 (match_operand:V4SI 2 "register_operand" "w")
5825 (match_operand:V4SI 3 "register_operand" "w")]
5826 UNSPEC_SHA1SU0))]
5827 "TARGET_SIMD && TARGET_CRYPTO"
5828 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5829 [(set_attr "type" "crypto_sha1_xor")]
5830 )
5831
5832 ;; sha256
5833
;; SHA256H/SHA256H2 hash update (CRYPTO_SHA256 selects the variant);
;; the hash state in operand 1 is tied to the destination.
5834 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5835 [(set (match_operand:V4SI 0 "register_operand" "=w")
5836 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5837 (match_operand:V4SI 2 "register_operand" "w")
5838 (match_operand:V4SI 3 "register_operand" "w")]
5839 CRYPTO_SHA256))]
5840 "TARGET_SIMD && TARGET_CRYPTO"
5841 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5842 [(set_attr "type" "crypto_sha256_slow")]
5843 )
5844
;; SHA256SU0 schedule update; operand 1 is tied to the destination, so
;; only operand 2 appears in the asm template.
5845 (define_insn "aarch64_crypto_sha256su0v4si"
5846 [(set (match_operand:V4SI 0 "register_operand" "=w")
5847 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5848 (match_operand:V4SI 2 "register_operand" "w")]
5849 UNSPEC_SHA256SU0))]
;; Whitespace fix: was "TARGET_SIMD &&TARGET_CRYPTO", inconsistent with
;; every other crypto pattern in this file (semantics unchanged).
5850 "TARGET_SIMD && TARGET_CRYPTO"
5851 "sha256su0\\t%0.4s, %2.4s"
5852 [(set_attr "type" "crypto_sha256_fast")]
5853 )
5854
;; SHA256SU1 schedule update; operand 1 is tied to the destination.
5855 (define_insn "aarch64_crypto_sha256su1v4si"
5856 [(set (match_operand:V4SI 0 "register_operand" "=w")
5857 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5858 (match_operand:V4SI 2 "register_operand" "w")
5859 (match_operand:V4SI 3 "register_operand" "w")]
5860 UNSPEC_SHA256SU1))]
;; Whitespace fix: was "TARGET_SIMD &&TARGET_CRYPTO", inconsistent with
;; every other crypto pattern in this file (semantics unchanged).
5861 "TARGET_SIMD && TARGET_CRYPTO"
5862 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5863 [(set_attr "type" "crypto_sha256_slow")]
5864 )
5865
5866 ;; pmull
5867
;; PMULL: 64x64 -> 128-bit polynomial multiply of DI scalars.
5868 (define_insn "aarch64_crypto_pmulldi"
5869 [(set (match_operand:TI 0 "register_operand" "=w")
5870 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5871 (match_operand:DI 2 "register_operand" "w")]
5872 UNSPEC_PMULL))]
5873 "TARGET_SIMD && TARGET_CRYPTO"
5874 "pmull\\t%0.1q, %1.1d, %2.1d"
5875 [(set_attr "type" "crypto_pmull")]
5876 )
5877
;; PMULL2: polynomial multiply of the high halves of two V2DI vectors,
;; producing a 128-bit result.
5878 (define_insn "aarch64_crypto_pmullv2di"
5879 [(set (match_operand:TI 0 "register_operand" "=w")
5880 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5881 (match_operand:V2DI 2 "register_operand" "w")]
5882 UNSPEC_PMULL2))]
5883 "TARGET_SIMD && TARGET_CRYPTO"
5884 "pmull2\\t%0.1q, %1.2d, %2.2d"
5885 [(set_attr "type" "crypto_pmull")]
5886 )