/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "expr.h"
#include "basic-block.h"
#include "function.h"
#include "fold-const.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "explow.h"
#include "emit-rtl.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"
#include "ssa.h"

using namespace aarch64_sve;

namespace {

/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}

/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}

/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}

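/* Implements svabd.  */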
class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
	if (e.type_suffix (0).integer_p)
	  icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
	else
	  icode = code_for_aarch64_pred_abd (e.vector_mode (0));
	return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};

/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};

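/* Implements svadda.  */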
class svadda_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits to shift the vector displacement left.  */
  unsigned int m_shift;
};

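/* Implements svbic.  */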
class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
	e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
	return e.map_to_rtx_codes (AND, AND, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
	gcc_assert (e.pred == PRED_z);
	return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};

/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

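/* Implements svcadd.  */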
class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
			       UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
			       UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};

/* Implements svclasta and svclastb.  */
class svclast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    /* Match the fold_extract_optab order.  */
    std::swap (e.args[0], e.args[1]);
    machine_mode mode = e.vector_mode (0);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      icode = code_for_fold_extract (m_unspec, mode);
    else
      icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

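/* Implements svcmla.  */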
class svcmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (e.type_suffix (0).float_p)
      {
	/* Make the operand order the same as the one used by the fma optabs,
	   with the accumulator last.  */
	e.rotate_inputs_left (1, 4);
	return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
      }
    else
      {
	int cmla = unspec_cmla (rot);
	return e.map_to_unspecs (cmla, cmla, -1);
      }
  }
};

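/* Implements svcmla_lane.  */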
class svcmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    machine_mode mode = e.vector_mode (0);
    if (e.type_suffix (0).float_p)
      {
	/* Make the operand order the same as the one used by the fma optabs,
	   with the accumulator last.  */
	e.rotate_inputs_left (0, 4);
	insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
	return e.use_exact_insn (icode);
      }
    else
      {
	insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
	return e.use_exact_insn (icode);
      }
  }
};

/* Implements svcmp<cc> (except svcmpuo, which is handled separately).  */
class svcmp_impl : public function_base
{
public:
  CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
    : m_code (code), m_unspec_for_fp (unspec_for_fp) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pg = gimple_call_arg (f.call, 0);
    tree rhs1 = gimple_call_arg (f.call, 1);
    tree rhs2 = gimple_call_arg (f.call, 2);

    /* Convert a ptrue-predicated integer comparison into the corresponding
       gimple-level operation.  */
    if (integer_all_onesp (pg)
	&& f.type_suffix (0).element_bytes == 1
	&& f.type_suffix (0).integer_p)
      {
	gimple_seq stmts = NULL;
	rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
	gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
	return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    if (e.type_suffix (0).integer_p)
      {
	bool unsigned_p = e.type_suffix (0).unsigned_p;
	rtx_code code = get_rtx_code (m_code, unsigned_p);
	return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
      }

    insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
    return e.use_exact_insn (icode);
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec code to use for floating-point comparisons.  */
  int m_unspec_for_fp;
};

/* Implements svcmp<cc>_wide.  */
class svcmp_wide_impl : public function_base
{
public:
  CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
			     int unspec_for_uint)
    : m_code (code), m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    bool unsigned_p = e.type_suffix (0).unsigned_p;
    rtx_code code = get_rtx_code (m_code, unsigned_p);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    /* If the argument is a constant that the unwidened comparisons
       can handle directly, use them instead.  */
    insn_code icode = code_for_aarch64_pred_cmp (code, mode);
    rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
    if (CONSTANT_P (op2)
	&& insn_data[icode].operand[4].predicate (op2, DImode))
      {
	e.args[3] = op2;
	return e.use_exact_insn (icode);
      }

    int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
    return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec codes for signed and unsigned wide comparisons
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

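/* Implements svcmpuo.  */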
class svcmpuo_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const override
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
  }
};

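/* Implements svcnot.  */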
class svcnot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    if (e.pred == PRED_x)
      {
	/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
	   a ptrue hint.  */
	e.add_ptrue_hint (0, e.gp_mode (0));
	return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
      }

    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
  }
};

/* Implements svcnt[bhwd], which count the number of elements
   in a particular vector mode.  */
class svcnt_bhwd_impl : public function_base
{
public:
  CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
  }

  rtx
  expand (function_expander &) const override
  {
    return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
  }

  /* The mode of the vector associated with the [bhwd] suffix.  */
  machine_mode m_ref_mode;
};

/* Implements svcnt[bhwd]_pat.  */
class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
{
public:
  using svcnt_bhwd_impl::svcnt_bhwd_impl;

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
      return svcnt_bhwd_impl::fold (f);

    /* See whether we can count the number of elements in the pattern
       at compile time.  */
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
    if (value >= 0)
      return f.fold_to_cstu (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
    e.args.quick_push (const1_rtx);
    return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
  }
};

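/* Implements svcntp.  */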
class svcntp_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
  }
};

/* Implements svcreate2, svcreate3 and svcreate4.  */
class svcreate_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  gimple *
  fold (gimple_folder &f) const override
  {
    unsigned int nargs = gimple_call_num_args (f.call);
    tree lhs_type = TREE_TYPE (f.lhs);

    /* Replace the call with a clobber of the result (to prevent it from
       becoming upwards exposed) followed by stores into each individual
       vector of the tuple.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the final vector store.  */
    gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));

    for (unsigned int i = nargs; i-- > 0; )
      {
	tree rhs_vector = gimple_call_arg (f.call, i);
	tree field = tuple_type_field (TREE_TYPE (f.lhs));
	tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
				 unshare_expr (f.lhs), field, NULL_TREE);
	tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
				  lhs_array, size_int (i),
				  NULL_TREE, NULL_TREE);
	gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
	gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
      }
    return clobber;
  }

  rtx
  expand (function_expander &e) const override
  {
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();

    /* Record that LHS_TUPLE is dead before the first store.  */
    emit_clobber (lhs_tuple);
    for (unsigned int i = 0; i < e.args.length (); ++i)
      {
	/* Use an lvalue subreg to refer to vector I in LHS_TUPLE.  */
	rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
					      lhs_tuple, GET_MODE (lhs_tuple),
					      i * BYTES_PER_SVE_VECTOR);
	emit_move_insn (lhs_vector, e.args[i]);
      }
    return lhs_tuple;
  }
};

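/* Implements svcvt.  */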
class svcvt_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode0 = e.vector_mode (0);
    machine_mode mode1 = e.vector_mode (1);
    insn_code icode;
    /* All this complication comes from the need to select four things
       simultaneously:

       (1) the kind of conversion (int<-float, float<-int, float<-float)
       (2) signed vs. unsigned integers, where relevant
       (3) the predication mode, which must be the wider of the predication
	   modes for MODE0 and MODE1
       (4) the predication type (m, x or z)

       The only supported int<->float conversions for which the integer is
       narrower than the float are SI<->DF.  It's therefore more convenient
       to handle (3) by defining two patterns for int<->float conversions:
       one in which the integer is at least as wide as the float and so
       determines the predication mode, and another single SI<->DF pattern
       in which the float's mode determines the predication mode (which is
       always VNx2BI in that case).

       The names of the patterns follow the optab convention of giving
       the source mode before the destination mode.  */
    if (e.type_suffix (1).integer_p)
      {
	int unspec = (e.type_suffix (1).unsigned_p
		      ? UNSPEC_COND_UCVTF
		      : UNSPEC_COND_SCVTF);
	if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
	  icode = (e.pred == PRED_x
		   ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
		   : code_for_cond_nonextend (unspec, mode1, mode0));
	else
	  icode = (e.pred == PRED_x
		   ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
		   : code_for_cond_extend (unspec, mode1, mode0));
      }
    else
      {
	int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
		      : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
		      : UNSPEC_COND_FCVTZS);
	if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
	  icode = (e.pred == PRED_x
		   ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
		   : code_for_cond_nontrunc (unspec, mode1, mode0));
	else
	  icode = (e.pred == PRED_x
		   ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
		   : code_for_cond_trunc (unspec, mode1, mode0));
      }

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (icode);
    return e.use_cond_insn (icode);
  }
};

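/* Implements svdot.  */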
class svdot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* In the optab, the multiplication operands come before the accumulator
       operand.  The optab is keyed off the multiplication mode.  */
    e.rotate_inputs_left (0, 3);
    insn_code icode
      = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
					 0, GET_MODE (e.args[0]));
    return e.use_unpred_insn (icode);
  }
};

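/* Implements dot-product lane functions such as svdot_lane.  */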
class svdotprod_lane_impl : public unspec_based_function_base
{
public:
  using unspec_based_function_base::unspec_based_function_base;

  rtx
  expand (function_expander &e) const override
  {
    /* Use the same ordering as the dot_prod_optab, with the
       accumulator last.  */
    e.rotate_inputs_left (0, 4);
    int unspec = unspec_for (e);
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
  }
};

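/* Implements svdup.  */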
class svdup_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    tree vec_type = TREE_TYPE (f.lhs);
    tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);

    if (f.pred == PRED_none || f.pred == PRED_x)
      {
	if (CONSTANT_CLASS_P (rhs))
	  {
	    if (f.type_suffix (0).bool_p)
	      return (tree_to_shwi (rhs)
		      ? f.fold_to_ptrue ()
		      : f.fold_to_pfalse ());

	    tree rhs_vector = build_vector_from_val (vec_type, rhs);
	    return gimple_build_assign (f.lhs, rhs_vector);
	  }

	/* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
	   would need to introduce an extra and unwanted conversion to
	   the truth vector element type.  */
	if (!f.type_suffix (0).bool_p)
	  return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
      }

    /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
    if (f.pred == PRED_z)
      {
	gimple_seq stmts = NULL;
	tree pred = f.convert_pred (stmts, vec_type, 0);
	rhs = f.force_vector (stmts, vec_type, rhs);
	gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
	return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
				    build_zero_cst (vec_type));
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    if (e.pred == PRED_none || e.pred == PRED_x)
      /* There's no benefit to using predicated instructions for _x here.  */
      return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));

    /* Model predicated svdups as a SEL in which the "true" value is
       the duplicate of the function argument and the "false" value
       is the value of inactive lanes.  */
    insn_code icode;
    machine_mode mode = e.vector_mode (0);
    if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
      /* Duplicate the constant to fill a vector.  The pattern optimizes
	 various cases involving constant operands, falling back to SEL
	 if necessary.  */
      icode = code_for_vcond_mask (mode, mode);
    else
      /* Use the pattern for selecting between a duplicated scalar
	 variable and a vector fallback.  */
      icode = code_for_aarch64_sel_dup (mode);
    return e.use_vcond_mask_insn (icode);
  }
};

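/* Implements svdup_lane.  */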
class svdup_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* The native DUP lane has an index range of 64 bytes.  */
    machine_mode mode = e.vector_mode (0);
    if (CONST_INT_P (e.args[1])
	&& IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
      return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));

    /* Treat svdup_lane as if it were svtbl_n.  */
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

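/* Implements svdupq.  */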
class svdupq_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    tree vec_type = TREE_TYPE (f.lhs);
    unsigned int nargs = gimple_call_num_args (f.call);
    /* For predicates, pad out each argument so that we have one element
       per bit.  */
    unsigned int factor = (f.type_suffix (0).bool_p
			   ? f.type_suffix (0).element_bytes : 1);
    tree_vector_builder builder (vec_type, nargs * factor, 1);
    for (unsigned int i = 0; i < nargs; ++i)
      {
	tree elt = gimple_call_arg (f.call, i);
	if (!CONSTANT_CLASS_P (elt))
	  return NULL;
	builder.quick_push (elt);
	for (unsigned int j = 1; j < factor; ++j)
	  builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
      }
    return gimple_build_assign (f.lhs, builder.build ());
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    unsigned int elements_per_vq = e.args.length ();
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      {
	/* Construct a vector of integers so that we can compare them against
	   zero below.  Zero vs. nonzero is the only distinction that
	   matters.  */
	mode = aarch64_sve_int_mode (mode);
	for (unsigned int i = 0; i < elements_per_vq; ++i)
	  e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
					  e.args[i], QImode);
      }

    /* Get the 128-bit Advanced SIMD vector for this data size.  */
    scalar_mode element_mode = GET_MODE_INNER (mode);
    machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
    gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));

    /* Put the arguments into a 128-bit Advanced SIMD vector.  We want
       argument N to go into architectural lane N, whereas Advanced SIMD
       vectors are loaded memory lsb to register lsb.  We therefore need
       to reverse the elements for big-endian targets.  */
    rtx vq_reg = gen_reg_rtx (vq_mode);
    rtvec vec = rtvec_alloc (elements_per_vq);
    for (unsigned int i = 0; i < elements_per_vq; ++i)
      {
	unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
	RTVEC_ELT (vec, i) = e.args[argno];
      }
    aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));

    /* If the result is a boolean, compare the data vector against zero.  */
    if (mode != e.vector_mode (0))
      {
	rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
	return aarch64_convert_sve_data_to_pred (e.possible_target,
						 e.vector_mode (0), data_dupq);
      }

    return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
  }
};

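/* Implements svdupq_lane.  */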
class svdupq_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    rtx index = e.args[1];
    if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
      {
	/* Use the .Q form of DUP, which is the native instruction for
	   this function.  */
	insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
	unsigned int num_indices = e.elements_per_vq (0);
	rtx indices = aarch64_gen_stepped_int_parallel
	  (num_indices, INTVAL (index) * num_indices, 1);

	e.add_output_operand (icode);
	e.add_input_operand (icode, e.args[0]);
	e.add_fixed_operand (indices);
	return e.generate_insn (icode);
      }

    /* Build a .D TBL index for the pairs of doublewords that we want to
       duplicate.  */
    if (CONST_INT_P (index))
      {
	/* The index vector is a constant.  */
	rtx_vector_builder builder (VNx2DImode, 2, 1);
	builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
	builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
	index = builder.build ();
      }
    else
      {
	/* Duplicate INDEX * 2 to fill a DImode vector.  The ACLE spec
	   explicitly allows the top of the index to be dropped.  */
	index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
							index, const1_rtx));
	index = expand_vector_broadcast (VNx2DImode, index);

	/* Get an alternating 0, 1 predicate.  */
	rtx_vector_builder builder (VNx2BImode, 2, 1);
	builder.quick_push (const0_rtx);
	builder.quick_push (constm1_rtx);
	rtx pg = force_reg (VNx2BImode, builder.build ());

	/* Add one to the odd elements of the index.  */
	rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
	rtx target = gen_reg_rtx (VNx2DImode);
	emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
	index = target;
      }

    e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
    e.args[1] = index;
    return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
  }
};

/* Implements svextb, svexth and svextw.  */
class svext_bhw_impl : public function_base
{
public:
  CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
    : m_from_mode (from_mode) {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).unsigned_p)
      {
	/* Convert to an AND.  The widest we go is 0xffffffff, which fits
	   in a CONST_INT.  */
	e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
	if (e.pred == PRED_m)
	  /* We now have arguments "(inactive, pg, op, mask)".  Convert this
	     to "(pg, op, mask, inactive)" so that the order matches svand_m
	     with an extra argument on the end.  Take the inactive elements
	     from this extra argument.  */
	  e.rotate_inputs_left (0, 4);
	return e.map_to_rtx_codes (AND, AND, -1, 3);
      }

    machine_mode wide_mode = e.vector_mode (0);
    poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
    machine_mode narrow_mode
      = aarch64_sve_data_mode (m_from_mode, nunits).require ();
    if (e.pred == PRED_x)
      {
	insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
	return e.use_pred_x_insn (icode);
      }

    insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
    return e.use_cond_insn (icode);
  }

  /* The element mode that we're extending from.  */
  scalar_int_mode m_from_mode;
};

/* Implements svget2, svget3 and svget4.  */
class svget_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  gimple *
  fold (gimple_folder &f) const override
  {
    /* Fold into a normal gimple component access.  */
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
			     rhs_tuple, field, NULL_TREE);
    tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
			      rhs_array, index, NULL_TREE, NULL_TREE);
    return gimple_build_assign (f.lhs, rhs_vector);
  }

  rtx
  expand (function_expander &e) const override
  {
    /* Fold the access into a subreg rvalue.  */
    return simplify_gen_subreg (e.vector_mode (0), e.args[0],
				GET_MODE (e.args[0]),
				INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
  }
};

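/* Implements svindex.  */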
class svindex_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
  }
};

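/* Implements svinsr.  */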
class svinsr_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
						  gimple_call_arg (f.call, 0),
						  gimple_call_arg (f.call, 1));
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = direct_optab_handler (vec_shl_insert_optab,
					    e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svlasta and svlastb.  */
class svlast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

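/* Implements svld1.  */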
class svld1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
						  base, cookie, pred);
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = convert_optab_handler (maskload_optab,
					     e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements extending contiguous forms of svld1.  */
class svld1_extend_impl : public extending_load
{
public:
  using extending_load::extending_load;

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code (),
					     e.vector_mode (0),
					     e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }
};

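/* Implements svld1_gather.  */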
class svld1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_gather_load_optab,
					     mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements extending forms of svld1_gather.  */
class svld1_gather_extend_impl : public extending_load
{
public:
  using extending_load::extending_load;

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the extending gathers use the same
       operand order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
						    e.vector_mode (0),
						    e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

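/* Base class for the load-and-replicate functions svld1rq and svld1ro.  */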
class load_replicate : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const override
  {
    return fi.scalar_type (0);
  }
};

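/* Implements svld1rq.  */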
class svld1rq_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &fi) const override
  {
    return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree arg0 = gimple_call_arg (f.call, 0);
    tree arg1 = gimple_call_arg (f.call, 1);

    /* On little-endian targets, transform:
	 lhs = svld1rq ({-1, -1, ... }, arg1)
       into:
	 tmp = mem_ref<vectype> [(elem * {ref-all}) arg1]
	 lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>
       where vectype is the corresponding Advanced SIMD type.  */

    if (!BYTES_BIG_ENDIAN
	&& integer_all_onesp (arg0))
      {
	tree lhs = gimple_call_lhs (f.call);
	tree lhs_type = TREE_TYPE (lhs);
	poly_uint64 lhs_len = TYPE_VECTOR_SUBPARTS (lhs_type);
	tree eltype = TREE_TYPE (lhs_type);

	scalar_mode elmode = GET_MODE_INNER (TYPE_MODE (lhs_type));
	machine_mode vq_mode = aarch64_vq_mode (elmode).require ();
	tree vectype = build_vector_type_for_mode (eltype, vq_mode);

	tree elt_ptr_type
	  = build_pointer_type_for_mode (eltype, VOIDmode, true);
	tree zero = build_zero_cst (elt_ptr_type);

	/* Use element type alignment.  */
	tree access_type
	  = build_aligned_type (vectype, TYPE_ALIGN (eltype));

	tree mem_ref_lhs = make_ssa_name_fn (cfun, access_type, 0);
	tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
	gimple *mem_ref_stmt
	  = gimple_build_assign (mem_ref_lhs, mem_ref_op);
	gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);

	int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
	vec_perm_builder sel (lhs_len, source_nelts, 1);
	for (int i = 0; i < source_nelts; i++)
	  sel.quick_push (i);

	vec_perm_indices indices (sel, 1, source_nelts);
	gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type),
						   TYPE_MODE (access_type),
						   indices));
	tree mask_type = build_vector_type (ssizetype, lhs_len);
	tree mask = vec_perm_indices_to_tree (mask_type, indices);
	return gimple_build_assign (lhs, VEC_PERM_EXPR,
				    mem_ref_lhs, mem_ref_lhs, mask);
      }

    return NULL;
  }
};

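/* Implements svld1ro.  */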
class svld1ro_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &) const override
  {
    return OImode;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svld2, svld3 and svld4.  */
class svld234_impl : public full_width_access
{
public:
  using full_width_access::full_width_access;

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree tuple_type = TREE_TYPE (f.lhs);
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* Emit two statements: a clobber of the lhs, so that it isn't
       upwards exposed, and then the load itself.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the load.  */
    gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));

    /* View the loaded data as an array of vectors.  */
    tree field = tuple_type_field (tuple_type);
    tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
			     unshare_expr (f.lhs));

    /* Emit the load itself.  */
    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
						  base, cookie, pred);
    gimple_call_set_lhs (new_call, lhs_array);
    gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);

    return clobber;
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
    insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
					     tuple_mode, e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

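/* Implements svldff1_gather.  */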
class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  using extending_load::extending_load;

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
						     e.vector_mode (0),
						     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

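/* Implements svldnt1.  */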
class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
					     e.vector_mode (0),
					     e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

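/* Implements svlen.  */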
class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
				 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};

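/* Implements svmad.  */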
class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_mad (e);
  }
};

1486 | class svmla_impl : public function_base | |
1487 | { | |
1488 | public: | |
1489 | rtx | |
ff171cb1 | 1490 | expand (function_expander &e) const override |
624d0f07 RS |
1491 | { |
1492 | /* Put the accumulator at the end (argument 3), but keep it as the | |
1493 | merge input for _m functions. */ | |
1494 | e.rotate_inputs_left (1, 4); | |
1495 | return expand_mad (e, 3); | |
1496 | } | |
1497 | }; | |
1498 | ||
0a09a948 | 1499 | class svmla_lane_impl : public function_base |
624d0f07 RS |
1500 | { |
1501 | public: | |
624d0f07 | 1502 | rtx |
ff171cb1 | 1503 | expand (function_expander &e) const override |
624d0f07 | 1504 | { |
0a09a948 RS |
1505 | if (e.type_suffix (0).integer_p) |
1506 | { | |
1507 | machine_mode mode = e.vector_mode (0); | |
1508 | return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode)); | |
1509 | } | |
1510 | return expand_mla_mls_lane (e, UNSPEC_FMLA); | |
624d0f07 | 1511 | } |
624d0f07 RS |
1512 | }; |
1513 | ||
1514 | class svmls_impl : public function_base | |
1515 | { | |
1516 | public: | |
1517 | rtx | |
ff171cb1 | 1518 | expand (function_expander &e) const override |
624d0f07 RS |
1519 | { |
1520 | /* Put the accumulator at the end (argument 3), but keep it as the | |
1521 | merge input for _m functions. */ | |
1522 | e.rotate_inputs_left (1, 4); | |
1523 | return expand_msb (e, 3); | |
1524 | } | |
1525 | }; | |

class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};

class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
        if (e.type_suffix (0).unsigned_p)
          icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
        else
          icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_msb (e);
  }
};

class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
           is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
        gcc_assert (e.pred == PRED_z);
        e.args.quick_insert (1, e.args[0]);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    return f.fold_to_pfalse ();
  }

  rtx
  expand (function_expander &) const override
  {
    return CONST0_RTX (VNx16BImode);
  }
};

/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const override
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  rtx
  expand (function_expander &e) const override
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
        hint = SVE_MAYBE_NOT_PTRUE;
        wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
        || !REG_P (target)
        || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
                                  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

  /* The comparison code associated with the ptest condition.  */
  rtx_code m_compare;
};
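
/* For example (illustrative): svptest_any (pg, x) becomes a PTEST
   followed by a CSET of the NE condition, roughly:

     ptest	pg, x.b
     cset	w0, ne

   with LT and LTU playing the same role for svptest_first and
   svptest_last; see the FUNCTION entries at the end of the file.  */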

class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    return f.fold_to_ptrue ();
  }

  rtx
  expand (function_expander &e) const override
  {
    return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
  }
};

class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* In rtl, the predicate is represented as the constant:

         (const:V16BI (unspec:V16BI [(const_int PATTERN)
                                     (const_vector:VnnBI [zeros])]
                                    UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
                          gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};
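
/* For example (illustrative): svptrue_pat_b8 (SV_VL4) always selects
   exactly four elements, since even the minimum 128-bit vector has
   sixteen 8-bit elements, so the fold above turns it into a constant
   predicate with the first four bits set.  Patterns like SV_POW2
   depend on the runtime vector length in length-agnostic code and
   fall through to the rtl form below.  */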

/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
                                     rtx_code code_for_uint,
                                     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const override
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};
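
/* For example: svqincw (x, 2) is handled like svqincw_pat (x, SV_ALL, 2),
   and for the "w" suffix the fake argument inserted above is
   128 / 32 == 4 elements per 128-bit block.  */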

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};

/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
                                  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const override
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
        /* Increment or decrement a scalar (whose mode is given by the first
           type suffix) by the number of active elements in a predicate
           (whose mode is given by the second type suffix).  */
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
         with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
                                   ? CODE_FOR_aarch64_rdffr_z
                                   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};

class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (!targetm.can_change_mode_class (f.vector_mode (0),
                                        f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
                       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};

class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
    poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (int i = 0; i < 3; ++i)
      builder.quick_push (nelts - i - 1);
    return fold_permute (f, builder);
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
  }
};
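
/* Pushing just the three leading indices { nelts - 1, nelts - 2,
   nelts - 3 } is enough because the builder encodes a stepped series
   and extrapolates it; for an 8-element vector the selector expands
   to { 7, 6, 5, 4, 3, 2, 1, 0 }.  */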

class svsel_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* svsel corresponds exactly to VEC_COND_EXPR.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
    return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
                                gimple_call_arg (f.call, 1),
                                gimple_call_arg (f.call, 2));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
    e.rotate_inputs_left (0, 3);
    insn_code icode = convert_optab_handler (vcond_mask_optab,
                                             e.vector_mode (0),
                                             e.gp_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svset2, svset3 and svset4.  */
class svset_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  gimple *
  fold (gimple_folder &f) const override
  {
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree rhs_vector = gimple_call_arg (f.call, 2);

    /* Replace the call with two statements: a copy of the full tuple
       to the call result, followed by an update of the individual vector.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the copy statement
       rather than the field update.  */
    gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);

    /* Get a reference to the individual vector.  */
    tree field = tuple_type_field (TREE_TYPE (f.lhs));
    tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             f.lhs, field, NULL_TREE);
    tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                              lhs_array, index, NULL_TREE, NULL_TREE);
    gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
    gsi_insert_after (f.gsi, update, GSI_SAME_STMT);

    return copy;
  }

  rtx
  expand (function_expander &e) const override
  {
    rtx rhs_tuple = e.args[0];
    unsigned int index = INTVAL (e.args[1]);
    rtx rhs_vector = e.args[2];

    /* First copy the full tuple to the target register.  */
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
    emit_move_insn (lhs_tuple, rhs_tuple);

    /* ...then update the individual vector.  */
    rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
                                          lhs_tuple, GET_MODE (lhs_tuple),
                                          index * BYTES_PER_SVE_VECTOR);
    emit_move_insn (lhs_vector, rhs_vector);
    return lhs_vector;
  }
};
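
/* For example (illustrative): a call such as

     res = svset2_s32 (tuple, 1, vec);

   is folded to the gimple equivalent of:

     res = tuple;
     res.__val[1] = vec;	// "__val" is a stand-in for the tuple field.

   with the copy statement returned as the replacement for the call.  */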

class svsetffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

class svst1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
    return gimple_build_call_internal (IFN_MASK_STORE, 4,
                                       base, cookie, pred, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = convert_optab_handler (maskstore_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svst1_scatter_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating forms of svst1_scatter.  */
class svst1_scatter_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the truncating scatters use the same
       operand order as mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    insn_code icode = code_for_aarch64_scatter_store_trunc
      (e.memory_vector_mode (), e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating contiguous forms of svst1.  */
class svst1_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
                                                    e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

/* Implements svst2, svst3 and svst4.  */
class svst234_impl : public full_width_access
{
public:
  using full_width_access::full_width_access;

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* View the stored data as an array of vectors.  */
    unsigned int num_args = gimple_call_num_args (f.call);
    tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                       base, cookie, pred, rhs_array);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode tuple_mode = GET_MODE (e.args.last ());
    insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svstnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svsub_impl : public rtx_code_function
{
public:
  CONSTEXPR svsub_impl ()
    : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}

  rtx
  expand (function_expander &e) const override
  {
    /* Canonicalize subtractions of constants to additions.  */
    machine_mode mode = e.vector_mode (0);
    if (e.try_negating_argument (2, mode))
      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);

    return rtx_code_function::expand (e);
  }
};
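
/* For example: svsub_x (pg, x, 5) is expanded as svadd_x (pg, x, -5),
   matching GCC's usual canonicalization of subtracting a constant
   into adding its negation.  */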

class svtbl_impl : public permute
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

/* Implements svtrn1 and svtrn2.  */
class svtrn_impl : public binary_permute
{
public:
  CONSTEXPR svtrn_impl (int base)
    : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
       svtrn2: as for svtrn1, but with 1 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (m_base + i * 2);
        builder.quick_push (m_base + i * 2 + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svtrn1, 1 for svtrn2.  */
  unsigned int m_base;
};

/* Base class for svundef{,2,3,4}.  */
class svundef_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  rtx
  expand (function_expander &e) const override
  {
    rtx target = e.get_reg_target ();
    emit_clobber (copy_rtx (target));
    return target;
  }
};

/* Implements svunpklo and svunpkhi.  */
class svunpk_impl : public quiet<function_base>
{
public:
  CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* Don't fold the predicate ops, since every bit of the svbool_t
       result is significant.  */
    if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
      return NULL;

    /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
       and VEC_UNPACK_HI_EXPR for big-endian.  */
    bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
    tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
    return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
    unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
    insn_code icode;
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      icode = code_for_aarch64_sve_punpk (unpacku, mode);
    else
      {
        int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
        icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
      }
    return e.use_exact_insn (icode);
  }

  /* True for svunpkhi, false for svunpklo.  */
  bool m_high_p;
};
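
/* For example: svunpklo_s32 widens the low half of an svint16_t to
   svint32_t.  The intrinsic is defined in terms of register lanes,
   whereas VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR are defined in
   terms of memory element order, hence the BYTES_BIG_ENDIAN flip in
   the fold above.  */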

/* Also implements svsudot.  */
class svusdot_impl : public function_base
{
public:
  CONSTEXPR svusdot_impl (bool su) : m_su (su) {}

  rtx
  expand (function_expander &e) const override
  {
    /* The implementation of the ACLE function svsudot (for the non-lane
       version) is through the USDOT instruction but with the second and
       third inputs swapped.  */
    if (m_su)
      e.rotate_inputs_left (1, 2);
    /* The ACLE function has the same order requirements as for svdot.
       While there's no requirement for the RTL pattern to have the same
       sort of order as that for <sur>dot_prod, it's easier to read.
       Hence we do the same rotation on arguments as svdot_impl does.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
    return e.use_exact_insn (icode);
  }

private:
  bool m_su;
};

/* Implements svuzp1 and svuzp2.  */
class svuzp_impl : public binary_permute
{
public:
  CONSTEXPR svuzp_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svuzp1: { 0, 2, 4, 6, ... }
       svuzp2: { 1, 3, 5, 7, ... }.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (unsigned int i = 0; i < 3; ++i)
      builder.quick_push (m_base + i * 2);
    return fold_permute (f, builder);
  }

  /* 0 for svuzp1, 1 for svuzp2.  */
  unsigned int m_base;
};

/* A function_base for svwhilele and svwhilelt functions.  */
class svwhilelx_impl : public while_comparison
{
public:
  CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
    : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
  {}

  /* Try to fold a call by treating its arguments as constants of type T.  */
  template<typename T>
  gimple *
  fold_type (gimple_folder &f) const
  {
    /* Only handle cases in which both operands are constant.  */
    T arg0, arg1;
    if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
        || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
      return NULL;

    /* Check whether the result is known to be all-false.  */
    if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
      return f.fold_to_pfalse ();

    /* Punt if we can't tell at compile time whether the result
       is all-false.  */
    if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
      return NULL;

    /* At this point we know the result has at least one set element.  */
    poly_uint64 diff = arg1 - arg0;
    poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));

    /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
       from NELTS rather than adding to DIFF, to prevent overflow.  */
    if (m_eq_p)
      nelts -= 1;

    /* Check whether the result is known to be all-true.  */
    if (known_ge (diff, nelts))
      return f.fold_to_ptrue ();

    /* Punt if DIFF might not be the actual number of set elements
       in the result.  Conditional equality is fine.  */
    if (maybe_gt (diff, nelts))
      return NULL;

    /* At this point we know that the predicate will have DIFF set elements
       for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
       after rather than before ARG1 is reached).  See if we can create
       the predicate at compile time.  */
    unsigned HOST_WIDE_INT vl;
    if (diff.is_constant (&vl))
      /* Overflow is no longer possible after the checks above.  */
      return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);

    return NULL;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.type_suffix (1).unsigned_p)
      return fold_type<poly_uint64> (f);
    else
      return fold_type<poly_int64> (f);
  }

  /* True for svwhilele, false for svwhilelt.  */
  bool m_eq_p;
};
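
/* For example: svwhilelt_b32 (0, 4) always produces at least four
   elements (even a 128-bit vector has four 32-bit lanes), so it folds
   to a constant predicate with the first four elements set, while
   svwhilelt_b32 (4, 4) folds to svpfalse.  Calls with non-constant
   arguments are left for rtl.  */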

class svwrffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

/* Implements svzip1 and svzip2.  */
class svzip_impl : public binary_permute
{
public:
  CONSTEXPR svzip_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
       svzip2: as for svzip1, but with nelts / 2 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    poly_uint64 base = m_base * exact_div (nelts, 2);
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (base + i);
        builder.quick_push (base + i + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svzip1, 1 for svzip2.  */
  unsigned int m_base;
};

} /* end anonymous namespace */

namespace aarch64_sve {

FUNCTION (svabd, svabd_impl,)
FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
FUNCTION (svadda, svadda_impl,)
FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
FUNCTION (svadrb, svadr_bhwd_impl, (0))
FUNCTION (svadrd, svadr_bhwd_impl, (3))
FUNCTION (svadrh, svadr_bhwd_impl, (1))
FUNCTION (svadrw, svadr_bhwd_impl, (2))
FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, reduction, (UNSPEC_ANDV))
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
FUNCTION (svbfdot_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
FUNCTION (svbfmlalb_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
FUNCTION (svbfmlalt_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
FUNCTION (svcadd, svcadd_impl,)
FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
FUNCTION (svcls, unary_count, (CLRSB))
FUNCTION (svclz, unary_count, (CLZ))
FUNCTION (svcmla, svcmla_impl,)
FUNCTION (svcmla_lane, svcmla_lane_impl,)
FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
                                          UNSPEC_COND_CMPEQ_WIDE))
FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
                                          UNSPEC_COND_CMPHS_WIDE))
FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
                                          UNSPEC_COND_CMPHI_WIDE))
FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
                                          UNSPEC_COND_CMPLS_WIDE))
FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
                                          UNSPEC_COND_CMPLO_WIDE))
FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
                                          UNSPEC_COND_CMPNE_WIDE))
FUNCTION (svcmpuo, svcmpuo_impl,)
FUNCTION (svcnot, svcnot_impl,)
FUNCTION (svcnt, unary_count, (POPCOUNT))
FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
FUNCTION (svcntp, svcntp_impl,)
FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svdupq_lane, svdupq_lane_impl,)
FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
FUNCTION (sveorv, reduction, (UNSPEC_XORV))
FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
FUNCTION (svextb, svext_bhw_impl, (QImode))
FUNCTION (svexth, svext_bhw_impl, (HImode))
FUNCTION (svextw, svext_bhw_impl, (SImode))
FUNCTION (svget2, svget_impl, (2))
FUNCTION (svget3, svget_impl, (3))
FUNCTION (svget4, svget_impl, (4))
FUNCTION (svindex, svindex_impl,)
FUNCTION (svinsr, svinsr_impl,)
FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld2, svld234_impl, (2))
FUNCTION (svld3, svld234_impl, (3))
FUNCTION (svld4, svld234_impl, (4))
FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
FUNCTION (svldff1_gather, svldff1_gather_impl,)
FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
FUNCTION (svmla, svmla_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
                                          UNSPEC_UMUL_HIGHPART, -1))
FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
FUNCTION (svnand, svnand_impl,)
FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnor, svnor_impl,)
FUNCTION (svnot, svnot_impl,)
FUNCTION (svorn, svorn_impl,)
FUNCTION (svorr, rtx_code_function, (IOR, IOR))
FUNCTION (svorv, reduction, (UNSPEC_IORV))
FUNCTION (svpfalse, svpfalse_impl,)
FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
FUNCTION (svptest_any, svptest_impl, (NE))
FUNCTION (svptest_first, svptest_impl, (LT))
FUNCTION (svptest_last, svptest_impl, (LTU))
FUNCTION (svptrue, svptrue_impl,)
FUNCTION (svptrue_pat, svptrue_pat_impl,)
FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
FUNCTION (svrdffr, svrdffr_impl,)
FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
FUNCTION (svrev, svrev_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
FUNCTION (svset2, svset_impl, (2))
FUNCTION (svset3, svset_impl, (3))
FUNCTION (svset4, svset_impl, (4))
FUNCTION (svsetffr, svsetffr_impl,)
FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
FUNCTION (svst1, svst1_impl,)
FUNCTION (svst1_scatter, svst1_scatter_impl,)
FUNCTION (svst1b, svst1_truncate_impl, (QImode))
FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
FUNCTION (svst1h, svst1_truncate_impl, (HImode))
FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
FUNCTION (svst1w, svst1_truncate_impl, (SImode))
FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
FUNCTION (svst2, svst234_impl, (2))
FUNCTION (svst3, svst234_impl, (3))
FUNCTION (svst4, svst234_impl, (4))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
FUNCTION (svsudot, svusdot_impl, (true))
FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, svtbl_impl,)
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
                                           UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
                                           UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef2, svundef_impl, (2))
FUNCTION (svundef3, svundef_impl, (3))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svusdot, svusdot_impl, (false))
FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
                                           UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
                                           UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
                                           UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
                                           UNSPEC_ZIP2Q))

} /* end namespace aarch64_sve */