/* ACLE support for AArch64 SVE (function_base classes)
   Copyright (C) 2018-2022 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H

namespace aarch64_sve {

/* Wrap T, which is derived from function_base, and indicate that the
   function never has side effects.  It is only necessary to use this
   wrapper on functions that might have floating-point suffixes, since
   otherwise we assume by default that the function has no side effects.  */
template<typename T>
class quiet : public T
{
public:
  CONSTEXPR quiet () : T () {}

  /* Unfortunately we can't use parameter packs yet; forward up to three
     constructor arguments by hand.  */
  template<typename T1>
  CONSTEXPR quiet (const T1 &t1) : T (t1) {}

  template<typename T1, typename T2>
  CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {}

  template<typename T1, typename T2, typename T3>
  CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3)
    : T (t1, t2, t3) {}

  /* Report no call properties (no memory access, no FP side effects),
     overriding whatever T would have reported.  */
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return 0;
  }
};

/* A function_base that sometimes or always operates on tuples of
   vectors.  */
class multi_vector_function : public function_base
{
public:
  CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple)
    : m_vectors_per_tuple (vectors_per_tuple) {}

  /* Report the tuple size recorded by the constructor.  */
  unsigned int
  vectors_per_tuple () const OVERRIDE
  {
    return m_vectors_per_tuple;
  }

  /* The number of vectors in a tuple, or 1 if the function only operates
     on single vectors.  */
  unsigned int m_vectors_per_tuple;
};

/* A function_base that loads or stores contiguous memory elements
   without extending or truncating them.  */
class full_width_access : public multi_vector_function
{
public:
  CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1)
    : multi_vector_function (vectors_per_tuple) {}

  /* The memory elements have exactly the same type as the vector
     elements of type suffix 0.  */
  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    return fi.scalar_type (0);
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    machine_mode mode = fi.vector_mode (0);
    /* Tuple accesses transfer a whole array of vectors.  */
    if (m_vectors_per_tuple != 1)
      mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
    return mode;
  }
};

/* A function_base that loads elements from memory and extends them
   to a wider element.  The memory element type is a fixed part of
   the function base name.  */
class extending_load : public function_base
{
public:
  CONSTEXPR extending_load (type_suffix_index memory_type)
    : m_memory_type (memory_type) {}

  /* Extending loads only read memory; they raise no FP exceptions.  */
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  /* The memory element type comes from the fixed memory type suffix,
     not from the function instance.  */
  tree
  memory_scalar_type (const function_instance &) const OVERRIDE
  {
    return scalar_types[type_suffixes[m_memory_type].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    /* Combine the (narrow) memory element mode with the number of units
       of the (wider) register vector mode.  */
    machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode;
    machine_mode reg_mode = fi.vector_mode (0);
    return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode),
                                  GET_MODE_NUNITS (reg_mode)).require ();
  }

  /* Return the rtx code associated with the kind of extension that
     the load performs.  */
  rtx_code
  extend_rtx_code () const
  {
    return (type_suffixes[m_memory_type].unsigned_p
            ? ZERO_EXTEND : SIGN_EXTEND);
  }

  /* The type of the memory elements.  This is part of the function base
     name rather than a true type suffix.  */
  type_suffix_index m_memory_type;
};

/* A function_base that truncates vector elements and stores them to memory.
   The memory element width is a fixed part of the function base name.  */
class truncating_store : public function_base
{
public:
  CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {}

  /* Truncating stores only write memory; they raise no FP exceptions.  */
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    /* In truncating stores, the signedness of the memory element is defined
       to be the same as the signedness of the vector element.  The signedness
       doesn't make any difference to the behavior of the function.  */
    type_class_index tclass = fi.type_suffix (0).tclass;
    unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode);
    type_suffix_index suffix = find_type_suffix (tclass, element_bits);
    return scalar_types[type_suffixes[suffix].vector_type];
  }

  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    /* Keep the register mode's number of units but use the narrower
       memory element mode.  */
    poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
    return aarch64_sve_data_mode (m_to_mode, nunits).require ();
  }

  /* The mode of a single memory element.  */
  scalar_int_mode m_to_mode;
};

/* An incomplete function_base for functions that have an associated rtx code.
   It simply records information about the mapping for derived classes
   to use.  */
class rtx_code_function_base : public function_base
{
public:
  CONSTEXPR rtx_code_function_base (rtx_code code_for_sint,
                                    rtx_code code_for_uint,
                                    int unspec_for_fp = -1)
    : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint),
      m_unspec_for_fp (unspec_for_fp) {}

  /* The rtx code to use for signed and unsigned integers respectively.
     Can be UNKNOWN for functions that don't have integer forms.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The UNSPEC_COND_* to use for floating-point operations.  Can be -1
     for functions that only operate on integers.  */
  int m_unspec_for_fp;
};

/* A function_base for functions that have an associated rtx code.
   It supports all forms of predication except PRED_implicit.  */
class rtx_code_function : public rtx_code_function_base
{
public:
  CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint,
                               int unspec_for_fp = -1)
    : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}

  /* Let the expander pick the appropriate code or unspec based on the
     instance's type suffix and predication.  */
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_fp);
  }
};

/* Like rtx_code_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class rtx_code_function_rotated : public rtx_code_function_base
{
public:
  CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint,
                                       rtx_code code_for_uint,
                                       int unspec_for_fp = -1)
    : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.
       The rotation skips the governing predicate when one is present.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
                               m_unspec_for_fp, nargs - 1);
  }
};

/* An incomplete function_base for functions that have an associated
   unspec code, with separate codes for signed integers, unsigned
   integers and floating-point values.  The class simply records
   information about the mapping for derived classes to use.  */
class unspec_based_function_base : public function_base
{
public:
  CONSTEXPR unspec_based_function_base (int unspec_for_sint,
                                        int unspec_for_uint,
                                        int unspec_for_fp)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint),
      m_unspec_for_fp (unspec_for_fp)
  {}

  /* Return the unspec code to use for INSTANCE, based on type suffix 0.  */
  int
  unspec_for (const function_instance &instance) const
  {
    return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp
            : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint
            : m_unspec_for_sint);
  }

  /* The unspec code associated with signed-integer, unsigned-integer
     and floating-point operations respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
  int m_unspec_for_fp;
};

/* A function_base for functions that have an associated unspec code.
   It supports all forms of predication except PRED_implicit.  */
class unspec_based_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint,
                                   int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  /* Let the expander pick the appropriate unspec based on the instance's
     type suffix and predication.  */
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp);
  }
};

/* Like unspec_based_function, but for functions that take what is normally
   the final argument first.  One use of this class is to handle binary
   reversed operations; another is to handle MLA-style operations that
   are normally expressed in GCC as MAD-style operations.  */
class unspec_based_function_rotated : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function_rotated (int unspec_for_sint,
                                           int unspec_for_uint,
                                           int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Rotate the inputs into their normal order, but continue to make _m
       functions merge with what was originally the first vector argument.
       The rotation skips the governing predicate when one is present.  */
    unsigned int nargs = e.args.length ();
    e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs);
    return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint,
                             m_unspec_for_fp, nargs - 1);
  }
};

/* Like unspec_based_function, but map the function directly to
   CODE (UNSPEC, M), where M is the vector mode associated with type
   suffix 0, instead of using the generic predication-based expansion.
   This is useful if the unspec doesn't describe the full operation or
   if the usual predication rules don't apply for some reason.  */
template<insn_code (*CODE) (int, machine_mode)>
class unspec_based_function_exact_insn : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint,
                                              int unspec_for_uint,
                                              int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0)));
  }
};

/* A function that performs an unspec and then adds it to another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add>
  unspec_based_add_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_add_lane>
  unspec_based_add_lane_function;

/* Generic unspec-based _lane function.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_lane>
  unspec_based_lane_function;

/* A function that uses aarch64_pred* patterns regardless of the
   predication type.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_pred>
  unspec_based_pred_function;

/* Like unspec_based_add_function and unspec_based_add_lane_function,
   but using saturating addition.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd>
  unspec_based_qadd_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qadd_lane>
  unspec_based_qadd_lane_function;

/* Like unspec_based_sub_function and unspec_based_sub_lane_function,
   but using saturating subtraction.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub>
  unspec_based_qsub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_qsub_lane>
  unspec_based_qsub_lane_function;

/* A function that performs an unspec and then subtracts it from
   another value.  */
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub>
  unspec_based_sub_function;
typedef unspec_based_function_exact_insn<code_for_aarch64_sve_sub_lane>
  unspec_based_sub_lane_function;

/* A function that acts like unspec_based_function_exact_insn<INT_CODE>
   when operating on integers, but that expands to an (fma ...)-style
   aarch64_sve* operation when applied to floats.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_fused_function (int unspec_for_sint,
                                         int unspec_for_uint,
                                         int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (0).float_p)
      {
        /* Put the operands in the normal (fma ...) order, with the accumulator
           last.  This fits naturally since that's also the unprinted operand
           in the asm output.  */
        e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3);
        icode = code_for_aarch64_sve (unspec, e.vector_mode (0));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_function<code_for_aarch64_sve_add>
  unspec_based_mla_function;
typedef unspec_based_fused_function<code_for_aarch64_sve_sub>
  unspec_based_mls_function;

/* Like unspec_based_fused_function, but for _lane functions.  */
template<insn_code (*INT_CODE) (int, machine_mode)>
class unspec_based_fused_lane_function : public unspec_based_function_base
{
public:
  CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint,
                                              int unspec_for_uint,
                                              int unspec_for_fp)
    : unspec_based_function_base (unspec_for_sint, unspec_for_uint,
                                  unspec_for_fp)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    int unspec = unspec_for (e);
    insn_code icode;
    if (e.type_suffix (0).float_p)
      {
        /* Put the operands in the normal (fma ...) order, with the accumulator
           last.  This fits naturally since that's also the unprinted operand
           in the asm output.  The extra rotated operand (compared with
           unspec_based_fused_function) is the lane index.  */
        e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4);
        icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
      }
    else
      icode = INT_CODE (unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_add_lane>
  unspec_based_mla_lane_function;
typedef unspec_based_fused_lane_function<code_for_aarch64_sve_sub_lane>
  unspec_based_mls_lane_function;

/* A function_base that uses CODE_FOR_MODE (M) to get the associated
   instruction code, where M is the vector mode associated with type
   suffix N.  */
template<insn_code (*CODE_FOR_MODE) (machine_mode), unsigned int N>
class code_for_mode_function : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N)));
  }
};

/* A function that uses code_for_<PATTERN> (M), where M is the vector
   mode associated with the first type suffix.  */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>

/* Likewise for the second type suffix.  */
#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>

/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
   operating on floating-point data.  */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
  quiet< code_for_mode_function<code_for_##PATTERN, 0> >

/* A function_base for functions that always expand to a fixed insn pattern,
   regardless of what the suffixes are.  */
class fixed_insn_function : public function_base
{
public:
  CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (m_code);
  }

  /* The instruction to use.  */
  insn_code m_code;
};

/* A function_base for functions that permute their arguments.  */
class permute : public quiet<function_base>
{
public:
  /* Fold a unary or binary permute with the permute vector given by
     BUILDER.  Return null if the fold isn't possible.  */
  gimple *
  fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* For unary permutes the first and last arguments are the same,
       so this handles both arities uniformly.  */
    unsigned int nargs = gimple_call_num_args (f.call);
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_indices indices (builder, nargs, nelts);
    tree perm_type = build_vector_type (ssizetype, nelts);
    return gimple_build_assign (f.lhs, VEC_PERM_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, nargs - 1),
                                vec_perm_indices_to_tree (perm_type, indices));
  }
};

/* A function_base for functions that permute two vectors using a fixed
   choice of indices.  */
class binary_permute : public permute
{
public:
  CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

532 | /* A function_base for functions that reduce a vector to a scalar. */ | |
533 | class reduction : public function_base | |
534 | { | |
535 | public: | |
536 | CONSTEXPR reduction (int unspec) | |
537 | : m_unspec_for_sint (unspec), | |
538 | m_unspec_for_uint (unspec), | |
539 | m_unspec_for_fp (unspec) | |
540 | {} | |
541 | ||
542 | CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint, | |
543 | int unspec_for_fp) | |
544 | : m_unspec_for_sint (unspec_for_sint), | |
545 | m_unspec_for_uint (unspec_for_uint), | |
546 | m_unspec_for_fp (unspec_for_fp) | |
547 | {} | |
548 | ||
549 | rtx | |
550 | expand (function_expander &e) const OVERRIDE | |
551 | { | |
552 | machine_mode mode = e.vector_mode (0); | |
553 | int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp | |
554 | : e.type_suffix (0).unsigned_p ? m_unspec_for_uint | |
555 | : m_unspec_for_sint); | |
556 | /* There's no distinction between SADDV and UADDV for 64-bit elements; | |
557 | the signed versions only exist for narrower elements. */ | |
558 | if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV) | |
559 | unspec = UNSPEC_UADDV; | |
560 | return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode)); | |
561 | } | |
562 | ||
563 | /* The unspec code associated with signed-integer, unsigned-integer | |
564 | and floating-point operations respectively. */ | |
565 | int m_unspec_for_sint; | |
566 | int m_unspec_for_uint; | |
567 | int m_unspec_for_fp; | |
568 | }; | |
569 | ||
/* A function_base for functions that shift narrower-than-64-bit values
   by 64-bit amounts.  */
class shift_wide : public function_base
{
public:
  CONSTEXPR shift_wide (rtx_code code, int wide_unspec)
    : m_code (code), m_wide_unspec (wide_unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    machine_mode elem_mode = GET_MODE_INNER (mode);

    /* If the argument is a constant that the normal shifts can handle
       directly, use them instead.  */
    rtx shift = unwrap_const_vec_duplicate (e.args.last ());
    if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT))
      {
        e.args.last () = shift;
        return e.map_to_rtx_codes (m_code, m_code, -1);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode));

    return e.use_cond_insn (code_for_cond (m_wide_unspec, mode));
  }

  /* The rtx code associated with a "normal" shift.  */
  rtx_code m_code;

  /* The unspec code associated with the wide shift.  */
  int m_wide_unspec;
};

/* A function_base for unary functions that count bits.  */
class unary_count : public quiet<function_base>
{
public:
  CONSTEXPR unary_count (rtx_code code) : m_code (code) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The md patterns treat the operand as an integer, so view the
       input through the equivalent integer vector mode.  */
    machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0));
    e.args.last () = gen_lowpart (mode, e.args.last ());

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode));

    return e.use_cond_insn (code_for_cond (m_code, mode));
  }

  /* The rtx code associated with the operation.  */
  rtx_code m_code;
};

/* A function_base for svwhile* functions.  */
class while_comparison : public function_base
{
public:
  CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint)
    : m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Suffix 0 determines the predicate mode, suffix 1 determines the
       scalar mode and signedness.  */
    int unspec = (e.type_suffix (1).unsigned_p
                  ? m_unspec_for_uint
                  : m_unspec_for_sint);
    machine_mode pred_mode = e.vector_mode (0);
    scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
    return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
  }

  /* The unspec codes associated with signed and unsigned operations
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

} /* end namespace aarch64_sve */

/* Declare the global function base NAME, creating it from an instance
   of class CLASS with constructor arguments ARGS.  */
#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }

#endif