1 /* ACLE support for AArch64 SVE (function_base classes)
2 Copyright (C) 2018-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
21 #define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H
23 namespace aarch64_sve
{
25 /* Wrap T, which is derived from function_base, and indicate that the
26 function never has side effects. It is only necessary to use this
27 wrapper on functions that might have floating-point suffixes, since
28 otherwise we assume by default that the function has no side effects. */
30 class quiet
: public T
33 CONSTEXPR
quiet () : T () {}
35 /* Unfortunately we can't use parameter packs yet. */
37 CONSTEXPR
quiet (const T1
&t1
) : T (t1
) {}
39 template<typename T1
, typename T2
>
40 CONSTEXPR
quiet (const T1
&t1
, const T2
&t2
) : T (t1
, t2
) {}
42 template<typename T1
, typename T2
, typename T3
>
43 CONSTEXPR
quiet (const T1
&t1
, const T2
&t2
, const T3
&t3
)
47 call_properties (const function_instance
&) const OVERRIDE
53 /* A function_base that sometimes or always operates on tuples of
55 class multi_vector_function
: public function_base
58 CONSTEXPR
multi_vector_function (unsigned int vectors_per_tuple
)
59 : m_vectors_per_tuple (vectors_per_tuple
) {}
62 vectors_per_tuple () const OVERRIDE
64 return m_vectors_per_tuple
;
67 /* The number of vectors in a tuple, or 1 if the function only operates
69 unsigned int m_vectors_per_tuple
;
72 /* A function_base that loads or stores contiguous memory elements
73 without extending or truncating them. */
74 class full_width_access
: public multi_vector_function
77 CONSTEXPR
full_width_access (unsigned int vectors_per_tuple
= 1)
78 : multi_vector_function (vectors_per_tuple
) {}
81 memory_scalar_type (const function_instance
&fi
) const OVERRIDE
83 return fi
.scalar_type (0);
87 memory_vector_mode (const function_instance
&fi
) const OVERRIDE
89 machine_mode mode
= fi
.vector_mode (0);
90 if (m_vectors_per_tuple
!= 1)
91 mode
= targetm
.array_mode (mode
, m_vectors_per_tuple
).require ();
96 /* A function_base that loads elements from memory and extends them
97 to a wider element. The memory element type is a fixed part of
98 the function base name. */
99 class extending_load
: public function_base
102 CONSTEXPR
extending_load (type_suffix_index memory_type
)
103 : m_memory_type (memory_type
) {}
106 call_properties (const function_instance
&) const OVERRIDE
108 return CP_READ_MEMORY
;
112 memory_scalar_type (const function_instance
&) const OVERRIDE
114 return scalar_types
[type_suffixes
[m_memory_type
].vector_type
];
118 memory_vector_mode (const function_instance
&fi
) const OVERRIDE
120 machine_mode mem_mode
= type_suffixes
[m_memory_type
].vector_mode
;
121 machine_mode reg_mode
= fi
.vector_mode (0);
122 return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode
),
123 GET_MODE_NUNITS (reg_mode
)).require ();
126 /* Return the rtx code associated with the kind of extension that
127 the load performs. */
129 extend_rtx_code () const
131 return (type_suffixes
[m_memory_type
].unsigned_p
132 ? ZERO_EXTEND
: SIGN_EXTEND
);
135 /* The type of the memory elements. This is part of the function base
136 name rather than a true type suffix. */
137 type_suffix_index m_memory_type
;
140 /* A function_base that truncates vector elements and stores them to memory.
141 The memory element width is a fixed part of the function base name. */
142 class truncating_store
: public function_base
145 CONSTEXPR
truncating_store (scalar_int_mode to_mode
) : m_to_mode (to_mode
) {}
148 call_properties (const function_instance
&) const OVERRIDE
150 return CP_WRITE_MEMORY
;
154 memory_scalar_type (const function_instance
&fi
) const OVERRIDE
156 /* In truncating stores, the signedness of the memory element is defined
157 to be the same as the signedness of the vector element. The signedness
158 doesn't make any difference to the behavior of the function. */
159 type_class_index tclass
= fi
.type_suffix (0).tclass
;
160 unsigned int element_bits
= GET_MODE_BITSIZE (m_to_mode
);
161 type_suffix_index suffix
= find_type_suffix (tclass
, element_bits
);
162 return scalar_types
[type_suffixes
[suffix
].vector_type
];
166 memory_vector_mode (const function_instance
&fi
) const OVERRIDE
168 poly_uint64 nunits
= GET_MODE_NUNITS (fi
.vector_mode (0));
169 return aarch64_sve_data_mode (m_to_mode
, nunits
).require ();
172 /* The mode of a single memory element. */
173 scalar_int_mode m_to_mode
;
176 /* An incomplete function_base for functions that have an associated rtx code.
177 It simply records information about the mapping for derived classes
179 class rtx_code_function_base
: public function_base
182 CONSTEXPR
rtx_code_function_base (rtx_code code_for_sint
,
183 rtx_code code_for_uint
,
184 int unspec_for_fp
= -1)
185 : m_code_for_sint (code_for_sint
), m_code_for_uint (code_for_uint
),
186 m_unspec_for_fp (unspec_for_fp
) {}
188 /* The rtx code to use for signed and unsigned integers respectively.
189 Can be UNKNOWN for functions that don't have integer forms. */
190 rtx_code m_code_for_sint
;
191 rtx_code m_code_for_uint
;
193 /* The UNSPEC_COND_* to use for floating-point operations. Can be -1
194 for functions that only operate on integers. */
198 /* A function_base for functions that have an associated rtx code.
199 It supports all forms of predication except PRED_implicit. */
200 class rtx_code_function
: public rtx_code_function_base
203 CONSTEXPR
rtx_code_function (rtx_code code_for_sint
, rtx_code code_for_uint
,
204 int unspec_for_fp
= -1)
205 : rtx_code_function_base (code_for_sint
, code_for_uint
, unspec_for_fp
) {}
208 expand (function_expander
&e
) const OVERRIDE
210 return e
.map_to_rtx_codes (m_code_for_sint
, m_code_for_uint
,
215 /* Like rtx_code_function, but for functions that take what is normally
216 the final argument first. One use of this class is to handle binary
217 reversed operations; another is to handle MLA-style operations that
218 are normally expressed in GCC as MAD-style operations. */
219 class rtx_code_function_rotated
: public rtx_code_function_base
222 CONSTEXPR
rtx_code_function_rotated (rtx_code code_for_sint
,
223 rtx_code code_for_uint
,
224 int unspec_for_fp
= -1)
225 : rtx_code_function_base (code_for_sint
, code_for_uint
, unspec_for_fp
) {}
228 expand (function_expander
&e
) const OVERRIDE
230 /* Rotate the inputs into their normal order, but continue to make _m
231 functions merge with what was originally the first vector argument. */
232 unsigned int nargs
= e
.args
.length ();
233 e
.rotate_inputs_left (e
.pred
!= PRED_none
? 1 : 0, nargs
);
234 return e
.map_to_rtx_codes (m_code_for_sint
, m_code_for_uint
,
235 m_unspec_for_fp
, nargs
- 1);
239 /* An incomplete function_base for functions that have an associated
240 unspec code, with separate codes for signed integers, unsigned
241 integers and floating-point values. The class simply records
242 information about the mapping for derived classes to use. */
243 class unspec_based_function_base
: public function_base
246 CONSTEXPR
unspec_based_function_base (int unspec_for_sint
,
249 : m_unspec_for_sint (unspec_for_sint
),
250 m_unspec_for_uint (unspec_for_uint
),
251 m_unspec_for_fp (unspec_for_fp
)
254 /* Return the unspec code to use for INSTANCE, based on type suffix 0. */
256 unspec_for (const function_instance
&instance
) const
258 return (!instance
.type_suffix (0).integer_p
? m_unspec_for_fp
259 : instance
.type_suffix (0).unsigned_p
? m_unspec_for_uint
260 : m_unspec_for_sint
);
263 /* The unspec code associated with signed-integer, unsigned-integer
264 and floating-point operations respectively. */
265 int m_unspec_for_sint
;
266 int m_unspec_for_uint
;
270 /* A function_base for functions that have an associated unspec code.
271 It supports all forms of predication except PRED_implicit. */
272 class unspec_based_function
: public unspec_based_function_base
275 CONSTEXPR
unspec_based_function (int unspec_for_sint
, int unspec_for_uint
,
277 : unspec_based_function_base (unspec_for_sint
, unspec_for_uint
,
282 expand (function_expander
&e
) const OVERRIDE
284 return e
.map_to_unspecs (m_unspec_for_sint
, m_unspec_for_uint
,
289 /* Like unspec_based_function, but for functions that take what is normally
290 the final argument first. One use of this class is to handle binary
291 reversed operations; another is to handle MLA-style operations that
292 are normally expressed in GCC as MAD-style operations. */
293 class unspec_based_function_rotated
: public unspec_based_function_base
296 CONSTEXPR
unspec_based_function_rotated (int unspec_for_sint
,
299 : unspec_based_function_base (unspec_for_sint
, unspec_for_uint
,
304 expand (function_expander
&e
) const OVERRIDE
306 /* Rotate the inputs into their normal order, but continue to make _m
307 functions merge with what was originally the first vector argument. */
308 unsigned int nargs
= e
.args
.length ();
309 e
.rotate_inputs_left (e
.pred
!= PRED_none
? 1 : 0, nargs
);
310 return e
.map_to_unspecs (m_unspec_for_sint
, m_unspec_for_uint
,
311 m_unspec_for_fp
, nargs
- 1);
315 /* Like unspec_based_function, but map the function directly to
316 CODE (UNSPEC, M) instead of using the generic predication-based
317 expansion. where M is the vector mode associated with type suffix 0.
318 This is useful if the unspec doesn't describe the full operation or
319 if the usual predication rules don't apply for some reason. */
320 template<insn_code (*CODE
) (int, machine_mode
)>
321 class unspec_based_function_exact_insn
: public unspec_based_function_base
324 CONSTEXPR
unspec_based_function_exact_insn (int unspec_for_sint
,
327 : unspec_based_function_base (unspec_for_sint
, unspec_for_uint
,
332 expand (function_expander
&e
) const OVERRIDE
334 return e
.use_exact_insn (CODE (unspec_for (e
), e
.vector_mode (0)));
338 /* A function that performs an unspec and then adds it to another value. */
339 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_add
>
340 unspec_based_add_function
;
341 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_add_lane
>
342 unspec_based_add_lane_function
;
344 /* Generic unspec-based _lane function. */
345 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_lane
>
346 unspec_based_lane_function
;
348 /* A functon that uses aarch64_pred* patterns regardless of the
350 typedef unspec_based_function_exact_insn
<code_for_aarch64_pred
>
351 unspec_based_pred_function
;
353 /* Like unspec_based_add_function and unspec_based_add_lane_function,
354 but using saturating addition. */
355 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_qadd
>
356 unspec_based_qadd_function
;
357 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_qadd_lane
>
358 unspec_based_qadd_lane_function
;
360 /* Like unspec_based_sub_function and unspec_based_sub_lane_function,
361 but using saturating subtraction. */
362 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_qsub
>
363 unspec_based_qsub_function
;
364 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_qsub_lane
>
365 unspec_based_qsub_lane_function
;
367 /* A function that performs an unspec and then subtracts it from
369 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_sub
>
370 unspec_based_sub_function
;
371 typedef unspec_based_function_exact_insn
<code_for_aarch64_sve_sub_lane
>
372 unspec_based_sub_lane_function
;
374 /* A function that acts like unspec_based_function_exact_insn<INT_CODE>
375 when operating on integers, but that expands to an (fma ...)-style
376 aarch64_sve* operation when applied to floats. */
377 template<insn_code (*INT_CODE
) (int, machine_mode
)>
378 class unspec_based_fused_function
: public unspec_based_function_base
381 CONSTEXPR
unspec_based_fused_function (int unspec_for_sint
,
384 : unspec_based_function_base (unspec_for_sint
, unspec_for_uint
,
389 expand (function_expander
&e
) const OVERRIDE
391 int unspec
= unspec_for (e
);
393 if (e
.type_suffix (0).float_p
)
395 /* Put the operands in the normal (fma ...) order, with the accumulator
396 last. This fits naturally since that's also the unprinted operand
397 in the asm output. */
398 e
.rotate_inputs_left (0, e
.pred
!= PRED_none
? 4 : 3);
399 icode
= code_for_aarch64_sve (unspec
, e
.vector_mode (0));
402 icode
= INT_CODE (unspec
, e
.vector_mode (0));
403 return e
.use_exact_insn (icode
);
406 typedef unspec_based_fused_function
<code_for_aarch64_sve_add
>
407 unspec_based_mla_function
;
408 typedef unspec_based_fused_function
<code_for_aarch64_sve_sub
>
409 unspec_based_mls_function
;
411 /* Like unspec_based_fused_function, but for _lane functions. */
412 template<insn_code (*INT_CODE
) (int, machine_mode
)>
413 class unspec_based_fused_lane_function
: public unspec_based_function_base
416 CONSTEXPR
unspec_based_fused_lane_function (int unspec_for_sint
,
419 : unspec_based_function_base (unspec_for_sint
, unspec_for_uint
,
424 expand (function_expander
&e
) const OVERRIDE
426 int unspec
= unspec_for (e
);
428 if (e
.type_suffix (0).float_p
)
430 /* Put the operands in the normal (fma ...) order, with the accumulator
431 last. This fits naturally since that's also the unprinted operand
432 in the asm output. */
433 e
.rotate_inputs_left (0, e
.pred
!= PRED_none
? 5 : 4);
434 icode
= code_for_aarch64_lane (unspec
, e
.vector_mode (0));
437 icode
= INT_CODE (unspec
, e
.vector_mode (0));
438 return e
.use_exact_insn (icode
);
441 typedef unspec_based_fused_lane_function
<code_for_aarch64_sve_add_lane
>
442 unspec_based_mla_lane_function
;
443 typedef unspec_based_fused_lane_function
<code_for_aarch64_sve_sub_lane
>
444 unspec_based_mls_lane_function
;
446 /* A function_base that uses CODE_FOR_MODE (M) to get the associated
447 instruction code, where M is the vector mode associated with type
449 template<insn_code (*CODE_FOR_MODE
) (machine_mode
), unsigned int N
>
450 class code_for_mode_function
: public function_base
454 expand (function_expander
&e
) const OVERRIDE
456 return e
.use_exact_insn (CODE_FOR_MODE (e
.vector_mode (N
)));
/* A function that uses code_for_<PATTERN> (M), where M is the vector
   mode associated with the first type suffix.  */
#define CODE_FOR_MODE0(PATTERN) code_for_mode_function<code_for_##PATTERN, 0>

/* Likewise for the second type suffix.  */
#define CODE_FOR_MODE1(PATTERN) code_for_mode_function<code_for_##PATTERN, 1>

/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when
   operating on floating-point data.  */
#define QUIET_CODE_FOR_MODE0(PATTERN) \
  quiet< code_for_mode_function<code_for_##PATTERN, 0> >
472 /* A function_base for functions that always expand to a fixed insn pattern,
473 regardless of what the suffixes are. */
474 class fixed_insn_function
: public function_base
477 CONSTEXPR
fixed_insn_function (insn_code code
) : m_code (code
) {}
480 expand (function_expander
&e
) const OVERRIDE
482 return e
.use_exact_insn (m_code
);
485 /* The instruction to use. */
489 /* A function_base for functions that permute their arguments. */
490 class permute
: public quiet
<function_base
>
493 /* Fold a unary or binary permute with the permute vector given by
496 fold_permute (const gimple_folder
&f
, const vec_perm_builder
&builder
) const
498 /* Punt for now on _b16 and wider; we'd need more complex evpc logic
499 to rerecognize the result. */
500 if (f
.type_suffix (0).bool_p
&& f
.type_suffix (0).element_bits
> 8)
503 unsigned int nargs
= gimple_call_num_args (f
.call
);
504 poly_uint64 nelts
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (f
.lhs
));
505 vec_perm_indices
indices (builder
, nargs
, nelts
);
506 tree perm_type
= build_vector_type (ssizetype
, nelts
);
507 return gimple_build_assign (f
.lhs
, VEC_PERM_EXPR
,
508 gimple_call_arg (f
.call
, 0),
509 gimple_call_arg (f
.call
, nargs
- 1),
510 vec_perm_indices_to_tree (perm_type
, indices
));
514 /* A function_base for functions that permute two vectors using a fixed
515 choice of indices. */
516 class binary_permute
: public permute
519 CONSTEXPR
binary_permute (int unspec
) : m_unspec (unspec
) {}
522 expand (function_expander
&e
) const OVERRIDE
524 insn_code icode
= code_for_aarch64_sve (m_unspec
, e
.vector_mode (0));
525 return e
.use_exact_insn (icode
);
528 /* The unspec code associated with the operation. */
532 /* A function_base for functions that reduce a vector to a scalar. */
533 class reduction
: public function_base
536 CONSTEXPR
reduction (int unspec
)
537 : m_unspec_for_sint (unspec
),
538 m_unspec_for_uint (unspec
),
539 m_unspec_for_fp (unspec
)
542 CONSTEXPR
reduction (int unspec_for_sint
, int unspec_for_uint
,
544 : m_unspec_for_sint (unspec_for_sint
),
545 m_unspec_for_uint (unspec_for_uint
),
546 m_unspec_for_fp (unspec_for_fp
)
550 expand (function_expander
&e
) const OVERRIDE
552 machine_mode mode
= e
.vector_mode (0);
553 int unspec
= (!e
.type_suffix (0).integer_p
? m_unspec_for_fp
554 : e
.type_suffix (0).unsigned_p
? m_unspec_for_uint
555 : m_unspec_for_sint
);
556 /* There's no distinction between SADDV and UADDV for 64-bit elements;
557 the signed versions only exist for narrower elements. */
558 if (GET_MODE_UNIT_BITSIZE (mode
) == 64 && unspec
== UNSPEC_SADDV
)
559 unspec
= UNSPEC_UADDV
;
560 return e
.use_exact_insn (code_for_aarch64_pred_reduc (unspec
, mode
));
563 /* The unspec code associated with signed-integer, unsigned-integer
564 and floating-point operations respectively. */
565 int m_unspec_for_sint
;
566 int m_unspec_for_uint
;
570 /* A function_base for functions that shift narrower-than-64-bit values
571 by 64-bit amounts. */
572 class shift_wide
: public function_base
575 CONSTEXPR
shift_wide (rtx_code code
, int wide_unspec
)
576 : m_code (code
), m_wide_unspec (wide_unspec
) {}
579 expand (function_expander
&e
) const OVERRIDE
581 machine_mode mode
= e
.vector_mode (0);
582 machine_mode elem_mode
= GET_MODE_INNER (mode
);
584 /* If the argument is a constant that the normal shifts can handle
585 directly, use them instead. */
586 rtx shift
= unwrap_const_vec_duplicate (e
.args
.last ());
587 if (aarch64_simd_shift_imm_p (shift
, elem_mode
, m_code
== ASHIFT
))
589 e
.args
.last () = shift
;
590 return e
.map_to_rtx_codes (m_code
, m_code
, -1);
593 if (e
.pred
== PRED_x
)
594 return e
.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec
, mode
));
596 return e
.use_cond_insn (code_for_cond (m_wide_unspec
, mode
));
599 /* The rtx code associated with a "normal" shift. */
602 /* The unspec code associated with the wide shift. */
606 /* A function_base for unary functions that count bits. */
607 class unary_count
: public quiet
<function_base
>
610 CONSTEXPR
unary_count (rtx_code code
) : m_code (code
) {}
613 expand (function_expander
&e
) const OVERRIDE
615 /* The md patterns treat the operand as an integer. */
616 machine_mode mode
= aarch64_sve_int_mode (e
.vector_mode (0));
617 e
.args
.last () = gen_lowpart (mode
, e
.args
.last ());
619 if (e
.pred
== PRED_x
)
620 return e
.use_pred_x_insn (code_for_aarch64_pred (m_code
, mode
));
622 return e
.use_cond_insn (code_for_cond (m_code
, mode
));
625 /* The rtx code associated with the operation. */
629 /* A function_base for svwhile* functions. */
630 class while_comparison
: public function_base
633 CONSTEXPR
while_comparison (int unspec_for_sint
, int unspec_for_uint
)
634 : m_unspec_for_sint (unspec_for_sint
),
635 m_unspec_for_uint (unspec_for_uint
)
639 expand (function_expander
&e
) const OVERRIDE
641 /* Suffix 0 determines the predicate mode, suffix 1 determines the
642 scalar mode and signedness. */
643 int unspec
= (e
.type_suffix (1).unsigned_p
645 : m_unspec_for_sint
);
646 machine_mode pred_mode
= e
.vector_mode (0);
647 scalar_mode reg_mode
= GET_MODE_INNER (e
.vector_mode (1));
648 return e
.use_exact_insn (code_for_while (unspec
, reg_mode
, pred_mode
));
651 /* The unspec codes associated with signed and unsigned operations
653 int m_unspec_for_sint
;
654 int m_unspec_for_uint
;
/* Declare the global function base NAME, creating it from an instance
   of class CLASS with constructor arguments ARGS.  */
#define FUNCTION(NAME, CLASS, ARGS) \
  namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \
  namespace functions { const function_base *const NAME = &NAME##_obj; }