PR121604 noted that the SVE intrinsics infrastructure currently assumes that,
for any predicated operation, the GP is in the first argument position that has
an svbool_t, or, for a unary merging operation, in the second position.
However, there are intrinsics such as pmov_lane which have an svbool_t argument
that is not a GP.
There are also instructions such as BRKB which work only on predicates, so the
infrastructure incorrectly takes the first operand to be the GP when it is in
fact the value of the inactive lanes.
However during apply_predication we do have the information about where the GP
is. This patch re-organizes the code to record this information into the
function_instance such that folders have access to this information.
For outlier functions such as pmov_lane, the function shape can now override
whether the intrinsic is treated as having a GP.
gcc/ChangeLog:
PR target/121604
* config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication):
Store gp_index.
(struct pmov_to_vector_lane_def): Mark instruction as has no GP.
* config/aarch64/aarch64-sve-builtins.h (function_instance::gp_value,
function_instance::inactive_values, function_instance::gp_index,
function_shape::has_gp_argument_p): New.
* config/aarch64/aarch64-sve-builtins.cc (gimple_folder::fold_pfalse):
Simplify code and use GP helpers.
gcc/testsuite/ChangeLog:
PR target/121604
* gcc.target/aarch64/sve/pr121604_brk.c: New test.
* gcc.target/aarch64/sve2/pr121604_pmov.c: New test.
Co-authored-by: Jennifer Schmitz <jschmitz@nvidia.com>
(cherry picked from commit
d1965b1fd8938f35f78be503e36b98b406751e21)
#include "system.h"
#include "coretypes.h"
#include "tm.h"
+#include "basic-block.h"
#include "tree.h"
+#include "function.h"
+#include "gimple.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the
function. */
static void
-apply_predication (const function_instance &instance, tree return_type,
+apply_predication (function_instance &instance, tree return_type,
vec<tree> &argument_types)
{
+ /* Start with no governing predicate (GP) recorded; gp_index is updated
+ below if a GP argument is added. */
+ instance.gp_index = -1;
+
/* There are currently no SME ZA instructions that have both merging and
unpredicated forms, so for simplicity, the predicates are always included
in the original format string. */
if (instance.pred != PRED_none && instance.pred != PRED_za_m)
{
argument_types.quick_insert (0, instance.gp_type ());
+ instance.gp_index = 0;
/* For unary merge operations, the first argument is a vector with
the same type as the result. For unary_convert_narrowt it also
provides the "bottom" half of active elements, and is present
for all types of predication. */
auto nargs = argument_types.length () - 1;
if (instance.shape->has_merge_argument_p (instance, nargs))
- argument_types.quick_insert (0, return_type);
+ {
+ argument_types.quick_insert (0, return_type);
+ instance.gp_index = 1;
+ }
}
+
+ /* Some shapes take a predicate argument that is not a governing predicate.
+ For those, the predicate type added above is a non-governing operand, so
+ record that the function has no GP. */
+ if (!instance.shape->has_gp_argument_p (instance))
+ instance.gp_index = -1;
}
/* Parse and move past an element type in FORMAT and return it as a type
but it doesn't currently have the necessary information. */
return c.require_immediate_range (1, 1, bytes - 1);
}
+
+ /* This shape has a predicate argument and is a merging operation, but the
+ predicate is not a governing predicate (GP), so report that the function
+ has no GP argument. */
+ bool
+ has_gp_argument_p (const function_instance &) const override
+ {
+ return false;
+ }
};
SHAPE (pmov_to_vector_lane)
gimple *
gimple_folder::fold_pfalse ()
{
- if (pred == PRED_none)
+ tree gp = gp_value (call);
+ /* If there isn't a GP then we can't do any folding as the instruction isn't
+ predicated. */
+ if (!gp)
return nullptr;
- tree arg0 = gimple_call_arg (call, 0);
+
if (pred == PRED_m)
{
- /* Unary function shapes with _m predication are folded to the
- inactive vector (arg0), while other function shapes are folded
- to op1 (arg1). */
- tree arg1 = gimple_call_arg (call, 1);
- if (is_pfalse (arg1))
- return fold_call_to (arg0);
- if (is_pfalse (arg0))
- return fold_call_to (arg1);
+ tree val = inactive_values (call);
+ if (is_pfalse (gp))
+ return fold_call_to (val);
return nullptr;
}
- if ((pred == PRED_x || pred == PRED_z) && is_pfalse (arg0))
+ if ((pred == PRED_x || pred == PRED_z) && is_pfalse (gp))
return fold_call_to (build_zero_cst (TREE_TYPE (lhs)));
- if (pred == PRED_implicit && is_pfalse (arg0))
+ if (pred == PRED_implicit && is_pfalse (gp))
{
unsigned int flags = call_properties ();
/* Folding to lhs = {0, ...} is not appropriate for intrinsics with
bool could_trap_p () const;
vector_type_index gp_type_index () const;
+ tree gp_value (gcall *) const;
+ tree inactive_values (gcall *) const;
tree gp_type () const;
unsigned int vectors_per_tuple () const;
group_suffix_index group_suffix_id;
predication_index pred;
fpm_mode_index fpm_mode;
+ int gp_index;
};
class registered_function;
virtual bool has_merge_argument_p (const function_instance &,
unsigned int) const;
+ virtual bool has_gp_argument_p (const function_instance &) const;
+
virtual bool explicit_type_suffix_p (unsigned int) const = 0;
/* True if the group suffix is present in overloaded names.
return acle_vector_types[0][gp_type_index ()];
}
+/* Return the argument of CALL that acts as the governing predicate of this
+ function, or NULL_TREE if the function has no governing predicate
+ (gp_index is negative). */
+inline tree
+function_instance::gp_value (gcall *call) const
+{
+ if (gp_index < 0)
+ return NULL_TREE;
+
+ return gimple_call_arg (call, gp_index);
+}
+
+/* Return the argument of CALL that supplies the values of the inactive lanes,
+ assuming this is a predicated function with a governing predicate (GP).
+ Return NULL_TREE if the function has no GP. */
+inline tree
+function_instance::inactive_values (gcall *call) const
+{
+ if (gp_index < 0)
+ return NULL_TREE;
+
+ /* A GP in the second position means a unary operation with _m predication,
+ whose first argument provides the inactive values. */
+ if (gp_index == 1)
+ return gimple_call_arg (call, 0);
+
+ /* Otherwise the inactive values are taken from the second argument. */
+ return gimple_call_arg (call, 1);
+}
+
/* If the function operates on tuples of vectors, return the number
of vectors in the tuples, otherwise return 1. */
inline unsigned int
return nargs == 1 && instance.pred == PRED_m;
}
+/* Return true if INSTANCE has a predicate argument that can be used as the
+ governing predicate. By default this is the case for every form of
+ predication other than PRED_none. */
+inline bool
+function_shape::has_gp_argument_p (const function_instance &instance) const
+{
+ return instance.pred != PRED_none;
+}
+
/* Return the mode of the result of a call. */
inline machine_mode
function_expander::result_mode () const
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** foo:
+** ptrue p0\.b, all
+** brkb p0\.b, p0/z, p0\.b
+** ret
+*/
+svbool_t foo () {
+ return svbrkb_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
+}
+
+/*
+** bar:
+** ptrue p0\.b, all
+** brka p0\.b, p0/z, p0\.b
+** ret
+*/
+svbool_t bar () {
+ return svbrka_b_m (svpfalse (), svptrue_b8 (), svptrue_b8 ());
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.2-a+sve2p1" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** f:
+** pfalse p([0-7]+)\.b
+** mov z0\.b, #-1
+** pmov z0\[1\], p\1\.d
+** ret
+*/
+svuint64_t f () {
+ return svpmov_lane_u64_m (svdup_u64 (~0UL), svpfalse (), 1);
+}