--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target glibc } */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv8-a+sve" } */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+#include <signal.h>
+
+#pragma STDC FENV_ACCESS ON
+
+__attribute__((noinline)) /* In-place c[i] = sqrtf(c[i]) where d[i] > 1000; kept out of line from main. */
+void f(float *__restrict c, int *__restrict d, int n)
+{
+ for (int i = 0; i < n; i++) {
+ if (d[i] > 1000) /* guard: elements failing this must be left untouched */
+ c[i] = __builtin_sqrtf(c[i]); /* main stores -1.0f in the guarded-off elements */
+ }
+}
+
+/* SIGFPE handler installed by main: print a diagnostic, flush it, and
+   abort so that the run is reported as a failure.  */
+static void on_fpe(int sig)
+{
+  static const char msg[] =
+    "SIGFPE: trapped FP exception (likely FE_INVALID from sqrt on a negative/sNaN lane)";
+
+  (void)sig;  /* The signal number itself is not needed.  */
+
+  puts(msg);
+  fflush(stdout);  /* Push the message out before the process is killed.  */
+  abort();
+}
+
+/* Test driver.  Requests a trap on FE_INVALID, runs f on data whose
+   inactive elements (d[i] <= 1000) hold -1.0f, and then checks that
+   (a) no invalid-operation exception was raised and (b) only the active
+   elements were modified.
+   Returns 0 on success, 1 if allocation fails, 2 if any check fails.  */
+int main(void)
+{
+  signal(SIGFPE, on_fpe);
+
+  // Clear flags and try to enable a trap on invalid operations.
+  // feenableexcept can fail (returning -1) when the CPU does not implement
+  // trapped FP exceptions; the sticky-flag check after the call to f
+  // catches the bug in that case, so its result is deliberately ignored.
+  feclearexcept(FE_ALL_EXCEPT);
+  (void)feenableexcept(FE_INVALID);
+
+  // Choose a length that is NOT a multiple of typical SVE VL (unknown at runtime),
+  // and includes plenty of inactive lanes.
+  const int n = 37;
+
+  // C11 requires the size passed to aligned_alloc to be a multiple of the
+  // alignment, so round each request up to the next multiple of 64.
+  const size_t align = 64;
+  float *c;
+  int *d;
+  c = aligned_alloc(align, ((size_t)n * sizeof *c + align - 1) / align * align);
+  d = aligned_alloc(align, ((size_t)n * sizeof *d + align - 1) / align * align);
+  if (!c || !d) {
+    // free(NULL) is a no-op, so this releases whichever allocation succeeded.
+    free(c);
+    free(d);
+    return 1;
+  }
+
+  // Construct data:
+  // - For lanes where d<=1000, put negative values in c (sqrt would be FE_INVALID if executed).
+  // - For lanes where d>1000, put positive values in c (legal sqrt).
+  for (int i = 0; i < n; i++) {
+    if ((i % 3) == 0) {
+      d[i] = 1001;   // active
+      c[i] = 4.0f;   // sqrt OK
+    } else {
+      d[i] = 0;      // inactive
+      c[i] = -1.0f;  // sqrt would be invalid if wrongly executed
+    }
+  }
+
+  // Call f. Correct behavior: no SIGFPE, and only positions with d>1000 are modified.
+  f(c, d, n);
+
+  int ok = 1;
+
+  // Read the sticky flags immediately after f, before any other FP work.
+  // If the trap could not be enabled, a raised FE_INVALID is the only
+  // evidence that sqrt was evaluated on an inactive lane, so it must fail
+  // the test rather than merely be reported.
+  int raised = fetestexcept(FE_ALL_EXCEPT);
+  if (raised) {
+    printf("FP flags raised: 0x%x\n", (unsigned)raised);
+    if (raised & FE_INVALID) {
+      puts("FE_INVALID raised: sqrt was evaluated on an inactive lane");
+      ok = 0;
+    }
+  } else {
+    puts("No FP flags raised.");
+  }
+
+  // Check results.
+  for (int i = 0; i < n; i++) {
+    if (d[i] > 1000) {
+      if (!(c[i] == 2.0f)) {  // sqrt(4) = 2
+        printf("Mismatch at %d: expected 2.0, got %g\n", i, c[i]);
+        ok = 0;
+      }
+    } else {
+      if (!(c[i] == -1.0f)) {  // must remain unchanged
+        printf("Clobber at %d: expected -1.0 unchanged, got %g\n", i, c[i]);
+        ok = 0;
+      }
+    }
+  }
+
+  puts(ok ? "OK" : "FAIL");
+  free(c);
+  free(d);
+  return ok ? 0 : 2;
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv9-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f:
+** ...
+** whilelo p([0-9]+).s, wzr, w[0-9]+
+** ...
+** ld1w z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmpgt p\1.s, p\1/z, z[0-9]+.s, z[0-9]+.s
+** ld1w z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** fsqrt z[0-9]+.s, p\1/m, z[0-9]+.s
+** st1w z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+** incw x[0-9]+
+** whilelo p\1.s, w[0-9]+, w[0-9]+
+** ...
+*/
+void f (float *__restrict c, int *__restrict d, int n) /* c[i] = sqrtf (c[i]) where d[i] > 1000.  */
+{
+ for (int i = 0; i < n; i++)
+ {
+ if (d[i] > 1000) /* Guard: the expected fsqrt is predicated on this compare.  */
+ c[i] = __builtin_sqrtf (c[i]);
+ }
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-math-errno -fno-trapping-math -march=armv9-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f:
+** ...
+** whilelo p([0-9]+).s, wzr, w[0-9]+
+** ...
+** ld1w z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** cmpgt p\1.s, p\1/z, z[0-9]+.s, z[0-9]+.s
+** ld1w z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+** fsqrt z[0-9]+.s, p[0-9]+/m, z[0-9]+.s
+** st1w z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+** incw x[0-9]+
+** whilelo p\1.s, w[0-9]+, w[0-9]+
+** ...
+*/
+void f (float *__restrict c, int *__restrict d, int n) /* c[i] = sqrtf (c[i]) where d[i] > 1000.  */
+{
+ for (int i = 0; i < n; i++)
+ {
+ if (d[i] > 1000) /* Guard for the store; the expected fsqrt may use any predicate.  */
+ c[i] = __builtin_sqrtf (c[i]);
+ }
+}
+
if (gimple_assign_single_p (stmt))
return ifcvt_can_use_mask_load_store (stmt);
+ if (gimple_call_builtin_p (stmt))
+ if (tree callee = gimple_call_fndecl (stmt))
+ {
+ auto ifn = associated_internal_fn (callee);
+ auto cond_ifn = get_conditional_internal_fn (ifn);
+ tree type = TREE_TYPE (gimple_call_fntype (stmt));
+ return (cond_ifn != IFN_LAST
+ && vectorized_internal_fn_supported_p (cond_ifn, type));
+ }
+
+ if (!is_gimple_assign (stmt))
+ return false;
+
tree_code code = gimple_assign_rhs_code (stmt);
tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
}
}
+ /* Check if the call can trap and if so require predication. */
+ if (gimple_could_trap_p (stmt))
+ {
+ if (ifcvt_can_predicate (stmt))
+ {
+ gimple_set_plf (stmt, GF_PLF_2, true);
+ need_to_predicate = true;
+ return true;
+ }
+ else
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "stmt could trap...\n");
+ return false;
+ }
+ }
+
/* There are some IFN_s that are used to replace builtins but have the
same semantics. Even if MASK_CALL cannot handle them vectorable_call
will insert the proper selection, so do not block conversion. */
SSA names defined earlier in STMT's block. */
static gimple *
-predicate_rhs_code (gassign *stmt, tree mask, tree cond,
+predicate_rhs_code (gimple *stmt, tree mask, tree cond,
hash_set<tree_ssa_name_hash> *ssa_names)
{
- tree lhs = gimple_assign_lhs (stmt);
- tree_code code = gimple_assign_rhs_code (stmt);
- unsigned int nops = gimple_num_ops (stmt);
- internal_fn cond_fn = get_conditional_internal_fn (code);
+ internal_fn cond_fn;
+ if (is_gimple_assign (stmt))
+ {
+ tree_code code = gimple_assign_rhs_code (stmt);
+ cond_fn = get_conditional_internal_fn (code);
+ }
+ else if (tree callee = gimple_call_fndecl (stmt))
+ {
+ auto ifn = associated_internal_fn (callee);
+ cond_fn = get_conditional_internal_fn (ifn);
+ }
+ else
+ return NULL;
+
+ if (cond_fn == IFN_LAST)
+ {
+ gcc_assert (!gimple_could_trap_p (stmt));
+ return NULL;
+ }
+
+ tree lhs = gimple_get_lhs (stmt);
+ unsigned int nops = gimple_num_args (stmt) + 1;
/* Construct the arguments to the conditional internal function. */
auto_vec<tree, 8> args;
args.safe_grow (nops + 1, true);
args[0] = mask;
- for (unsigned int i = 1; i < nops; ++i)
- args[i] = gimple_op (stmt, i);
+ for (unsigned int i = 0; i < nops - 1; ++i)
+ args[i+1] = gimple_arg (stmt, i);
args[nops] = NULL_TREE;
/* Look for uses of the result to see whether they are COND_EXPRs that can
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
{
- gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
- if (!stmt)
+ gimple *stmt = gsi_stmt (gsi);
+ if (!is_gimple_assign (stmt)
+ && !gimple_call_builtin_p (stmt))
;
else if (is_false_predicate (cond)
&& gimple_vdef (stmt))
continue;
}
else if (gimple_plf (stmt, GF_PLF_2)
- && is_gimple_assign (stmt))
+ && (is_gimple_assign (stmt)
+ || gimple_call_builtin_p (stmt)))
{
- tree lhs = gimple_assign_lhs (stmt);
+ tree lhs = gimple_get_lhs (stmt);
+ /* ?? Assume that calls without an LHS are not data processing
+ and so no issues with traps. */
+ if (!lhs)
+ continue;
tree mask;
gimple *new_stmt;
gimple_seq stmts = NULL;
vect_masks.safe_push (mask);
}
if (gimple_assign_single_p (stmt))
- new_stmt = predicate_load_or_store (&gsi, stmt, mask);
+ new_stmt = predicate_load_or_store (&gsi,
+ as_a <gassign *> (stmt),
+ mask);
else
new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
- gsi_replace (&gsi, new_stmt, true);
+ if (new_stmt)
+ gsi_replace (&gsi, new_stmt, true);
}
else if (gimple_needing_rewrite_undefined (stmt))
rewrite_to_defined_unconditional (&gsi);