vect: teach vectorizable_call to predicate calls when they can trap [PR122103]

author Tamar Christina <tamar.christina@arm.com>

Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)

committer Tamar Christina <tamar.christina@arm.com>

Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)
author Tamar Christina <tamar.christina@arm.com>
Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_4.c

new file mode 100644 (file)

index 0000000..cac485b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_4.c
@@ -0,0 +1,93 @@
+/* { dg-do run { target aarch64_sve_hw } }  */
+/* { dg-require-effective-target glibc }  */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv8-a+sve" }  */
+
+/* PR122103: run-time check that a vectorized sqrtf loop does not raise
+   FE_INVALID from lanes beyond N when compiled with -ftrapping-math.  */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+#include <signal.h>
+
+#pragma STDC FENV_ACCESS ON
+
+/* Kernel under test: replace each of the first N elements of C with
+   sqrtf (C[i] - 2).  */
+__attribute__((noinline))
+void f(float *__restrict c, int n)
+{
+  for (int i = 0; i < n; i++)
+    c[i] = __builtin_sqrtf (c[i] - 2.0f);
+}
+
+/* SIGFPE handler: report the unexpected trap and abort the test.  */
+static void on_fpe (int sig)
+{
+  (void) sig;
+  puts ("SIGFPE: trapped FP exception (unexpected invalid from sqrt)");
+  fflush (stdout);
+  __builtin_abort ();
+}
+
+/* Returns 0 on success, 1 if allocation fails, 2 if FE_INVALID was raised
+   and 3 if any result differs from the expected value.  */
+int
+main (void)
+{
+  signal (SIGFPE, on_fpe);
+
+  /* Clear flags and enable trap on invalid operations.  The fetestexcept
+     check after the call to f still inspects the FE_INVALID flag in case
+     no trap is delivered.  */
+  feclearexcept (FE_ALL_EXCEPT);
+  feenableexcept (FE_INVALID);
+
+  /* Choose a length that is NOT a multiple of typical SVE VL (unknown at
+     runtime), and includes plenty of extra lanes.  */
+  const int n = 37;
+
+  /* C11 requires the size passed to aligned_alloc to be a multiple of the
+     alignment, so round the byte count up.  */
+  const size_t align = 64;
+  size_t bytes = ((size_t) n * sizeof (float) + align - 1) / align * align;
+  float *c = aligned_alloc (align, bytes);
+  if (!c)
+    return 1;
+
+  /* Populate c so that (c[i] - 2) is a perfect square; this avoids FE_INVALID
+     while giving deterministic results.  */
+  for (int i = 0; i < n; i++)
+    {
+      int t = i & 3;
+      c[i] = (float) (t * t) + 2.0f;
+    }
+
+  f (c, n);
+
+  /* Only FE_INVALID would indicate a wrong extra-lane computation here.  */
+  if (fetestexcept (FE_INVALID))
+    {
+      puts ("Unexpected FE_INVALID");
+      free (c);
+      return 2;
+    }
+
+  int ok = 1;
+  for (int i = 0; i < n; i++)
+    {
+      float expected = (float) (i & 3);
+      if (!(c[i] == expected))
+       {
+         printf ("Mismatch at %d: expected %g, got %g\n", i, expected, c[i]);
+         ok = 0;
+       }
+    }
+
+  puts (ok ? "OK" : "FAIL");
+  free (c);
+  return ok ? 0 : 3;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_5.c

new file mode 100644 (file)

index 0000000..ca1bfb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_5.c
@@ -0,0 +1,24 @@
+/* { dg-do compile }  */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv9-a" }  */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* With -ftrapping-math, fadd and fsqrt must use the whilelo predicate.  */
+/*
+** f:
+**     ...
+**     whilelo p([0-9]+).s, wzr, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     fadd    z[0-9]+.s, p\1/m, z[0-9]+.s, z[0-9]+.s
+**     fsqrt   z[0-9]+.s, p\1/m, z[0-9]+.s
+**     st1w    z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     incw    x[0-9]+
+**     whilelo p\1.s, w[0-9]+, w[0-9]+
+**     ...
+*/
+void
+f (float *__restrict c, int n)
+{
+  for (int i = 0; i < n; i++)
+    c[i] = __builtin_sqrtf (c[i] - 2.0f);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_6.c

new file mode 100644 (file)

index 0000000..9c51121
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile }  */
+/* { dg-options "-O3 -fno-math-errno -fno-trapping-math -march=armv9-a" }  */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+/* -fno-trapping-math: fadd is unpredicated and fsqrt uses any predicate.  */
+/*
+** f:
+**     ...
+**     whilelo p([0-9]+).s, wzr, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     fadd    z[0-9]+.s, z[0-9]+.s, z[0-9]+.s
+**     fsqrt   z[0-9]+.s, p[0-9]+/m, z[0-9]+.s
+**     st1w    z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     incw    x[0-9]+
+**     whilelo p\1.s, w[0-9]+, w[0-9]+
+**     ...
+*/
+void
+f (float *__restrict c, int n)
+{
+  for (int i = 0; i < n; i++)
+    c[i] = __builtin_sqrtf (c[i] - 2.0f);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc

index ed2486451ca07849d4e516198f9a01151dd2f948..8086d4c35c9a7ea585e8996ed51fdd39db551de6 100644 (file)
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3438,9 +3438,9 @@ vectorizable_call (vec_info *vinfo,
    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
    tree fndecl, new_temp, rhs_type;
-  enum vect_def_type dt[4]
+  enum vect_def_type dt[5]
      = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
-       vect_unknown_def_type };
+       vect_unknown_def_type, vect_unknown_def_type };
    tree vectypes[ARRAY_SIZE (dt)] = {};
    slp_tree slp_op[ARRAY_SIZE (dt)] = {};
    auto_vec<tree, 8> vargs;
@@ -3481,8 +3481,8 @@ vectorizable_call (vec_info *vinfo,
  
    /* Bail out if the function has more than four arguments, we do not have
       interesting builtin functions to vectorize with more than two arguments
-     except for fma.  No arguments is also not good.  */
-  if (nargs == 0 || nargs > 4)
+     except for fma (cond_fma has more).  No arguments is also not good.  */
+  if (nargs == 0 || nargs > 5)
      return false;
  
    /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
@@ -3625,6 +3625,33 @@ vectorizable_call (vec_info *vinfo,
      ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                           vectype_in);
  
+  /* Check if the operation traps.  */
+  bool could_trap = gimple_could_trap_p (STMT_VINFO_STMT (stmt_info));
+  if (could_trap && cost_vec && loop_vinfo)
+    {
+      /* If the operation can trap it must be conditional, otherwise fail.  */
+      internal_fn cond_fn = get_conditional_internal_fn (ifn);
+      internal_fn cond_len_fn = get_len_internal_fn (ifn);
+      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+       {
+         /* We assume that BB SLP fills all lanes, so no inactive lanes can
+            cause issues.  */
+         if ((cond_fn == IFN_LAST
+              || !direct_internal_fn_supported_p (cond_fn, vectype_out,
+                                                  OPTIMIZE_FOR_SPEED))
+             && (cond_len_fn == IFN_LAST
+                 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
+                                                     OPTIMIZE_FOR_SPEED)))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "can't use a fully-masked loop because no"
+                                " conditional operation is available.\n");
+             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+           }
+       }
+    }
+
    /* If that fails, try asking for a target-specific built-in function.  */
    if (ifn == IFN_LAST)
      {
@@ -3749,7 +3776,7 @@ vectorizable_call (vec_info *vinfo,
        else if (reduc_idx >= 0)
         gcc_unreachable ();
      }
-  else if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
+  else if (masked_loop_p && mask_opno == -1 && (reduc_idx >= 0 || could_trap))
      {
        ifn = cond_fn;
        vect_nargs += 2;
@@ -3793,7 +3820,8 @@ vectorizable_call (vec_info *vinfo,
             {
               int varg = 0;
               /* Add the mask if necessary.  */
-             if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
+             if (masked_loop_p && mask_opno == -1
+                 && (reduc_idx >= 0 || could_trap))
                 {
                   gcc_assert (internal_fn_mask_index (ifn) == varg);
                   unsigned int vec_num = vec_oprnds0.length ();
@@ -3807,10 +3835,18 @@ vectorizable_call (vec_info *vinfo,
                   vargs[varg++] = vec_oprndsk[i];
                 }
               /* Add the else value if necessary.  */
-             if (masked_loop_p && mask_opno == -1 && reduc_idx >= 0)
+             if (masked_loop_p && mask_opno == -1
+                && (reduc_idx >= 0 || could_trap))
                 {
                   gcc_assert (internal_fn_else_index (ifn) == varg);
-                 vargs[varg++] = vargs[reduc_idx + 1];
+                 if (reduc_idx >= 0)
+                   vargs[varg++] = vargs[reduc_idx + 1];
+                 else
+                   {
+                     auto else_value = targetm.preferred_else_value
+                       (cond_fn, vectype_out, varg - 1, &vargs[1]);
+                     vargs[varg++] = else_value;
+                   }
                 }
               if (clz_ctz_arg1)
                 vargs[varg++] = clz_ctz_arg1;
author	Tamar Christina <tamar.christina@arm.com>
	Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)
committer	Tamar Christina <tamar.christina@arm.com>
	Mon, 5 Jan 2026 20:56:03 +0000 (20:56 +0000)
gcc/testsuite/gcc.target/aarch64/sve/pr122103_4.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/pr122103_5.c	[new file with mode: 0644]	patch | blob
gcc/testsuite/gcc.target/aarch64/sve/pr122103_6.c	[new file with mode: 0644]	patch | blob
gcc/tree-vect-stmts.cc		patch | blob | blame | history