git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/122778 - missed loop masking in OMP SIMD call handling
author Richard Biener <rguenther@suse.de>
Fri, 21 Nov 2025 09:32:12 +0000 (10:32 +0100)
committer Richard Biener <rguenther@suse.de>
Fri, 21 Nov 2025 15:17:30 +0000 (16:17 +0100)
For AVX512 style masking we fail to apply loop masking to a conditional
OMP SIMD call.

PR tree-optimization/122778
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Honor
a loop mask when passing the conditional mask with AVX512
style masking.

* gcc.dg/vect/vect-simd-clone-22.c: New testcase.
* gcc.dg/vect/vect-simd-clone-22a.c: Likewise.

gcc/testsuite/gcc.dg/vect/vect-simd-clone-22.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-simd-clone-22a.c [new file with mode: 0644]
gcc/tree-vect-stmts.cc

diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-22.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-22.c
new file mode 100644 (file)
index 0000000..70fa825
--- /dev/null
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd --param vect-partial-vector-usage=2" } */
+/* { dg-additional-options "-mavx512f" { target avx512f_runtime } } */
+/* { dg-add-options ieee } */
+/* { dg-require-effective-target fenv_exceptions } */
+/* { dg-additional-sources vect-simd-clone-22a.c linkonly } */
+
+#include <fenv.h>
+#include "tree-vect.h"
+
+#pragma omp declare simd simdlen(16) inbranch
+float __attribute__((const)) baz (float x, float y); /* Defined in vect-simd-clone-22a.c.  */
+
+float a[1024]; /* File-scope, so zero-initialized; foo reads and writes it in place.  */
+int c[1024]; /* Never written in this file, so c[i] == 0 holds for every i.  */
+
+void __attribute__((noipa)) /* For i in [0, n): a[i] = baz (a[i], b[i]) wherever c[i] == 0.  */
+foo (int n, float * __restrict b)
+{
+  for (int i = 0; i < n; ++i) /* Trip count n is only known at run time.  */
+    {
+      float aa = a[i];
+      float bb = b[i];
+      if (c[i] == 0) /* Makes the call to the inbranch SIMD function conditional.  */
+       aa = baz (aa, bb);
+      a[i] = aa; /* Writes the old value back when c[i] != 0.  */
+    }
+}
+
+float b[1024]; /* Second operand array; main passes it to foo.  */
+
+int main() /* Runs foo on 1020 elements, then checks FP exception flags and results.  */
+{
+  check_vect ();
+
+#pragma GCC novector
+  for (int i = 0; i < 1020; ++i) /* 1020 is not a multiple of the simdlen of 16.  */
+    a[i] = b[i] = 2; /* Elements 1020..1023 of a and b keep their zero initializer.  */
+  foo (1020, b);
+  if (fetestexcept (FE_DIVBYZERO) || fetestexcept (FE_INVALID)) /* baz on the zero tail would be 0 / 0.  */
+    abort ();
+#pragma GCC novector
+  for (int i = 0; i < 1020; ++i)
+    if (a[i] != 1) /* c[] is all zero, so every a[i] became baz (2, 2), i.e. 2 / 2.  */
+      abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-22a.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-22a.c
new file mode 100644 (file)
index 0000000..88bda07
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+
+#pragma omp declare simd simdlen(16) inbranch
+float baz (float x, float y) /* Same pragma and parameter list as the prototype in vect-simd-clone-22.c.  */
+{
+  return x / y; /* x == 0 and y == 0 gives 0 / 0, an IEEE invalid operation.  */
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 748b3bcb0ab038e602394c05a7b6eb526961ba8d..b8e36d4ee0904522f3f3a919999578aa316ac07a 100644 (file)
@@ -4748,10 +4748,25 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                      else if (known_eq (atype_subparts,
                                         TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
                        {
+                         vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+                         if (loop_vinfo
+                             && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+                           {
+                             vec_loop_masks *loop_masks
+                               = &LOOP_VINFO_MASKS (loop_vinfo);
+                             tree loop_mask
+                               = vect_get_loop_mask (loop_vinfo, gsi,
+                                                     loop_masks, ncopies,
+                                                     vectype, j);
+                             vec_oprnd0
+                               = prepare_vec_mask (loop_vinfo,
+                                                   TREE_TYPE (loop_mask),
+                                                   loop_mask, vec_oprnd0,
+                                                   gsi);
+                           }
                          /* The vector mask argument matches the input
                             in the number of lanes, but not necessarily
                             in the mode.  */
-                         vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
                          tree st = lang_hooks.types.type_for_mode
                                      (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
                          vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,