]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Select both inbranch and notinbranch clone during SIMD call analysis
author Richard Biener <rguenther@suse.de>
Sun, 23 Nov 2025 13:01:03 +0000 (14:01 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Thu, 4 Dec 2025 14:30:13 +0000 (15:30 +0100)
The following records both a possibly notinbranch and an inbranch
SIMD clone during analysis so that we can properly handle the
late decision on loop masking.  Recording of linear-clause data
from analysis is extended to cover linear-clause arguments from
both clones.

This also fixes AVX512 masked loop code generation in line with
the previous fixes.

PR tree-optimization/122776
* tree-vectorizer.h (vect_simd_clone_data::clone,
vect_simd_clone_data::clone_inbranch): New fields for
the two selected clones.
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Record
both a possibly notinbranch and an inbranch clone.  Delay
the choice between both to code generation based on
LOOP_VINFO_FULLY_MASKED_P.

* gcc.dg/vect/vect-simd-clone-24.c: New testcase.
* gcc.dg/gomp/pr110485.c: Adjust.

gcc/testsuite/gcc.dg/gomp/pr110485.c
gcc/testsuite/gcc.dg/vect/vect-simd-clone-24.c [new file with mode: 0644]
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

index ba6817a127f40246071e32ccebf692cc4d121d15..5183f3f403c7d06dec7bc73819d7b494a1a10c3c 100644 (file)
@@ -16,4 +16,4 @@ void foo (int n)
 }
 
 /* { dg-final { scan-tree-dump-not "MASK_LOAD" "vect" } } */
-/* { dg-final { scan-tree-dump "can't use a fully-masked loop because a non-masked simd clone was selected." "vect" { target x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "can't use a fully-masked loop because no masked simd clone was available" "vect" { target x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-24.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-24.c
new file mode 100644 (file)
index 0000000..35459d5
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd --param vect-partial-vector-usage=1 -fdump-tree-dce6 -w" } */
+/* { dg-additional-options "-mavx512f" { target avx512f } } */
+
+#pragma omp declare simd simdlen(16)
+int __attribute__((const)) baz (int x);
+
+int a[1024];
+
+void foo (int n, int * __restrict b)
+{
+  for (int i = 0; i < n; ++i)
+    if (baz (a[i]))
+      b[i] = baz (b[i]);
+}
+
+/* One notinbranch SIMD call, one inbranch in the main vector loop and two
+   inbranch in the masked epilog.  */
+/* { dg-final { scan-tree-dump-times "simdclone\.\[0-9\] \\\(\[^,\]\+\\\)" 1 "dce6" { target avx512f } } } */
+/* { dg-final { scan-tree-dump-times "simdclone\.\[0-9\] \\\(\[^,\]\+,\[^,\]\+\\\)" 3 "dce6" { target avx512f } } } */
index 641b2835693fd5a5133c7488433e1aae7cf8aff4..12eb5ea5b5e3a8b92cafdc0bb6f05e7c72c3fe05 100644 (file)
@@ -4201,9 +4201,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
   poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
   unsigned group_size = SLP_TREE_LANES (slp_node);
   unsigned int badness = 0;
+  unsigned int badness_inbranch = 0;
   struct cgraph_node *bestn = NULL;
+  struct cgraph_node *bestn_inbranch = NULL;
   if (!cost_vec)
-    bestn = cgraph_node::get (simd_clone_info[0]);
+    bestn = ((loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+            ? data.clone_inbranch : data.clone);
   else
     for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
@@ -4334,14 +4337,19 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                        SIMD_CLONE_ARG_TYPE_MASK);
            /* Penalize using a masked SIMD clone in a non-masked loop, that is
               not in a branch, as we'd have to construct an all-true mask.  */
-           if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-             this_badness += 64;
+           this_badness += 64;
          }
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
+       if (n->simdclone->inbranch
+           && (bestn_inbranch == NULL || this_badness < badness_inbranch))
+         {
+           bestn_inbranch = n;
+           badness_inbranch = this_badness;
+         }
       }
 
   if (bestn == NULL)
@@ -4377,6 +4385,17 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                               "incompatible vector types for invariants\n");
            return false;
          }
+
+      if (!bestn_inbranch && loop_vinfo)
+       {
+         if (dump_enabled_p ()
+             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "can't use a fully-masked loop because no"
+                            " masked simd clone was available.\n");
+         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+       }
+
       /* When the original call is pure or const but the SIMD ABI dictates
         an aggregate return we will have to use a virtual definition and
         in a loop eventually even need to add a virtual PHI.  That's
@@ -4390,75 +4409,71 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
         so automagic virtual operand updating doesn't work.  */
       if (gimple_vuse (stmt))
        vinfo->any_known_not_updated_vssa = true;
-      simd_clone_info.safe_push (bestn->decl);
-      for (i = 0; i < bestn->simdclone->nargs; i++)
+
+      data.clone = bestn;
+      data.clone_inbranch = bestn_inbranch;
+
+      simd_clone_info.safe_push (NULL_TREE);
+      for (i = 0;
+          i < (bestn_inbranch ? bestn_inbranch : bestn)->simdclone->nargs; i++)
        {
-         switch (bestn->simdclone->args[i].arg_type)
+         if (loop_vinfo
+             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+             && (bestn_inbranch->simdclone->args[i].arg_type
+                 == SIMD_CLONE_ARG_TYPE_MASK))
            {
-           default:
-             continue;
-           case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
-           case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
-             {
-               simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
-               simd_clone_info.safe_push (arginfo[i].op);
-               tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
-                          ? size_type_node : TREE_TYPE (arginfo[i].op);
-               tree ls = build_int_cst (lst, arginfo[i].linear_step);
-               simd_clone_info.safe_push (ls);
-               tree sll = arginfo[i].simd_lane_linear
-                          ? boolean_true_node : boolean_false_node;
-               simd_clone_info.safe_push (sll);
-             }
-             break;
-           case SIMD_CLONE_ARG_TYPE_MASK:
-             if (loop_vinfo
-                 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+             if (masked_call_offset)
+               /* When there is an explicit mask we require the
+                  number of elements to match up.  */
+               vect_record_loop_mask (loop_vinfo,
+                                      &LOOP_VINFO_MASKS (loop_vinfo),
+                                      ncopies_in, vectype, NULL_TREE);
+             else
                {
-                 if (masked_call_offset)
-                   /* When there is an explicit mask we require the
-                      number of elements to match up.  */
-                   vect_record_loop_mask (loop_vinfo,
-                                          &LOOP_VINFO_MASKS (loop_vinfo),
-                                          ncopies_in, vectype, NULL_TREE);
+                 /* When there is no explicit mask on the call we have
+                    more relaxed requirements.  */
+                 tree masktype;
+                 poly_uint64 callee_nelements;
+                 if (SCALAR_INT_MODE_P (bestn_inbranch->simdclone->mask_mode))
+                   {
+                     callee_nelements
+                         = exact_div (bestn_inbranch->simdclone->simdlen,
+                                      bestn_inbranch->simdclone->args[i].linear_step);
+                     masktype = get_related_vectype_for_scalar_type
+                         (vinfo->vector_mode, TREE_TYPE (vectype),
+                          callee_nelements);
+                   }
                  else
                    {
-                     /* When there is no explicit mask on the call we have
-                        more relaxed requirements.  */
-                     tree masktype;
-                     poly_uint64 callee_nelements;
-                     if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
-                       {
-                         callee_nelements
-                           = exact_div (bestn->simdclone->simdlen,
-                                        bestn->simdclone->args[i].linear_step);
-                         masktype = get_related_vectype_for_scalar_type
-                             (vinfo->vector_mode, TREE_TYPE (vectype),
-                              callee_nelements);
-                       }
-                     else
-                       {
-                         masktype = bestn->simdclone->args[i].vector_type;
-                         callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
-                       }
-                     auto o = vector_unroll_factor (nunits, callee_nelements);
-                     vect_record_loop_mask (loop_vinfo,
-                                            &LOOP_VINFO_MASKS (loop_vinfo),
-                                            ncopies  * o, masktype, NULL_TREE);
+                     masktype = bestn_inbranch->simdclone->args[i].vector_type;
+                     callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
                    }
+                 auto o = vector_unroll_factor (nunits, callee_nelements);
+                 vect_record_loop_mask (loop_vinfo,
+                                        &LOOP_VINFO_MASKS (loop_vinfo),
+                                        ncopies  * o, masktype, NULL_TREE);
                }
-             break;
            }
-       }
-
-      if (!bestn->simdclone->inbranch && loop_vinfo)
-       {
-         if (dump_enabled_p ()
-             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-           dump_printf_loc (MSG_NOTE, vect_location,
-                            "can't use a fully-masked loop because a"
-                            " non-masked simd clone was selected.\n");
-         LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+         else if ((bestn->simdclone->args[i].arg_type
+                   == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+                  || (bestn->simdclone->args[i].arg_type
+                      == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
+                  || (bestn_inbranch
+                      && ((bestn_inbranch->simdclone->args[i].arg_type
+                           == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+                          || (bestn_inbranch->simdclone->args[i].arg_type
+                              == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))))
+           {
+             simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
+             simd_clone_info.safe_push (arginfo[i].op);
+             tree lst = (POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
+                         ? size_type_node : TREE_TYPE (arginfo[i].op));
+             tree ls = build_int_cst (lst, arginfo[i].linear_step);
+             simd_clone_info.safe_push (ls);
+             tree sll = (arginfo[i].simd_lane_linear
+                         ? boolean_true_node : boolean_false_node);
+             simd_clone_info.safe_push (sll);
+           }
        }
 
       SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
index 0356b129e36f825c6504fca99b0cf65b9c09e325..606133f9172e71394dead0b80c081ce4590ed186 100644 (file)
@@ -265,8 +265,12 @@ struct vect_simd_clone_data : vect_data {
   vect_simd_clone_data () = default;
   vect_simd_clone_data (vect_simd_clone_data &&other) = default;
 
+  /* Selected SIMD clone and clone for in-branch.  */
+  cgraph_node *clone;
+  cgraph_node *clone_inbranch;
+
   /* Selected SIMD clone's function info.  First vector element
-     is SIMD clone's function decl, followed by a pair of trees (base + step)
+     is NULL_TREE, followed by a pair of trees (base + step)
      for linear arguments (pair of NULLs for other arguments).  */
   auto_vec<tree> simd_clone_info;
 };