tree-optimization/123190 - fix costing of permuted contiguous loads

author Richard Biener <rguenther@suse.de>
Wed, 14 Jan 2026 11:45:19 +0000 (12:45 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 14 Jan 2026 13:44:00 +0000 (14:44 +0100)
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-1.c
index 4265ac80a43df9188ef0ec4e4dfc1a1eb782ebac..098468627f05696516016a007790de3a14c95b3b 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-1.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-1.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-additional-options "-O3 -mavx2 -mno-avx512f -mtune=generic" } */
+/* { dg-additional-options "-O3 -mavx2 -mno-avx512f" } */
  
  typedef struct {
     double real;
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-2.c
new file mode 100644
index 0000000..abc63b2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -mavx2 -mno-avx512f" } */
+
+#include "costmodel-pr123190-1.c"
+
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 32" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: epilogue loop vectorized using 16 byte vectors and unroll factor 1" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a563238c4be036da2d5dd9f6b582216bb98e9679..83983742467c2bbbff9095e4c46859e5ac9ba14d 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2087,6 +2087,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
    tree *ls_type = &ls->ls_type;
    bool *slp_perm = &ls->slp_perm;
    unsigned *n_perms = &ls->n_perms;
+  unsigned *n_loads = &ls->n_loads;
    tree *supported_offset_vectype = &ls->supported_offset_vectype;
    int *supported_scale = &ls->supported_scale;
    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -2103,6 +2104,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
    *ls_type = NULL_TREE;
    *slp_perm = false;
    *n_perms = -1U;
+  *n_loads = -1U;
    ls->subchain_p = false;
  
    bool perm_ok = true;
@@ -2110,7 +2112,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
  
    if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
      perm_ok = vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
-                                           vf, true, n_perms);
+                                           vf, true, n_perms, n_loads);
  
    if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
      {
@@ -11880,18 +11882,20 @@ vectorizable_load (vec_info *vinfo,
          in PR101120 and friends.  */
        if (costing_p)
         {
-         gcc_assert (ls.n_perms != -1U);
+         gcc_assert (ls.n_perms != -1U && ls.n_loads != -1U);
           if (ls.n_perms != 0)
             inside_cost = record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
                                             slp_node, 0, vect_body);
+         if (n_adjacent_loads > 0)
+           n_adjacent_loads = ls.n_loads;
         }
        else
         {
-         unsigned n_perms2;
+         unsigned n_perms2, n_loads2;
           bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
                                                   gsi, vf, false, &n_perms2,
-                                                 nullptr, true);
-         gcc_assert (ok && ls.n_perms == n_perms2);
+                                                 &n_loads2, true);
+         gcc_assert (ok && ls.n_perms == n_perms2 && ls.n_loads == n_loads2);
         }
      }
  
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7a38d4969cf2a62b8dfb1cf45c84f79bfb42bed1..2cbf752e4e769dbecb286c2047923171cc244dac 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -307,6 +307,7 @@ struct vect_load_store_data : vect_data {
    /* True if the load requires a load permutation.  */
    bool slp_perm;    // SLP_TREE_LOAD_PERMUTATION
    unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
+  unsigned n_loads; // SLP_TREE_LOAD_PERMUTATION
    /* Whether the load permutation is consecutive and simple.  */
    bool subchain_p; // VMAT_STRIDED_SLP and VMAT_GATHER_SCATTER
  };
author	Richard Biener <rguenther@suse.de>
	Wed, 14 Jan 2026 11:45:19 +0000 (12:45 +0100)
committer	Richard Biener <rguenth@gcc.gnu.org>
	Wed, 14 Jan 2026 13:44:00 +0000 (14:44 +0100)
gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-1.c		patch | blob | blame | history
gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr123190-2.c	[new file with mode: 0644]	patch | blob
gcc/tree-vect-stmts.cc		patch | blob | blame | history
gcc/tree-vectorizer.h		patch | blob | blame | history