git.ipfire.org Git - thirdparty/gcc.git/commitdiff
vect: Enhance cost evaluation in vect_transform_slp_perm_load_1
author: Kewen Lin <linkw@linux.ibm.com>
Wed, 24 May 2023 05:05:01 +0000 (00:05 -0500)
committer: Kewen Lin <linkw@linux.ibm.com>
Wed, 24 May 2023 05:05:01 +0000 (00:05 -0500)
Following Richi's suggestion in [1], I'm working on moving the
cost evaluation next to the transformation.  This patch
enhances function vect_transform_slp_perm_load_1, which
could under-cost vector permutations: the costing
doesn't consider nvectors_per_build, so it is inconsistent
with the transformation part.

Basically it changes the structure below

  if (index == count)
    {
       if (!noop_p)
         {
           // A ...
           // ++*n_perms;

           if (!analyze_only)
             {
                // B1 ...
                // B2 ...
                for ...
                   // B3 building VEC_PERM_EXPR
             }
         }
       else if (!analyze_only)
         {
            // no B2 since there are no further uses here.
            for ...
              // B4 building nothing
         }
        // B5 ...
    }

to:

  if (index == count)
    {
       if (!noop_p)
         {
           // A ...

           if (!analyze_only)
             // B1 ...

           // B2 ... (trivial computations during analyze_only or not)

           for ...
             {
                // now n_perms is consistent with building VEC_PERM_EXPR
                // ++*n_perms;
                if (analyze_only)
                   continue;
                // B3 building VEC_PERM_EXPR
             }
         }
       else if (!analyze_only)
         {
            // no B2 since there are no further uses here.
            for ...
              // B4 building nothing
         }
        // B5 ...
    }

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563624.html

gcc/ChangeLog:

* tree-vect-slp.cc (vect_transform_slp_perm_load_1): Adjust the
calculation of n_perms by considering nvectors_per_build.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c: New test.

gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c [new file with mode: 0644]
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-slp-perm.c
new file mode 100644 (file)
index 0000000..e5c4dce
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* Specify power9 to ensure the vectorization is profitable
+   and test point stands, otherwise it could be not profitable
+   to vectorize.  */
+/* { dg-additional-options "-mdejagnu-cpu=power9 -mpower9-vector" } */
+
+/* Verify we cost the exact count for required vec_perm.  */
+
+int x[1024], y[1024];
+
+void
+foo ()
+{
+  for (int i = 0; i < 512; ++i)
+    {
+      x[2 * i] = y[1023 - (2 * i)];
+      x[2 * i + 1] = y[1023 - (2 * i + 1)];
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "2 times vec_perm" 1 "vect" } } */
index a6f277c5e210804f89548ff7979ad638204b2778..ab89a82f1b31b9671d0e0c34a9e25c15a5eb2f06 100644 (file)
@@ -8124,12 +8124,12 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
 
   mode = TYPE_MODE (vectype);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned int nstmts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
 
   /* Initialize the vect stmts of NODE to properly insert the generated
      stmts later.  */
   if (! analyze_only)
-    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
-        i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
+    for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); i < nstmts; i++)
       SLP_TREE_VEC_STMTS (node).quick_push (NULL);
 
   /* Generate permutation masks for every NODE. Number of masks for each NODE
@@ -8170,7 +8170,10 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
         (b) the permutes only need a single vector input.  */
       mask.new_vector (nunits, group_size, 3);
       nelts_to_build = mask.encoded_nelts ();
-      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
+      /* It's possible to obtain zero nstmts during analyze_only, so make
+        it at least one to ensure the later computation for n_perms
+        proceed.  */
+      nvectors_per_build = nstmts > 0 ? nstmts : 1;
       in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
     }
   else
@@ -8261,40 +8264,39 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
                  return false;
                }
 
-             ++*n_perms;
-
+             tree mask_vec = NULL_TREE;
              if (!analyze_only)
-               {
-                 tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
+               mask_vec = vect_gen_perm_mask_checked (vectype, indices);
 
-                 if (second_vec_index == -1)
-                   second_vec_index = first_vec_index;
+             if (second_vec_index == -1)
+               second_vec_index = first_vec_index;
 
-                 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+             for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
+               {
+                 ++*n_perms;
+                 if (analyze_only)
+                   continue;
+                 /* Generate the permute statement if necessary.  */
+                 tree first_vec = dr_chain[first_vec_index + ri];
+                 tree second_vec = dr_chain[second_vec_index + ri];
+                 gassign *stmt = as_a<gassign *> (stmt_info->stmt);
+                 tree perm_dest
+                   = vect_create_destination_var (gimple_assign_lhs (stmt),
+                                                  vectype);
+                 perm_dest = make_ssa_name (perm_dest);
+                 gimple *perm_stmt
+                   = gimple_build_assign (perm_dest, VEC_PERM_EXPR, first_vec,
+                                          second_vec, mask_vec);
+                 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
+                                              gsi);
+                 if (dce_chain)
                    {
-                     /* Generate the permute statement if necessary.  */
-                     tree first_vec = dr_chain[first_vec_index + ri];
-                     tree second_vec = dr_chain[second_vec_index + ri];
-                     gassign *stmt = as_a<gassign *> (stmt_info->stmt);
-                     tree perm_dest
-                       = vect_create_destination_var (gimple_assign_lhs (stmt),
-                                                      vectype);
-                     perm_dest = make_ssa_name (perm_dest);
-                     gimple *perm_stmt
-                       = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
-                                              first_vec, second_vec, mask_vec);
-                     vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
-                                                  gsi);
-                     if (dce_chain)
-                       {
-                         bitmap_set_bit (used_defs, first_vec_index + ri);
-                         bitmap_set_bit (used_defs, second_vec_index + ri);
-                       }
-
-                     /* Store the vector statement in NODE.  */
-                     SLP_TREE_VEC_STMTS (node) [vect_stmts_counter++]
-                       = perm_stmt;
+                     bitmap_set_bit (used_defs, first_vec_index + ri);
+                     bitmap_set_bit (used_defs, second_vec_index + ri);
                    }
+
+                 /* Store the vector statement in NODE.  */
+                 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }
            }
          else if (!analyze_only)