git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/117874 - optimize SLP discovery budget use
author Richard Biener <rguenther@suse.de>
Tue, 3 Dec 2024 07:56:35 +0000 (08:56 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 3 Dec 2024 11:29:14 +0000 (12:29 +0100)
The following tries to avoid eating into the SLP discovery limit
when we can do cheaper checks first.  Together with the previous
patch this allows two-lane SLP discovery to be used for mult_su3_an
in 433.milc.

PR tree-optimization/117874
* tree-vect-slp.cc (vect_build_slp_tree_2): Perform early
reassoc checks before eating into discovery limit.

gcc/tree-vect-slp.cc

index 1799d5a619b1d20e0393021b22f5a8b9cd51841e..425135a9ee0afd409c6c371fe4faab9c9723beba 100644 (file)
@@ -2292,6 +2292,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                }
            }
          /* 2. try to build children nodes, associating as necessary.  */
+         /* 2a. prepare and perform early checks to avoid eating into
+            discovery limit unnecessarily.  */
+         vect_def_type *dts = XALLOCAVEC (vect_def_type, chain_len);
          for (unsigned n = 0; n < chain_len; ++n)
            {
              vect_def_type dt = chains[0][n].dt;
@@ -2319,6 +2322,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                    matches[0] = false;
                  goto out;
                }
+             dts[n] = dt;
              if (dt == vect_constant_def
                  || dt == vect_external_def)
                {
@@ -2333,16 +2337,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                      matches[0] = false;
                      goto out;
                    }
-                 vec<tree> ops;
-                 ops.create (group_size);
-                 for (lane = 0; lane < group_size; ++lane)
-                   if (stmts[lane])
-                     ops.quick_push (chains[lane][n].op);
-                   else
-                     ops.quick_push (NULL_TREE);
-                 slp_tree child = vect_create_new_slp_node (ops);
-                 SLP_TREE_DEF_TYPE (child) = dt;
-                 children.safe_push (child);
                }
              else if (dt != vect_internal_def)
                {
@@ -2354,6 +2348,26 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                  hard_fail = false;
                  goto out;
                }
+           }
+         /* 2b. do the actual build.  */
+         for (unsigned n = 0; n < chain_len; ++n)
+           {
+             vect_def_type dt = dts[n];
+             unsigned lane;
+             if (dt == vect_constant_def
+                 || dt == vect_external_def)
+               {
+                 vec<tree> ops;
+                 ops.create (group_size);
+                 for (lane = 0; lane < group_size; ++lane)
+                   if (stmts[lane])
+                     ops.quick_push (chains[lane][n].op);
+                   else
+                     ops.quick_push (NULL_TREE);
+                 slp_tree child = vect_create_new_slp_node (ops);
+                 SLP_TREE_DEF_TYPE (child) = dt;
+                 children.safe_push (child);
+               }
              else
                {
                  vec<stmt_vec_info> op_stmts;
@@ -2396,6 +2410,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                                term = true;
                                break;
                              }
+                           if (dump_enabled_p ())
+                             dump_printf_loc (MSG_NOTE, vect_location,
+                                              "swapping operand %d and %d "
+                                              "of lane %d\n",
+                                              n, n + perms[lane] + 1, lane);
                            std::swap (chains[lane][n],
                                       chains[lane][n + perms[lane] + 1]);
                            perms[lane]++;