]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/116573 - .SELECT_VL for SLP
author Richard Biener <rguenther@suse.de>
Tue, 17 Sep 2024 09:20:10 +0000 (11:20 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Thu, 19 Sep 2024 11:28:16 +0000 (13:28 +0200)
The following restores the use of .SELECT_VL for testcases where it
is safe to use even when using SLP.  For now I've restricted it
to single-lane SLP, plus optimistically allowed store-lane nodes,
assuming single-lane roots are not widened, or at most widened to
load-lanes, which should be fine.

PR tree-optimization/116573
* tree-vect-loop.cc (vect_analyze_loop_2): Allow .SELECT_VL
for SLP but disable it when there are multi-lane instances.
* tree-vect-stmts.cc (vectorizable_store): Only compute the
ptr increment when generating code.
(vectorizable_load): Likewise.

gcc/tree-vect-loop.cc
gcc/tree-vect-stmts.cc

index d42694d19747d33f301f7ac016cbd8ac8fddcb73..c6778ab5f154c62bdabc358a4f96a26ddbc0af10 100644 (file)
@@ -3084,10 +3084,23 @@ start_over:
       if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
                                          OPTIMIZE_FOR_SPEED)
          && LOOP_VINFO_LENS (loop_vinfo).length () == 1
-         && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp
+         && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1
          && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
              || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()))
        LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
+
+      /* If any of the SLP instances cover more than a single lane
+        we cannot use .SELECT_VL at the moment, even if the number
+        of lanes is uniform throughout the SLP graph.  */
+      if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+       for (slp_instance inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
+         if (SLP_TREE_LANES (SLP_INSTANCE_TREE (inst)) != 1
+             && !(SLP_INSTANCE_KIND (inst) == slp_inst_kind_store
+                  && SLP_INSTANCE_TREE (inst)->ldst_lanes))
+           {
+             LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false;
+             break;
+           }
     }
 
   /* Decide whether this loop_vinfo should use partial vectors or peeling,
index 495f45e40e631575484ae6c160dee629651a5ca7..33cdccae784912535a27cd56b28db7b6d64df9d0 100644 (file)
@@ -8744,8 +8744,9 @@ vectorizable_store (vec_info *vinfo,
        aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
       else
        aggr_type = vectype;
-      bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
-                                         memory_access_type, loop_lens);
+      if (!costing_p)
+       bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+                                           memory_access_type, loop_lens);
     }
 
   if (mask && !costing_p)
@@ -10820,8 +10821,9 @@ vectorizable_load (vec_info *vinfo,
        aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
       else
        aggr_type = vectype;
-      bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
-                                         memory_access_type, loop_lens);
+      if (!costing_p)
+       bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+                                           memory_access_type, loop_lens);
     }
 
   auto_vec<tree> vec_offsets;