git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/116609 - SLP live lane vectorization with partial vectors
author Richard Biener <rguenther@suse.de>
Thu, 5 Sep 2024 08:46:58 +0000 (10:46 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 6 Sep 2024 07:16:03 +0000 (09:16 +0200)
The following implements the simple case of single-lane SLP when
using partial vectors, which can use the VEC_EXTRACT_LAST code
generation without changes.  I'll keep the PR open for further
enhancements.

This avoids FAILs of gcc.target/aarch64/sve/live_1.c when using
single-lane SLP for non-grouped stores.

PR tree-optimization/116609
* tree-vect-loop.cc (vectorizable_live_operation_1): Support
partial vectors for single-lane SLP.

gcc/tree-vect-loop.cc

index 242d5e2d9169387e44b426fa22186a3f1850d594..31cdc4bf53dc7d4c848fbd97a76231110aee1555 100644 (file)
@@ -10961,7 +10961,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 
         where VEC_LHS is the vectorized live-out result and MASK is
         the loop mask for the final iteration.  */
-      gcc_assert (ncopies == 1 && !slp_node);
+      gcc_assert (ncopies == 1
+                 && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
@@ -10995,7 +10996,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
 
         where VEC_LHS is the vectorized live-out result and MASK is
         the loop mask for the final iteration.  */
-      gcc_assert (!slp_node);
+      gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
       tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
       gimple_seq tem = NULL;
       gimple_stmt_iterator gsi = gsi_last (tem);
@@ -11147,7 +11148,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
       /* No transformation required.  */
       if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
        {
-         if (slp_node)
+         if (slp_node && SLP_TREE_LANES (slp_node) != 1)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11156,7 +11157,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
                                 "the loop.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
-         else if (ncopies > 1)
+         else if (ncopies > 1
+                  || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11166,7 +11168,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
            }
          else
            {
-             gcc_assert (ncopies == 1 && !slp_node);
+             gcc_assert (ncopies == 1
+                         && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
              if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
                                                  OPTIMIZE_FOR_SPEED))
                vect_record_loop_mask (loop_vinfo,
@@ -11213,8 +11216,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
   if (slp_node)
     {
       gcc_assert (!loop_vinfo
-                 || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-                     && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
+                 || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+                      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+                     || SLP_TREE_LANES (slp_node) == 1));
 
       /* Get the correct slp vectorized stmt.  */
       vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];