tree-optimization/117558 - peeling for gaps and VL vectors
author     Richard Biener <rguenther@suse.de>
           Fri, 15 Nov 2024 07:42:04 +0000 (08:42 +0100)
committer  Richard Biener <rguenth@gcc.gnu.org>
           Sat, 16 Nov 2024 13:13:28 +0000 (14:13 +0100)
The following ensures that peeling a single iteration for gaps is
sufficient by enforcing niter masking (partial vector use), given we
cannot (always) statically decide this when the vector size isn't
known.  The condition guarding this, and thus statically giving a pass
in some cases for VL vectors, is questionable; the patch doesn't
address this.

This fixes a set of known fallout from enabling
--param vect-force-slp=1 by default.
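
For intuition only (a minimal sketch, not taken from the commit or its
testsuite; the names sum_even and x are made up): a single-element load
group with a gap looks like the loop below.  The vector loop's group
loads read 2*VF contiguous ints per iteration, so the last iteration can
read one element past the final scalar access.  Peeling one iteration
for gaps covers that when VF is a compile-time constant, but with VL
vectors the vectorizer cannot always prove a single peeled iteration
suffices, which is where this patch enforces niter masking instead.
Whether this exact loop reaches the changed code path depends on the
target's vector modes and group layout.

/* Hypothetical reduced example (not from the commit): a stride-2 load
   group with a gap.  The scalar loop only reads x[0], x[2], ...,
   x[2*n-2], but a vectorized group load reads 2*VF contiguous ints per
   iteration and can touch x[2*n-1], one past the last scalar access.  */
int
sum_even (int *x, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += x[2 * i];
  return sum;
}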

PR tree-optimization/117558
* tree-vectorizer.h (_loop_vec_info::must_use_partial_vectors_p): New.
(LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P): Likewise.
* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
must_use_partial_vectors_p.
(vect_determine_partial_vectors_and_peeling): Enforce it.
(vect_analyze_loop_2): Reset before restarting.
* tree-vect-stmts.cc (get_group_load_store_type): When peeling
a single gap iteration cannot be determined safe statically,
enforce the use of partial vectors.
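
As a rough scalar model of what enforcing partial vectors buys here
(illustration only; this is not how the vectorizer emits code, and
sum_even_masked_model and vf are invented names): with niter masking
every group load is predicated on the lanes that correspond to remaining
scalar iterations, so inactive lanes never access memory and no peeled
iteration is needed even when the trip count isn't a multiple of the
vectorization factor.

/* Scalar model of a masked (partial-vector) group load: lanes past the
   remaining iteration count are inactive and never touch memory.  */
int
sum_even_masked_model (const int *x, int n, int vf)
{
  int sum = 0;
  for (int i = 0; i < n; i += vf)
    for (int lane = 0; lane < vf; lane++)
      if (i + lane < n)	/* The niter mask: only active lanes load.  */
	sum += x[2 * (i + lane)];
  return sum;
}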

gcc/tree-vect-loop.cc
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index c67248e851ded9d09c6d5d1e314985f14af74e60..18c4fa1d000a9f06090c19e8ed36ad0ecbe5b1ac 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1059,6 +1059,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
     vectorizable (false),
     can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
+    must_use_partial_vectors_p (false),
     using_partial_vectors_p (false),
     using_decrementing_iv_p (false),
     using_select_vl_p (false),
@@ -2679,7 +2680,10 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
   LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
   LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
   if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
-      && need_peeling_or_partial_vectors_p)
+      && LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo))
+    LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
+  else if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+          && need_peeling_or_partial_vectors_p)
     {
       /* For partial-vector-usage=1, try to push the handling of partial
         vectors to the epilogue, with the main loop continuing to operate
@@ -2702,6 +2706,12 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
        LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
     }
 
+  if (LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo)
+      && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+    return opt_result::failure_at (vect_location,
+                                  "not vectorized: loop needs but cannot "
+                                  "use partial vectors\n");
+
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "operating on %s vectors%s.\n",
@@ -3387,6 +3397,7 @@ again:
   LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
     = saved_can_use_partial_vectors_p;
+  LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
   LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
   if (loop_vinfo->scan_map)
     loop_vinfo->scan_map->empty ();
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 458056dd13dc70bfc0408d4012f883e427459ab2..f4a4d5a554c224c129526ee8a360ee5337b84a60 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2202,11 +2202,25 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                               (vectype, cnunits / cpart_size,
                                &half_vtype) == NULL_TREE)))
                {
-                 if (dump_enabled_p ())
-                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                    "peeling for gaps insufficient for "
-                                    "access\n");
-                 return false;
+                 /* If all fails we can still resort to niter masking, so
+                    enforce the use of partial vectors.  */
+                 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+                   {
+                     if (dump_enabled_p ())
+                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                        "peeling for gaps insufficient for "
+                                        "access unless using partial "
+                                        "vectors\n");
+                     LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+                   }
+                 else
+                   {
+                     if (dump_enabled_p ())
+                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                        "peeling for gaps insufficient for "
+                                        "access\n");
+                     return false;
+                   }
                }
            }
        }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 273e8c644e74ae78f7380b7a11f456424dc54d36..ebd1d8920f5c156e81d9aac2356eef04f9ab746e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -913,6 +913,9 @@ public:
      fewer than VF scalars.  */
   bool can_use_partial_vectors_p;
 
+  /* Records whether we must use niter masking for correctness reasons.  */
+  bool must_use_partial_vectors_p;
+
   /* True if we've decided to use partially-populated vectors, so that
      the vector loop can handle fewer than VF scalars.  */
   bool using_partial_vectors_p;
@@ -1051,6 +1054,7 @@ public:
 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
 #define LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P(L) (L)->can_use_partial_vectors_p
+#define LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P(L) (L)->must_use_partial_vectors_p
 #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p
 #define LOOP_VINFO_USING_DECREMENTING_IV_P(L) (L)->using_decrementing_iv_p
 #define LOOP_VINFO_USING_SELECT_VL_P(L) (L)->using_select_vl_p