tree-optimization/118558 - fix alignment compute with VMAT_CONTIGUOUS_REVERSE
author Richard Biener <rguenther@suse.de>
Tue, 21 Jan 2025 13:58:43 +0000 (14:58 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Thu, 23 Jan 2025 07:43:39 +0000 (08:43 +0100)
There are calls to dr_misalignment left that do not correct for the
offset (which is vector type dependent) when the stride is negative.
Notably vect_known_alignment_in_bytes doesn't allow such an offset to
be passed through, which the following adds (computing the offset in
vect_known_alignment_in_bytes would be possible as well, but as seen
the offset can be shared).  Eventually this function could go away.

This meant peeling for gaps was not considered, nor was shortening of
the access applied; the latter is what fixes the testcase on x86_64.
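
As a rough illustration of the offset involved (a minimal sketch in
plain integers; the actual code uses poly_int64, TYPE_VECTOR_SUBPARTS
and vect_get_scalar_dr_size, and the byte positions below are made up):

  #include <cstdio>

  int main ()
  {
    /* Reversed contiguous access, e.g. a V4SI load with negative
       stride: the vector covers the (nunits - 1) elements below the
       element the data reference points at, so that lower address is
       the one whose alignment matters.  */
    const long nunits = 4;               /* elements per vector        */
    const long dr_size = 4;              /* scalar element size, bytes */
    long off = (nunits - 1) * -dr_size;  /* -12, as in the patch       */

    /* A data reference at byte 48 (16-byte aligned) really accesses
       bytes 36..51, which only have 4-byte alignment guaranteed.  */
    long dr_byte = 48;
    printf ("access starts at byte %ld (off %ld)\n", dr_byte + off, off);
    return 0;
  }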

PR tree-optimization/118558
* tree-vectorizer.h (vect_known_alignment_in_bytes): Pass
through offset to dr_misalignment.
* tree-vect-stmts.cc (get_group_load_store_type): Compute
offset applied for negative stride and use it when querying
alignment of accesses.
(vectorizable_load): Likewise.

* gcc.dg/vect/pr118558.c: New testcase.

gcc/testsuite/gcc.dg/vect/pr118558.c [new file with mode: 0644]
gcc/tree-vect-stmts.cc
gcc/tree-vectorizer.h

diff --git a/gcc/testsuite/gcc.dg/vect/pr118558.c b/gcc/testsuite/gcc.dg/vect/pr118558.c
new file mode 100644 (file)
index 0000000..5483328
--- /dev/null
@@ -0,0 +1,15 @@
+#include "tree-vect.h"
+
+static unsigned long g_270[5][2] = {{123}};
+static short g_2312 = 0;
+int main()
+{
+  check_vect ();
+  int g_1168 = 0;
+  unsigned t = 4;
+  for (g_1168 = 3; g_1168 >= 0; g_1168 -= 1)
+    for (g_2312 = 0; g_2312 <= 1; g_2312 += 1)
+      t = g_270[g_1168][0];
+  if (t != 123) __builtin_abort();
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21fb5cf5bd47ad9e37762909c6103adbf8752e2a..c0550acf6b2b231d1800a331a352668a3daf3d10 100644 (file)
@@ -2198,14 +2198,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                               " non-consecutive accesses\n");
              return false;
            }
+
+         unsigned HOST_WIDE_INT dr_size
+           = vect_get_scalar_dr_size (first_dr_info);
+         poly_int64 off = 0;
+         if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+           off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+
          /* An overrun is fine if the trailing elements are smaller
             than the alignment boundary B.  Every vector access will
             be a multiple of B and so we are guaranteed to access a
             non-gap element in the same B-sized block.  */
          if (overrun_p
              && gap < (vect_known_alignment_in_bytes (first_dr_info,
-                                                      vectype)
-                       / vect_get_scalar_dr_size (first_dr_info)))
+                                                      vectype, off) / dr_size))
            overrun_p = false;
 
          /* When we have a contiguous access across loop iterations
@@ -2230,7 +2236,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
             by simply loading half of the vector only.  Usually
             the construction with an upper zero half will be elided.  */
          dr_alignment_support alss;
-         int misalign = dr_misalignment (first_dr_info, vectype);
+         int misalign = dr_misalignment (first_dr_info, vectype, off);
          tree half_vtype;
          poly_uint64 remain;
          unsigned HOST_WIDE_INT tem, num;
@@ -11991,8 +11997,14 @@ vectorizable_load (vec_info *vinfo,
                    tree ltype = vectype;
                    tree new_vtype = NULL_TREE;
                    unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+                   unsigned HOST_WIDE_INT dr_size
+                     = vect_get_scalar_dr_size (first_dr_info);
+                   poly_int64 off = 0;
+                   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                     off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
                    unsigned int vect_align
-                     = vect_known_alignment_in_bytes (first_dr_info, vectype);
+                     = vect_known_alignment_in_bytes (first_dr_info, vectype,
+                                                      off);
                    /* Try to use a single smaller load when we are about
                       to load excess elements compared to the unrolled
                       scalar loop.  */
@@ -12013,9 +12025,7 @@ vectorizable_load (vec_info *vinfo,
                             scalar loop.  */
                          ;
                        else if (known_gt (vect_align,
-                                          ((nunits - remain)
-                                           * vect_get_scalar_dr_size
-                                               (first_dr_info))))
+                                          ((nunits - remain) * dr_size)))
                          /* Aligned access to the gap area when there's
                             at least one element in it is OK.  */
                          ;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 79db02a39a8f7d100d4dce18ff9a7a60c87db20b..44d3a1d46c409597f1e67a275211a1da414fc7c7 100644 (file)
@@ -2028,9 +2028,10 @@ known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
    of DR_INFO is guaranteed to have.  */
 
 inline unsigned int
-vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
+vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
+                              poly_int64 offset = 0)
 {
-  int misalignment = dr_misalignment (dr_info, vectype);
+  int misalignment = dr_misalignment (dr_info, vectype, offset);
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
     return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
   else if (misalignment == 0)
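
For a feel of what the new offset argument changes, here is a toy model
of the arithmetic (plain integers only, not the GCC function; it assumes
a hypothetical 16-byte target alignment and ignores the
DR_MISALIGNMENT_UNKNOWN case):

  #include <cstdio>

  /* Toy model of vect_known_alignment_in_bytes: given the data
     reference's byte position inside a block of known alignment and an
     offset, return the alignment guaranteed at DR + offset.  */
  static long
  toy_known_alignment (long dr_pos, long offset, long block_align = 16)
  {
    long mis = ((dr_pos + offset) % block_align + block_align) % block_align;
    if (mis == 0)
      return block_align;    /* fully aligned: the whole block alignment  */
    return mis & -mis;       /* otherwise the largest power of two in mis */
  }

  int main ()
  {
    /* 16-byte aligned data reference; a reversed V4SI access sits
       (4 - 1) * 4 == 12 bytes below it.  */
    printf ("offset 0:   %ld bytes\n", toy_known_alignment (0, 0));    /* 16 */
    printf ("offset -12: %ld bytes\n", toy_known_alignment (0, -12));  /*  4 */
    return 0;
  }

With the offset the reversed access is seen as only 4-byte aligned, so
the overrun and gap checks in get_group_load_store_type and
vectorizable_load no longer treat the trailing gap as covered by the
larger alignment of the data reference itself.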