git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/119155 - wrong aligned access for vectorized packed access
author Richard Biener <rguenther@suse.de>
Fri, 7 Mar 2025 11:57:42 +0000 (12:57 +0100)
committer Richard Biener <rguenth@gcc.gnu.org>
Fri, 14 Mar 2025 09:18:34 +0000 (10:18 +0100)
When doing strided SLP vectorization we use the wrong alignment for
the possibly piecewise access of the vector elements for loads and
stores.  While we are carefully using element-aligned loads and
stores, that isn't enough for the case where the original scalar accesses
are packed.  The following instead honors larger alignment when
present but correctly falls back to the original scalar alignment
used.

PR tree-optimization/119155
* tree-vect-stmts.cc (vectorizable_store): Do not always
use vector element alignment for VMAT_STRIDED_SLP but
a more correct alignment towards both ends.
(vectorizable_load): Likewise.

* gcc.dg/vect/pr119155.c: New testcase.

gcc/testsuite/gcc.dg/vect/pr119155.c [new file with mode: 0644]
gcc/tree-vect-stmts.cc

diff --git a/gcc/testsuite/gcc.dg/vect/pr119155.c b/gcc/testsuite/gcc.dg/vect/pr119155.c
new file mode 100644 (file)
index 0000000..b860cf2
--- /dev/null
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include "tree-vect.h"
+
+struct s { int x; } __attribute__((packed));  /* Single int in a packed struct, so accesses to x go through a packed type.  */
+
+void __attribute__((noipa))  /* noipa: GCC attribute that disables interprocedural optimization for f.  */
+f (char *xc, char *yc, int z)  /* Adds y->x into x->x for 100 records; xc and yc are byte cursors, z is the byte stride.  */
+{
+  for (int i = 0; i < 100; ++i)
+    {
+      struct s *x = (struct s *) xc;  /* View the bytes at each cursor as a packed struct s.  */
+      struct s *y = (struct s *) yc;
+      x->x += y->x;  /* Read-modify-write of the int member through the packed type.  */
+      xc += z;  /* Advance both cursors by z bytes.  */
+      yc += z;
+    }
+}
+
+int main ()  /* Runs f over two heap buffers, each entered one byte past its malloc result.  */
+{
+  check_vect ();  /* Declared in tree-vect.h; presumably a target capability check -- confirm there.  */
+  char *x = malloc (100 * sizeof (struct s) + 1);  /* Room for 100 records plus the one spare byte used by the offset below.  */
+  char *y = malloc (100 * sizeof (struct s) + 1);  /* NOTE(review): neither buffer is initialized before f reads it, and neither is freed.  */
+  f (x + 1, y + 1, sizeof (struct s));  /* Stride equals the record size, so the 100 records are contiguous.  */
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f894787f7bfb707d7344fe13c5d81d8811213c9b..17e3b1db894a0a8b6350d44f85a4a512498e2deb 100644 (file)
@@ -8904,7 +8904,15 @@ vectorizable_store (vec_info *vinfo,
                    }
                }
            }
-         ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
+         unsigned align;
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         /* Alignment is at most the access size if we do multiple stores.  */
+         if (nstores > 1)
+           align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+         ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
 
@@ -10851,7 +10859,7 @@ vectorizable_load (vec_info *vinfo,
                                                  &ptype);
              if (vtype != NULL_TREE)
                {
-                 dr_alignment_support dr_align = dr_aligned;
+                 dr_alignment_support dr_align;
                  int mis_align = 0;
                  if (VECTOR_TYPE_P (ptype))
                    {
@@ -10860,6 +10868,8 @@ vectorizable_load (vec_info *vinfo,
                        = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
                                                         mis_align);
                    }
+                 else
+                   dr_align = dr_unaligned_supported;
                  if (dr_align == dr_aligned
                      || dr_align == dr_unaligned_supported)
                    {
@@ -10872,8 +10882,15 @@ vectorizable_load (vec_info *vinfo,
                    }
                }
            }
-         /* Else fall back to the default element-wise access.  */
-         ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+         unsigned align;
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         /* Alignment is at most the access size if we do multiple loads.  */
+         if (nloads > 1)
+           align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+         ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
        }
 
       if (slp)