tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP
author		Richard Biener <rguenther@suse.de>
		Tue, 6 May 2025 11:29:42 +0000 (13:29 +0200)
committer	Richard Biener <rguenth@gcc.gnu.org>
		Thu, 8 May 2025 06:36:53 +0000 (08:36 +0200)
The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
code, which for the case of VF > 1 currently relies on dataref analysis
that assumes consecutive accesses.  But code generation advances by
DR_STEP between each iteration, which requires us to verify that each
individual DR_STEP preserves the alignment, rather than only
VF * DR_STEP.  This allows us to use vector aligned accesses in some
cases.
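
As an illustration (a hypothetical loop, not one of the testcases from
the PRs below), consider a group of two strided stores into a 32-byte
aligned array of doubles:

double a[512] __attribute__ ((aligned (32)));

void
foo (void)
{
  for (int i = 0; i < 128; i++)
    {
      /* Two-element SLP group, strided by four doubles:
         DR_STEP = 4 * sizeof (double) = 32 bytes.  */
      a[4 * i] = 1.0;
      a[4 * i + 1] = 2.0;
    }
}

Assuming a 32-byte DR_TARGET_ALIGNMENT, DR_STEP here is a multiple of
it, so every per-iteration advance keeps the two-element store aligned
and an aligned vector access can be used.  With a stride of three
doubles instead, DR_STEP would be 24 bytes and only VF * DR_STEP would
preserve the alignment, so the per-iteration accesses have to be
treated as misaligned.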

PR tree-optimization/119589
PR tree-optimization/119586
PR tree-optimization/119155
* tree-vect-stmts.cc (vectorizable_store): Verify
DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
VF > 1 and VMAT_STRIDED_SLP.  Use vector aligned accesses when
we can.
(vectorizable_load): Likewise.

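The check added below boils down to a divisibility test between two
alignments.  A stand-alone sketch in plain C (the patch itself uses
multiple_p on poly_uint64 values; the integer version here is only for
illustration):

#include <stdbool.h>
#include <stdio.h>

/* An advance whose known alignment is step_align bytes preserves
   target_align-byte alignment iff step_align is a multiple of
   target_align.  */
static bool
step_preserves_alignment_p (unsigned step_align, unsigned target_align)
{
  return step_align % target_align == 0;
}

int
main (void)
{
  /* DR_STEP_ALIGNMENT = 32, DR_TARGET_ALIGNMENT = 32: stays aligned.  */
  printf ("%d\n", step_preserves_alignment_p (32, 32));
  /* DR_STEP_ALIGNMENT = 8 (e.g. a 24-byte step), target 32: an
     individual advance can break the alignment, flag as misaligned.  */
  printf ("%d\n", step_preserves_alignment_p (8, 32));
  return 0;
}
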
gcc/tree-vect-stmts.cc

index af7114d419238c10767870e2113a6f1911bed323..a8762baa076c4e78422937b882461519a951c33e 100644
@@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by DR_STEP each iteration.
+                If that step is constant but only aligned when taken VF
+                times, DR alignment analysis can still consider the access
+                aligned, since it assumes contiguous accesses.  That is not
+                how we generate code here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
              ltype = build_vector_type (elem_type, n);
              lvectype = vectype;
              int mis_align = dr_misalignment (first_dr_info, ltype);
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
                                                 mis_align);
@@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  We'd want to use
-              if (alignment_support_scheme == dr_aligned)
-                align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-            since doing that is what we assume we can in the above checks.
-            But this interferes with groups with gaps where for example
-            VF == 2 makes the group in the unrolled loop aligned but the
-            fact that we advance with step between the two subgroups
-            makes the access to the second unaligned.  See PR119586.
-            We have to anticipate that here or adjust code generation to
-            avoid the misaligned loads by means of permutations.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple stores.  */
          if (nstores > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by DR_STEP each iteration.
+                If that step is constant but only aligned when taken VF
+                times, DR alignment analysis can still consider the access
+                aligned, since it assumes contiguous accesses.  That is not
+                how we generate code here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -10838,6 +10853,10 @@ vectorizable_load (vec_info *vinfo,
                  if (VECTOR_TYPE_P (ptype))
                    {
                      mis_align = dr_misalignment (first_dr_info, ptype);
+                     if (maybe_gt (vf, 1u)
+                         && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                         DR_TARGET_ALIGNMENT (first_dr_info)))
+                       mis_align = -1;
                      dr_align
                        = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
                                                         mis_align);
@@ -10857,8 +10876,10 @@ vectorizable_load (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  The above is still wrong, see vectorizable_store.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple loads.  */
          if (nloads > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
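
Both functions now select the effective alignment the same way: the
full target alignment when the access was proven aligned, otherwise the
conservatively analyzed one, capped by the access size when the group
is split into multiple loads or stores.  A sketch of that selection in
plain C (the names are invented for the illustration; the GCC code
reads them from first_dr_info and ltype):

#include <stdbool.h>

/* Illustrative only: the real code queries DR_TARGET_ALIGNMENT,
   vect_dr_behavior and TYPE_SIZE_UNIT (ltype) instead of taking
   plain integers.  */
unsigned
effective_align (bool dr_is_aligned, unsigned target_align,
                 unsigned analyzed_align, unsigned access_size,
                 int naccesses)
{
  unsigned align = dr_is_aligned ? target_align : analyzed_align;
  /* With multiple smaller accesses per group, each access can only
     be assumed aligned to its own size.  */
  if (naccesses > 1 && access_size < align)
    align = access_size;
  return align;
}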