git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fix fallout of peeling for gap improvements
author Richard Biener <rguenther@suse.de>
Fri, 14 Jun 2024 05:54:15 +0000 (07:54 +0200)
committer Richard Biener <rguenther@suse.de>
Fri, 14 Jun 2024 07:04:18 +0000 (09:04 +0200)
The following hopefully addresses an observed bootstrap issue on aarch64
where maybe-uninit diagnostics occur.  It also fixes my own bogus napkin
math, where I confused the rounded-up size of a single access with the
rounded-up size of the group accessed in a single scalar iteration.
So the following adds a correctness check, which leaves a set of cases
where peeling for gaps is still treated as insufficient.  This could be
rectified by splitting the last load into multiple ones, but I'm leaving
that for a followup; it is better to quickly fix the reported wrong-code.

* tree-vect-stmts.cc (get_group_load_store_type): Do not
re-use poly-int remain but re-compute with non-poly values.
Verify the shortened load is good enough to be covered with
a single scalar gap iteration before accepting it.

* gcc.dg/vect/pr115385.c: Enable AVX2 if available.

gcc/testsuite/gcc.dg/vect/pr115385.c
gcc/tree-vect-stmts.cc

index a18cd665d7d08e71b0fd150c27e849f86c0b8219..baea0b2473fe60c3a5228f43e79ed3371fbe3be5 100644 (file)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target mmap } */
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
 
 #include <sys/mman.h>
 #include <stdio.h>
index e32d44050e535daf06e463c06110bb787da7c680..ca6052662a3054a0ec4fa04c91f804ffdd3039b5 100644 (file)
@@ -2148,15 +2148,17 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
            {
              /* But peeling a single scalar iteration is enough if
                 we can use the next power-of-two sized partial
-                access.  */
+                access and that is sufficiently small to be covered
+                by the single scalar iteration.  */
              unsigned HOST_WIDE_INT cnunits, cvf, cremain, cpart_size;
              if (!nunits.is_constant (&cnunits)
                  || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
-                 || ((cremain = remain.to_constant (), true)
+                 || (((cremain = group_size * cvf - gap % cnunits), true)
                      && ((cpart_size = (1 << ceil_log2 (cremain))) != cnunits)
-                     && vector_vector_composition_type
-                          (vectype, cnunits / cpart_size,
-                           &half_vtype) == NULL_TREE))
+                     && (cremain + group_size < cpart_size
+                         || vector_vector_composition_type
+                              (vectype, cnunits / cpart_size,
+                               &half_vtype) == NULL_TREE)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,