git.ipfire.org Git - thirdparty/gcc.git/commitdiff
tree-optimization/116274 - overzealous SLP vectorization
author Richard Biener <rguenther@suse.de>
Thu, 8 Aug 2024 09:36:43 +0000 (11:36 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 20 Aug 2024 11:01:40 +0000 (13:01 +0200)
The following tries to address that the vectorizer fails to have
precise knowledge of argument and return calling conventions and
views some accesses as loads and stores that are not.
This is mainly important when doing basic-block vectorization as
otherwise loop indexing would force such arguments to memory.

On x86 the reduction in the number of apparent loads and stores
often dominates cost analysis so the following tries to mitigate
this aggressively by adjusting only the scalar load and store
cost, reducing them to the cost of a simple scalar statement,
but not touching the vector access cost which would be much
harder to estimate.  Thereby we err on the side of not performing
basic-block vectorization.

PR tree-optimization/116274
* tree-vect-slp.cc (vect_bb_slp_scalar_cost): Cost scalar loads
and stores as simple scalar stmts when they access a non-global,
not address-taken variable that doesn't have BLKmode assigned.

* gcc.target/i386/pr116274-2.c: New testcase.

gcc/testsuite/gcc.target/i386/pr116274-2.c [new file with mode: 0644]
gcc/tree-vect-slp.cc

diff --git a/gcc/testsuite/gcc.target/i386/pr116274-2.c b/gcc/testsuite/gcc.target/i386/pr116274-2.c
new file mode 100644 (file)
index 0000000..d581134
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-slp2-optimized" } */
+
+struct a { long x,y; };
+long test(struct a a) { return a.x+a.y; }
+
+/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */
+/* { dg-final { scan-assembler-times "addl|leaq" 1 } } */
+/* { dg-final { scan-assembler-not "padd" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 43ecd2689701451b706b41d73ba60773af4cf8a5..d6f34d0b73d06a7c76cd8b81493f45e624834bc7 100644 (file)
@@ -7618,7 +7618,17 @@ next_lane:
       vect_cost_for_stmt kind;
       if (STMT_VINFO_DATA_REF (orig_stmt_info))
        {
-         if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
+         data_reference_p dr = STMT_VINFO_DATA_REF (orig_stmt_info);
+         tree base = get_base_address (DR_REF (dr));
+         /* When the scalar access is to a non-global not address-taken
+            decl that is not BLKmode assume we can access it with a single
+            non-load/store instruction.  */
+         if (DECL_P (base)
+             && !is_global_var (base)
+             && !TREE_ADDRESSABLE (base)
+             && DECL_MODE (base) != BLKmode)
+           kind = scalar_stmt;
+         else if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
            kind = scalar_load;
          else
            kind = scalar_store;