git.ipfire.org Git - thirdparty/gcc.git/commitdiff
sra: Do not leave work for DSE (that it can sometimes not perform)
author: Martin Jambor <mjambor@suse.cz>
Thu, 9 May 2024 14:39:44 +0000 (16:39 +0200)
committer: Martin Jambor <mjambor@suse.cz>
Thu, 9 May 2024 14:51:15 +0000 (16:51 +0200)
When looking again at the g++.dg/tree-ssa/pr109849.C testcase we
discovered that it generates terrible store-to-load forwarding stalls
because SRA was leaving behind aggregate loads but all the stores were
by scalar parts and DSE failed to remove the useless load.  SRA has
all the knowledge to remove the statement even now, so this small
patch makes it do so.

With this patch, the g++.dg/tree-ssa/pr109849.C micro-benchmark runs 9
times faster (on an AMD EPYC 75F3 machine).

gcc/ChangeLog:

2024-04-18  Martin Jambor  <mjambor@suse.cz>

* tree-sra.cc (sra_modify_assign): Remove the original statement
also when dealing with a store to a fully covered aggregate from a
non-candidate.

gcc/testsuite/ChangeLog:

2024-04-23  Martin Jambor  <mjambor@suse.cz>

* g++.dg/tree-ssa/pr109849.C: Also check that the aggregate store
to cur disappears.
* gcc.dg/tree-ssa/ssa-dse-26.c: Instead of relying on DSE,
check that the unwanted stores were removed at early SRA time.

gcc/testsuite/g++.dg/tree-ssa/pr109849.C
gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c
gcc/tree-sra.cc

index cd348c0f5906ac4b20acaae20840f8ecaa330e8c..d06dbb10482947c448e5fab64bbe581c8b80ad8e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sra" } */
+/* { dg-options "-O2 -fdump-tree-sra -fdump-tree-optimized" } */
 
 #include <vector>
 typedef unsigned int uint32_t;
@@ -29,3 +29,4 @@ main()
 }
 
 /* { dg-final { scan-tree-dump "Created a replacement for stack offset" "sra"} } */
+/* { dg-final { scan-tree-dump-not "cur = MEM" "optimized"} } */
index 43152de561632b1d05a5f74612b90aa5665feb68..1d01392c5957b3125f17425992228da53884a1fe 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums -fno-tree-fre" } */
+/* { dg-options "-O2 -fdump-tree-esra -fno-short-enums -fno-tree-fre" } */
 /* { dg-skip-if "we want a BIT_FIELD_REF from fold_truth_andor" { ! lp64 } } */
 /* { dg-skip-if "temporary variable names are not x and y" { mmix-knuth-mmixware } } */
 
@@ -31,5 +31,5 @@ constraint_equal (struct constraint a, struct constraint b)
     && constraint_expr_equal (a.rhs, b.rhs);
 }
 
-/* { dg-final { scan-tree-dump-times "Deleted dead store: x = " 2 "dse1" } } */
-/* { dg-final { scan-tree-dump-times "Deleted dead store: y = " 2 "dse1" } } */
+/* { dg-final { scan-tree-dump-not "x = " "esra" } } */
+/* { dg-final { scan-tree-dump-not "y = " "esra" } } */
index 32fa28911f2d5796411adc3ba63fc09f08d17ad3..8040b0c564517c2a4e97e88aadbdb227d2719685 100644 (file)
@@ -4854,8 +4854,18 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator *gsi)
             But use the RHS aggregate to load from to expose more
             optimization opportunities.  */
          if (access_has_children_p (lacc))
-           generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
-                                    0, 0, gsi, true, true, loc);
+           {
+             generate_subtree_copies (lacc->first_child, rhs, lacc->offset,
+                                      0, 0, gsi, true, true, loc);
+             if (lacc->grp_covered)
+               {
+                 unlink_stmt_vdef (stmt);
+                 gsi_remove (& orig_gsi, true);
+                 release_defs (stmt);
+                 sra_stats.deleted++;
+                 return SRA_AM_REMOVED;
+               }
+           }
        }
 
       return SRA_AM_NONE;