]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fix epilogue loop profile
authorJan Hubicka <jh@suse.cz>
Fri, 7 Jul 2023 16:22:11 +0000 (18:22 +0200)
committerJan Hubicka <jh@suse.cz>
Fri, 7 Jul 2023 16:22:11 +0000 (18:22 +0200)
Fix two bugs in scale_loop_profile which crept in during my
cleanups and curiously enough did not show on the testcases we have so far.
The patch also adds the missing call to cap iteration count of the vectorized
loop epilogues.

Vectorizer profile needs more work, but I am trying to chase out obvious bugs first
so the profile quality statistics become meaningful and we can try to improve on them.

Now we get:

Pass dump id and name            |static mismatch|dynamic mismatch
                                 |in count     |in count
107t cunrolli                    |      3    +3|        17251       +17251
116t vrp                         |      5    +2|        30908       +16532
118t dce                         |      3    -2|        17251       -13657
127t ch                          |     13   +10|        17251
131t dom                         |     39   +26|        17251
133t isolate-paths               |     47    +8|        17251
134t reassoc                     |     49    +2|        17251
136t forwprop                    |     53    +4|       202501      +185250
159t cddce                       |     61    +8|       216211       +13710
161t ldist                       |     62    +1|       216211
172t ifcvt                       |     66    +4|       373711      +157500
173t vect                        |    143   +77|      9801947     +9428236
176t cunroll                     |    149    +6|     12006408     +2204461
183t loopdone                    |    146    -3|     11944469       -61939
195t fre                         |    142    -4|     11944469
197t dom                         |    141    -1|     13038435     +1093966
199t threadfull                  |    143    +2|     13246410      +207975
200t vrp                         |    145    +2|     13444579      +198169
204t dce                         |    143    -2|     13371315       -73264
206t sink                        |    141    -2|     13371315
211t cddce                       |    147    +6|     13372755        +1440
255t optimized                   |    145    -2|     13372755
256r expand                      |    141    -4|     13371197        -1558
258r into_cfglayout              |    139    -2|     13371197
275r loop2_unroll                |    143    +4|     16792056     +3420859
291r ce2                         |    141    -2|     16811462
312r pro_and_epilogue            |    161   +20|     16873400       +61938
315r jump2                       |    167    +6|     20910158     +4036758
323r bbro                        |    160    -7|     16559844     -4350314

Vect still introduces 77 profile mismatches (same as without this patch)
however subsequent cunroll works much better with 6 new mismatches compared to
78.  Overall it reduces 229 mismatches to 160.

Also overall runtime estimate is now reduced by 6.9%.
Previously the overall runtime estimate grew by 11%, which was a result of the
fact that the epilogue profile was pretty much the same as the profile of the
original loop.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

* cfgloopmanip.cc (scale_loop_profile): Fix computation of count_in and scaling blocks
after exit.
* tree-vect-loop-manip.cc (vect_do_peeling): Scale loop profile of the epilogue if bound
is known.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/vect-profile-upate.c: New test.

gcc/cfgloopmanip.cc
gcc/testsuite/gcc.dg/tree-ssa/vect-profile-upate.c [new file with mode: 0644]
gcc/tree-vect-loop-manip.cc

index 524b979a546ff3147fbfb9676357afb8311b971b..f56a9b87d1c27bacbcede2c74b5f837991e342c5 100644 (file)
@@ -548,18 +548,23 @@ scale_loop_profile (class loop *loop, profile_probability p,
   profile_count count_in = profile_count::zero ();
   edge e;
   edge_iterator ei;
+  bool found_latch = false;
   FOR_EACH_EDGE (e, ei, loop->header->preds)
-    count_in += e->count ();
+    if (e->src != loop->latch)
+      count_in += e->count ();
+    else
+      found_latch = true;
+  gcc_checking_assert (found_latch);
 
   /* Now scale the loop body so header count is
      count_in * (iteration_bound + 1)  */
   profile_probability scale_prob
-    = (count_in *= iteration_bound).probability_in (loop->header->count);
+    = (count_in * (iteration_bound + 1)).probability_in (loop->header->count);
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, ";; Scaling loop %i with scale ",
               loop->num);
-      p.dump (dump_file);
+      scale_prob.dump (dump_file);
       fprintf (dump_file, " to reach upper bound %i\n",
               (int)iteration_bound);
     }
@@ -593,7 +598,6 @@ scale_loop_profile (class loop *loop, profile_probability p,
       bool found = false;
       FOR_EACH_EDGE (e, ei, exit_edge->src->succs)
        if (!(e->flags & EDGE_FAKE)
-           && !(e->probability == profile_probability::never ())
            && !loop_exit_edge_p (loop, e))
          {
            if (found)
@@ -617,7 +621,8 @@ scale_loop_profile (class loop *loop, profile_probability p,
          for (unsigned int i = 0; i < loop->num_nodes; i++)
            if (body[i] != exit_edge->src
                && dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
-             body[i]->count.apply_scale (new_count, old_count);
+             body[i]->count = body[i]->count.apply_scale (new_count,
+                                                          old_count);
 
          free (body);
        }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vect-profile-upate.c b/gcc/testsuite/gcc.dg/tree-ssa/vect-profile-upate.c
new file mode 100644 (file)
index 0000000..72cc428
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
+int a[99];
+void test()
+{
+       for (int i = 0; i < 99; i++)
+               a[i]++;
+}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
index 2361cb328ab17a2aa20f87d68e162d47ee45ba37..30baac6db446a6051afbd641ef5c057e6b395655 100644 (file)
@@ -3389,6 +3389,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
          gcc_assert (bound != 0);
          /* -1 to convert loop iterations to latch iterations.  */
          record_niter_bound (epilog, bound - 1, false, true);
+         scale_loop_profile (epilog, profile_probability::always (),
+                             bound - 1);
        }
 
       delete_update_ssa ();