From: Jan Hubicka Date: Thu, 27 Jul 2023 18:06:37 +0000 (+0200) Subject: Fix profile update after RTL unrolling X-Git-Tag: basepoints/gcc-15~7312 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a7d4310aed539b04345894ebafb49ca364780653;p=thirdparty%2Fgcc.git Fix profile update after RTL unrolling This patch fixes the profile update after RTL unrolling, which is now done the same way as in the tree unroller. We still produce a (slightly) corrupted profile for loops with multiple exits; I can try to fix that incrementally. I also updated the testcases to look for profile mismatches so they do not creep back in. gcc/ChangeLog: * cfgloop.h (single_dom_exit): Declare. * cfgloopmanip.h (update_exit_probability_after_unrolling): Declare. * cfgrtl.cc (struct cfg_hooks): Fix comment. * loop-unroll.cc (unroll_loop_constant_iterations): Update exit edge. * tree-ssa-loop-ivopts.h (single_dom_exit): Do not declare it here. * tree-ssa-loop-manip.cc (update_exit_probability_after_unrolling): Break out from ... (tree_transform_and_unroll_loop): ... here. gcc/testsuite/ChangeLog: * gcc.dg/tree-prof/peel-1.c: Test for profile mismatches. * gcc.dg/tree-prof/unroll-1.c: Test for profile mismatches. * gcc.dg/tree-ssa/peel1.c: Test for profile mismatches. * gcc.dg/unroll-1.c: Test for profile mismatches. * gcc.dg/unroll-3.c: Test for profile mismatches. * gcc.dg/unroll-4.c: Test for profile mismatches. * gcc.dg/unroll-5.c: Test for profile mismatches. * gcc.dg/unroll-6.c: Test for profile mismatches. --- diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 22293e1c2374..c4622d4b8538 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -921,6 +921,7 @@ extern bool get_estimated_loop_iterations (class loop *loop, widest_int *nit); extern bool get_max_loop_iterations (const class loop *loop, widest_int *nit); extern bool get_likely_max_loop_iterations (class loop *loop, widest_int *nit); extern int bb_loop_depth (const_basic_block); +extern edge single_dom_exit (class loop *); /* Converts VAL to widest_int. 
*/ diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h index af6a29f70c42..dab7b31c1e77 100644 --- a/gcc/cfgloopmanip.h +++ b/gcc/cfgloopmanip.h @@ -68,5 +68,6 @@ class loop * loop_version (class loop *, void *, void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise); void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb, profile_count num, profile_count den); +void update_exit_probability_after_unrolling (class loop *loop, edge new_exit); #endif /* GCC_CFGLOOPMANIP_H */ diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc index 36e43d0d7373..abcb472e2a2d 100644 --- a/gcc/cfgrtl.cc +++ b/gcc/cfgrtl.cc @@ -5409,7 +5409,7 @@ struct cfg_hooks cfg_layout_rtl_cfg_hooks = { rtl_flow_call_edges_add, NULL, /* execute_on_growing_pred */ NULL, /* execute_on_shrinking_pred */ - duplicate_loop_body_to_header_edge, /* duplicate loop for trees */ + duplicate_loop_body_to_header_edge, /* duplicate loop for rtl */ rtl_lv_add_condition_to_bb, /* lv_add_condition_to_bb */ NULL, /* lv_adjust_loop_header_phi*/ rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */ diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc index 93333d8ba116..bbfa6ccc7700 100644 --- a/gcc/loop-unroll.cc +++ b/gcc/loop-unroll.cc @@ -487,6 +487,7 @@ unroll_loop_constant_iterations (class loop *loop) bool exit_at_end = loop_exit_at_end_p (loop); struct opt_info *opt_info = NULL; bool ok; + bool flat = maybe_flat_loop_profile (loop); niter = desc->niter; @@ -603,9 +604,14 @@ unroll_loop_constant_iterations (class loop *loop) ok = duplicate_loop_body_to_header_edge ( loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge, &remove_edges, - DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)); + DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0) + | (flat ? 
DLTHE_FLAG_FLAT_PROFILE : 0)); gcc_assert (ok); + edge new_exit = single_dom_exit (loop); + if (new_exit) + update_exit_probability_after_unrolling (loop, new_exit); + if (opt_info) { apply_opt_in_copies (opt_info, max_unroll, true, true); diff --git a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c index 7245b68c1ee9..32ecccb16da2 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */ +/* { dg-options "-O3 -fdump-tree-cunroll-details-blocks -fdump-tree-optimized-details-blocks -fno-unroll-loops -fpeel-loops" } */ void abort(); int a[1000]; @@ -21,3 +21,5 @@ main() return 0; } /* { dg-final-use { scan-tree-dump "Peeled loop ., 1 times" "cunroll" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "cunroll" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c index 3ad0cf019b34..0b25c1f2f1cf 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */ +/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops -fno-peel-loops" } */ void abort (); int a[1000]; @@ -20,4 +20,5 @@ main() t(); return 0; } -/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final-use-not-autofdo { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final-use-not-autofdo { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c index dc5848cb5c51..bc136605e94c 100644 --- 
a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details-blocks" } */ struct foo {int b; int a[3];} foo; void add(struct foo *a,int l) { @@ -9,3 +9,4 @@ void add(struct foo *a,int l) } /* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 2 times." "cunroll"} } */ /* { dg-final { scan-tree-dump "Peeled loop 1, 3 times." "cunroll"} } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-1.c b/gcc/testsuite/gcc.dg/unroll-1.c index e7032891823f..ff2cbb07b222 100644 --- a/gcc/testsuite/gcc.dg/unroll-1.c +++ b/gcc/testsuite/gcc.dg/unroll-1.c @@ -1,7 +1,7 @@ /* PR optimization/8599 */ /* { dg-do run } */ /* { dg-options "-O2 -funroll-loops" } */ -/* { dg-options "-mtune=k6 -O2 -funroll-loops" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ +/* { dg-options "-mtune=k6 -O2 -funroll-loops -fdump-rtl-loop2_unroll-details-blocks" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ extern void abort (void); @@ -25,3 +25,5 @@ int main() abort (); return 0; } +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-3.c b/gcc/testsuite/gcc.dg/unroll-3.c index 10bf59b9a2e7..fbc8378c73d9 100644 --- a/gcc/testsuite/gcc.dg/unroll-3.c +++ b/gcc/testsuite/gcc.dg/unroll-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { 
dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-4.c b/gcc/testsuite/gcc.dg/unroll-4.c index 17f194212279..055ef3f35455 100644 --- a/gcc/testsuite/gcc.dg/unroll-4.c +++ b/gcc/testsuite/gcc.dg/unroll-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-5.c b/gcc/testsuite/gcc.dg/unroll-5.c index f3bdebe9882f..1f22b1fa5d6b 100644 --- a/gcc/testsuite/gcc.dg/unroll-5.c +++ b/gcc/testsuite/gcc.dg/unroll-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-6.c b/gcc/testsuite/gcc.dg/unroll-6.c index e4c231ea79f6..7664bbff109f 100644 --- a/gcc/testsuite/gcc.dg/unroll-6.c +++ 
b/gcc/testsuite/gcc.dg/unroll-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops" } */ +/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */ /* { dg-require-effective-target int32plus } */ void abort (void); @@ -32,3 +32,4 @@ int t2() /* { dg-final { scan-rtl-dump-not "realistic bound: 999999" "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump-times " upper bound: 2999999" 1 "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump-times "realistic bound: 2999999" 1 "loop2_unroll" } } */ +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" { xfail *-*-* } } } */ diff --git a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h index 7a53ce47f109..31ec893b9cb0 100644 --- a/gcc/tree-ssa-loop-ivopts.h +++ b/gcc/tree-ssa-loop-ivopts.h @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_TREE_SSA_LOOP_IVOPTS_H #define GCC_TREE_SSA_LOOP_IVOPTS_H -extern edge single_dom_exit (class loop *); extern void dump_iv (FILE *, struct iv *); extern void dump_use (FILE *, struct iv_use *); extern void dump_uses (FILE *, struct ivopts_data *); diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 8e3b1057b6ff..e58892e235ca 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -1040,6 +1040,29 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, *exit_bound = bound; } +/* Update NEW_EXIT probability after LOOP has been unrolled. */ + +void +update_exit_probability_after_unrolling (class loop *loop, edge new_exit) +{ + /* gimple_duplicate_loop_body_to_header_edge depending on + DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header + or scales it down accordingly. + However exit edge probability is kept as original. Fix it if needed + and compensate. 
*/ + profile_probability new_prob + = loop_preheader_edge + (loop)->count ().probability_in (new_exit->src->count); + if (!(new_prob == new_exit->probability)) + { + profile_count old_count = new_exit->src->count - new_exit->count (); + set_edge_probability_and_rescale_others (new_exit, new_prob); + profile_count new_count = new_exit->src->count - new_exit->count (); + scale_dominated_blocks_in_loop (loop, new_exit->src, + new_count, old_count); + } +} + /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge whose source block dominates the latch. DESC describes the number of iterations of LOOP. @@ -1266,23 +1289,7 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor, update_ssa (TODO_update_ssa); new_exit = single_dom_exit (loop); - - /* gimple_duplicate_loop_body_to_header_edge depending on - DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header - or scales it down accordingly. - However exit edge probability is kept as original. Fix it if needed - and compensate. */ - profile_probability new_prob - = loop_preheader_edge - (loop)->count ().probability_in (new_exit->src->count); - if (!(new_prob == new_exit->probability)) - { - profile_count old_count = new_exit->src->count - new_exit->count (); - set_edge_probability_and_rescale_others (new_exit, new_prob); - profile_count new_count = new_exit->src->count - new_exit->count (); - scale_dominated_blocks_in_loop (loop, new_exit->src, - new_count, old_count); - } + update_exit_probability_after_unrolling (loop, new_exit); if (!single_loop_p) { /* Finally create the new counter for number of iterations and add