]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fix profile update in tree_transform_and_unroll_loop
authorJan Hubicka <jh@suse.cz>
Thu, 27 Jul 2023 14:17:59 +0000 (16:17 +0200)
committerJan Hubicka <jh@suse.cz>
Thu, 27 Jul 2023 14:17:59 +0000 (16:17 +0200)
Fixe profile update in tree_transform_and_unroll_loop which is used
by predictive comming.  I stared by attempt to fix
gcc.dg/tree-ssa/update-unroll-1.c I xfailed last week, but it turned to be
harder job.

Unrolling was never fixed for changes in duplicate_loop_body_to_header_edge
which is now smarter on getting profile right when some exists are eliminated.
A lot of manual profile can thus now be done using existing infrastructure.

I also noticed that scale_dominated_blocks_in_loop does job identical
to loop I wrote in scale_loop_profile and thus I commonized the implementaiton
and removed recursion.

I also extended duplicate_loop_body_to_header_edge to handle flat profiles same
way as we do in vectorizer. Without it we end up with less then 0 iteration
count in gcc.dg/tree-ssa/update-unroll-1.c (it is unrolled 32times but predicted
to iterated fewer times) and added missing code to update loop_info.

gcc/ChangeLog:

* cfgloopmanip.cc (scale_dominated_blocks_in_loop): Move here from
tree-ssa-loop-manip.cc and avoid recursion.
(scale_loop_profile): Use scale_dominated_blocks_in_loop.
(duplicate_loop_body_to_header_edge): Add DLTHE_FLAG_FLAT_PROFILE
flag.
* cfgloopmanip.h (DLTHE_FLAG_FLAT_PROFILE): Define.
(scale_dominated_blocks_in_loop): Declare.
* predict.cc (dump_prediction): Do not ICE on uninitialized probability.
(change_edge_frequency): Remove.
* predict.h (change_edge_frequency): Remove.
* tree-ssa-loop-manip.cc (scale_dominated_blocks_in_loop): Move to
cfgloopmanip.cc.
(niter_for_unrolled_loop): Remove.
(tree_transform_and_unroll_loop): Fix profile update.

gcc/testsuite/ChangeLog:

* gcc.dg/pr102385.c: Check for no profile mismatches.
* gcc.dg/pr96931.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-1.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-2.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-3.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-4.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-5.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-7.c: Check for one profile mismatch.
* gcc.dg/tree-ssa/predcom-8.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-1.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-10.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-11.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-12.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-2.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-3.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-4.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-5.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-6.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-7.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-8.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/predcom-dse-9.c: Check for no profile mismatches.
* gcc.dg/tree-ssa/update-unroll-1.c: Unxfail.

27 files changed:
gcc/cfgloopmanip.cc
gcc/cfgloopmanip.h
gcc/predict.cc
gcc/predict.h
gcc/testsuite/gcc.dg/pr102385.c
gcc/testsuite/gcc.dg/pr96931.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-7.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-10.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-11.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-12.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-5.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-6.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-7.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-8.c
gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-9.c
gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
gcc/tree-ssa-loop-manip.cc

index 3012a8d60f7df170b39cc7ba4b976e984a9a3280..c3d292d0dd4c5b607f5dd1ad0f968841b1cb111e 100644 (file)
@@ -499,6 +499,32 @@ scale_loop_frequencies (class loop *loop, profile_probability p)
   free (bbs);
 }
 
+/* Scales the frequencies of all basic blocks in LOOP that are strictly
+   dominated by BB by NUM/DEN.  */
+
+void
+scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+                               profile_count num, profile_count den)
+{
+  basic_block son;
+
+  if (!den.nonzero_p () && !(num == profile_count::zero ()))
+    return;
+  auto_vec <basic_block, 8> worklist;
+  worklist.safe_push (bb);
+
+  while (!worklist.is_empty ())
+    for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
+        son;
+        son = next_dom_son (CDI_DOMINATORS, son))
+      {
+       if (!flow_bb_inside_loop_p (loop, son))
+         continue;
+       son->count = son->count.apply_scale (num, den);
+       worklist.safe_push (son);
+      }
+}
+
 /* Scale profile in LOOP by P.
    If ITERATION_BOUND is not -1, scale even further if loop is predicted
    to iterate too many times.
@@ -649,19 +675,9 @@ scale_loop_profile (class loop *loop, profile_probability p,
       if (other_edge && other_edge->dest == loop->latch)
        loop->latch->count -= new_exit_count - old_exit_count;
       else
-       {
-         basic_block *body = get_loop_body (loop);
-         profile_count new_count = exit_edge->src->count - new_exit_count;
-         profile_count old_count = exit_edge->src->count - old_exit_count;
-
-         for (unsigned int i = 0; i < loop->num_nodes; i++)
-           if (body[i] != exit_edge->src
-               && dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
-             body[i]->count = body[i]->count.apply_scale (new_count,
-                                                          old_count);
-
-         free (body);
-       }
+       scale_dominated_blocks_in_loop (loop, exit_edge->src,
+                                       exit_edge->src->count - new_exit_count,
+                                       exit_edge->src->count - old_exit_count);
     }
   else if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1237,6 +1253,7 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
             should've managed the flags so all except for original loop
             has won't exist set.  */
          scale_act = wanted_count.probability_in (count_in);
+
          /* Now simulate the duplication adjustments and compute header
             frequency of the last copy.  */
          for (i = 0; i < ndupl; i++)
@@ -1252,16 +1269,21 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e,
          profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
                                                        ? prob_pass_wont_exit
                                                        : prob_pass_thru;
-         profile_probability p = prob_pass_main;
-         profile_count scale_main_den = count_in;
-         for (i = 0; i < ndupl; i++)
+         if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
            {
-             scale_main_den += count_in.apply_probability (p);
-             p = p * scale_step[i];
+             profile_probability p = prob_pass_main;
+             profile_count scale_main_den = count_in;
+             for (i = 0; i < ndupl; i++)
+               {
+                 scale_main_den += count_in.apply_probability (p);
+                 p = p * scale_step[i];
+               }
+             /* If original loop is executed COUNT_IN times, the unrolled
+                loop will account SCALE_MAIN_DEN times.  */
+             scale_main = count_in.probability_in (scale_main_den);
            }
-         /* If original loop is executed COUNT_IN times, the unrolled
-            loop will account SCALE_MAIN_DEN times.  */
-         scale_main = count_in.probability_in (scale_main_den);
+         else
+           scale_main = profile_probability::always ();
          scale_act = scale_main * prob_pass_main;
        }
       else
index 75b2a5e9b7565bfde49bbdc23fecd0e5f0c85e14..af6a29f70c42e42747364ce385255aefd15dcc63 100644 (file)
@@ -32,6 +32,8 @@ enum
                                           field of newly create BB.  */
 #define DLTHE_FLAG_COMPLETTE_PEEL 4    /* Update frequencies expecting
                                           a complete peeling.  */
+#define DLTHE_FLAG_FLAT_PROFILE 8      /* Profile is flat; do not reduce
+                                          count by unroll factor.  */
 extern edge mfb_kj_edge;
 
 extern bool remove_path (edge, bool * = NULL, bitmap = NULL);
@@ -64,5 +66,7 @@ class loop * loop_version (class loop *, void *,
                            profile_probability, profile_probability,
                            profile_probability, profile_probability, bool);
 void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
+void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+                                    profile_count num, profile_count den);
 
 #endif /* GCC_CFGLOOPMANIP_H */
index 6777e6cb9c5216b0f6e454b065adaa4fd9045a50..5a1a561cc2461c6f82d3883337f7201ddcc46fc3 100644 (file)
@@ -790,7 +790,7 @@ dump_prediction (FILE *file, enum br_predictor predictor, int probability,
     {
       fprintf (file, "  exec ");
       bb->count.dump (file);
-      if (e)
+      if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
        {
          fprintf (file, " hit ");
          e->count ().dump (file);
@@ -4634,43 +4634,6 @@ force_edge_cold (edge e, bool impossible)
     }
 }
 
-/* Change E's probability to NEW_E_PROB, redistributing the probabilities
-   of other outgoing edges proportionally.
-
-   Note that this function does not change the profile counts of any
-   basic blocks.  The caller must do that instead, using whatever
-   information it has about the region that needs updating.  */
-
-void
-change_edge_frequency (edge e, profile_probability new_e_prob)
-{
-  profile_probability old_e_prob = e->probability;
-  profile_probability old_other_prob = old_e_prob.invert ();
-  profile_probability new_other_prob = new_e_prob.invert ();
-
-  e->probability = new_e_prob;
-  profile_probability cumulative_prob = new_e_prob;
-
-  unsigned int num_other = EDGE_COUNT (e->src->succs) - 1;
-  edge other_e;
-  edge_iterator ei;
-  FOR_EACH_EDGE (other_e, ei, e->src->succs)
-    if (other_e != e)
-      {
-       num_other -= 1;
-       if (num_other == 0)
-         /* Ensure that the probabilities add up to 1 without
-            rounding error.  */
-         other_e->probability = cumulative_prob.invert ();
-       else
-         {
-           other_e->probability /= old_other_prob;
-           other_e->probability *= new_other_prob;
-           cumulative_prob += other_e->probability;
-         }
-      }
-}
-
 #if CHECKING_P
 
 namespace selftest {
index 4864b7d711367cfaa2fbaaa548d923ba5fd713d4..42b7c00c46ad808c03acf02a3a1ae56458717a73 100644 (file)
@@ -100,7 +100,6 @@ extern void rebuild_frequencies (void);
 extern void report_predictor_hitrates (void);
 extern void force_edge_cold (edge, bool);
 extern void propagate_unlikely_bbs_forward (void);
-extern void change_edge_frequency (edge, profile_probability);
 
 extern void add_reg_br_prob_note (rtx_insn *, profile_probability);
 
index 1339540c722c0bf917908a516c339d094732cf0f..bdccc9e43b143751897d387030d3589d446e5bec 100644 (file)
@@ -1,4 +1,4 @@
-/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning" } */
+/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning -fdump-tree-pcom-details-blocks -fdump-tree-lim-details-blocks" } */
 
 short a, b;
 int c[9];
@@ -12,3 +12,5 @@ void e() {
   }
 }
 int main() {return 0;}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "lim2" } } */
index 94b8a1128eee495586aefbdd3db3cbae72e6549e..660391588ff6fd032a32c71859e2f14fe3f30630 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im" } */
+/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im -fdump-tree-pcom-details-blocks" } */
 
 int bl;
 
@@ -17,3 +17,4 @@ ie (void)
 
   ie ();
 }
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 8c3d9a4fc58ccbf3dbbbf11f7ab37b948ab0fb6b..d0a5318e35145d01cc3b4672cf0b8ef95659a92f 100644 (file)
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 void abort (void);
 
@@ -47,3 +47,4 @@ int main(void)
 
 /* Also check that we undid the transformation previously made by PRE.  */
 /* { dg-final { scan-tree-dump-times "looparound ref" 1 "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 1c54679c022b05be18ba0bc780b47ebe40ea31ac..f19edd4cd74300ea6c38d75b0b0ff58d75ee032e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre" } */
 /* { dg-additional-options "-fno-tree-vectorize" { target amdgcn-*-* } } */
 
 void abort (void);
@@ -44,3 +44,4 @@ int main(void)
 
 /* Verify that both loops were transformed and unrolled.  */
 /* { dg-final { scan-tree-dump-times "Unrolling 2 times." 2 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 9abbe6c1380038811875696278ba12d42da39260..aaee3b7ff9e829e5ee24bf4cf0f62bfeade64422 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre -fno-tree-loop-vectorize" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre -fno-tree-loop-vectorize" } */
 
 int a[1000], b[1000];
 
@@ -13,3 +13,4 @@ void test(void)
 
 /* Verify that we used 3 temporary variables for the loop.  */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 382a464cef7aeb5d805157787a0296dd6d6f0758..af9ae0e0f3d1256575bf4c72b9dc344bffe8503b 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 /* Test for predictive commoning of expressions, without reassociation.  */
 
@@ -26,3 +26,4 @@ int main(void)
 
 /* { dg-final { scan-tree-dump-times "Combination" 1 "pcom"} } */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index a3ee1d946a76db246188f1c850e8f9bf8b918606..52adb59d669af876d091f6312ee3c9a6067ab783 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 /* Test for predictive commoning of expressions, with reassociation.  */
 
@@ -26,3 +26,4 @@ int main(void)
 
 /* { dg-final { scan-tree-dump-times "Combination" 2 "pcom"} } */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 683fb9b5d351231e3d085d22512f2c89254f4cd5..99939767562221036b8ef0ce45a80e19d01da2cb 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
 
 int b, f, d[5][2];
 unsigned int c;
@@ -15,3 +15,7 @@ main ()
 }
 
 /* { dg-final { scan-tree-dump "Executing predictive commoning" "pcom" } } */
+/* dom pass introduces one mismatch after simplfying mispredicted conditional
+   on c being non-zero on first iteration.  This happens since c is global variable
+   and needs alias analysis.  */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 1 "pcom" } } */
index c4562768398b50a3d35e7d1d237e235988d963e8..dcddf57314580c061792dc91bf0f0edb915bdf33 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
 
 int is_sorted(int *a, int n)
 {
@@ -10,3 +10,4 @@ int is_sorted(int *a, int n)
 }
 
 /* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index b61b651fca85f74ec34179f5b0e9afc067410d30..a0a04a08c61d48128ad5fd1a11daaf0abc783053 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index bd5575d9502fe10100b7c7c1036aa0d0cd6661ad..f770a8ad812aedee8f65b011134cda91cbe2bf91 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -41,4 +41,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump-not "Store-stores chain" "pcom"} } */
-
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 9e496f68a1210fef98567d4be1652da2a7cf3509..ed2b96a0d1a4e0c90bf52a83b5f21e2fd1c5a5c5 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int x[105] = {2, 3, 5, 7, 11};
@@ -48,4 +48,5 @@ int main (void)
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
 /* { dg-final { scan-tree-dump "Store-loads chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
 
index 510c600f574acff266f9aee546a04fa659dd64c3..2487c1c8205a4f09fd16974f3599ddc8c48b92cf 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -65,3 +65,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 7f959a833f4a26bf268aac93253abb08b7187aa1..020ca705790d6ace707184c9d2804f3d690de916 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 1fc8f089345a1c329adefa50aa474c65c888d980..667cc333d9f2c030474e0b3115c0b86cda733c2e 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr1[105] = {2, 3, 5, 7, 11, 13, 0};
 int arr2[105] = {2, 3, 5, 7, 11, 13, 0};
@@ -106,3 +106,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump-times "Store-stores chain" 4 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index f97d32f4b41dd2b5bbeb80ab577d9c805331d664..8118461af0b63d1f9b42879783ae2650a9d9b34a 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -59,3 +59,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index a13d56098bef1286cd6b64078abc4acb98647253..03fa646661e2839946e80e0b27ea1d0ea0ef9aeb 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -61,3 +61,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 63d6c8f33b0d3600c2fd7b56ad213cdb4d8ecb0a..ab2fd403d3005ba06d9992580945ce28f8fb1c09 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -63,3 +63,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 0bde6e6dced4e372c2950636e033e9acbf2b22a8..c746ebd715561eb9f7192a433c321f86e0751eaa 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -61,3 +61,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 45ffd25c424e965d805672f4220ac864c9943623..6c4e9afa487ed33e4ab5d887640e0efa44a72c6d 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -58,3 +58,4 @@ int main (void)
 
   return 0;
 }
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 1c4e3140309a60c67229aa53efa65fa92426f820..9c5e8ca9a793b0405e7f448798aa1fac483d2f05 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr1[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int arr2[105] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -88,3 +88,4 @@ int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
index 138448bac4377d262c615297483d427186d72ac9..23dd5c0d570b69a888a77d81d7698845409c64dc 100644 (file)
@@ -16,5 +16,4 @@ int foo(unsigned n)
 /* We used to make the probability that the body of the loop (unrolled
    to enable prefetching) is entered 0, which is not correct.  */
 
-/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch" { xfail *-*-* }} } */
 /* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
index b2d3dcf406d45a8c5206fbdf7e0f20600c8a2b6a..8e3b1057b6ff67e870d69e44de94f8a19cef3de5 100644 (file)
@@ -1040,71 +1040,6 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
   *exit_bound = bound;
 }
 
-/* Scales the frequencies of all basic blocks in LOOP that are strictly
-   dominated by BB by NUM/DEN.  */
-
-static void
-scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
-                               profile_count num, profile_count den)
-{
-  basic_block son;
-
-  if (!den.nonzero_p () && !(num == profile_count::zero ()))
-    return;
-
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-       son;
-       son = next_dom_son (CDI_DOMINATORS, son))
-    {
-      if (!flow_bb_inside_loop_p (loop, son))
-       continue;
-      scale_bbs_frequencies_profile_count (&son, 1, num, den);
-      scale_dominated_blocks_in_loop (loop, son, num, den);
-    }
-}
-
-/* Return estimated niter for LOOP after unrolling by FACTOR times.  */
-
-gcov_type
-niter_for_unrolled_loop (class loop *loop, unsigned factor)
-{
-  gcc_assert (factor != 0);
-  bool profile_p = false;
-  gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
-  /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
-     "+ 1" converts latch iterations to loop iterations and the "- 1"
-     converts back.  */
-  gcov_type new_est_niter = est_niter / factor;
-
-  if (est_niter == -1)
-    return -1;
-
-  /* Without profile feedback, loops for which we do not know a better estimate
-     are assumed to roll 10 times.  When we unroll such loop, it appears to
-     roll too little, and it may even seem to be cold.  To avoid this, we
-     ensure that the created loop appears to roll at least 5 times (but at
-     most as many times as before unrolling).  Don't do adjustment if profile
-     feedback is present.  */
-  if (new_est_niter < 5 && !profile_p)
-    {
-      if (est_niter < 5)
-       new_est_niter = est_niter;
-      else
-       new_est_niter = 5;
-    }
-
-  if (loop->any_upper_bound)
-    {
-      /* As above, this is really CEIL (upper_bound + 1, factor) - 1.  */
-      widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
-                                        factor);
-      if (wi::ltu_p (bound, new_est_niter))
-       new_est_niter = bound.to_uhwi ();
-    }
-
-  return new_est_niter;
-}
-
 /* Unroll LOOP FACTOR times.  LOOP is known to have a single exit edge
    whose source block dominates the latch.  DESC describes the number of
    iterations of LOOP.
@@ -1169,47 +1104,39 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
                                transform_callback transform,
                                void *data)
 {
-  gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
   unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
 
   enum tree_code exit_cmp;
   tree enter_main_cond, exit_base, exit_step, exit_bound;
+  bool flat = maybe_flat_loop_profile (loop);
   determine_exit_conditions (loop, desc, factor,
                             &enter_main_cond, &exit_base, &exit_step,
                             &exit_cmp, &exit_bound);
   bool single_loop_p = !exit_base;
 
-  /* Let us assume that the unrolled loop is quite likely to be entered.  */
-  profile_probability prob_entry;
-  if (integer_nonzerop (enter_main_cond))
-    prob_entry = profile_probability::always ();
-  else
-    prob_entry = profile_probability::guessed_always ()
-                       .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
-
   gcond *exit_if = nullptr;
   class loop *new_loop = nullptr;
   edge new_exit;
   if (!single_loop_p)
     {
-      edge exit = single_dom_exit (loop);
+      profile_count entry_count = loop_preheader_edge (loop)->src->count;
+      /* Let us assume that the unrolled loop is quite likely to be entered.  */
+      profile_probability prob_entry;
+      if (integer_nonzerop (enter_main_cond))
+       prob_entry = profile_probability::always ();
+      else
+       prob_entry = profile_probability::guessed_always ()
+                           .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
+
 
       /* The values for scales should keep profile consistent, and somewhat
-        close to correct.
-
-        TODO: The current value of SCALE_REST makes it appear that the loop
-        that is created by splitting the remaining iterations of the unrolled
-        loop is executed the same number of times as the original loop, and
-        with the same frequencies, which is obviously wrong.  This does not
-        appear to cause problems, so we do not bother with fixing it for now.
-        To make the profile correct, we would need to change the probability
-        of the exit edge of the loop, and recompute the distribution of
-        frequencies in its body because of this change (scale the frequencies
-        of blocks before and after the exit by appropriate factors).  */
-      profile_probability scale_unrolled = prob_entry;
+        close to correct.  */
       new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
-                              prob_entry.invert (), scale_unrolled,
-                              profile_probability::guessed_always (),
+                              prob_entry.invert (),
+                              prob_entry,
+                              /* We will later redirect exit from vectorized
+                                 loop to new_loop.  */
+                              profile_probability::always (),
                               true);
       gcc_assert (new_loop != NULL);
       update_ssa (TODO_update_ssa_no_phi);
@@ -1220,18 +1147,16 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       edge precond_edge = single_pred_edge (rest);
       split_edge (loop_latch_edge (loop));
       basic_block exit_bb = single_pred (loop->latch);
+      edge exit = single_dom_exit (loop);
 
       /* Since the exit edge will be removed, the frequency of all the blocks
-        in the loop that are dominated by it must be scaled by
-        1 / (1 - exit->probability).  */
+        in the loop that are dominated by it must be scaled.  */
       if (exit->probability.initialized_p ())
        scale_dominated_blocks_in_loop (loop, exit->src,
                                        /* We are scaling up here so
                                           probability does not fit.  */
-                                       loop->header->count,
-                                       loop->header->count
-                                       - loop->header->count.apply_probability
-                                           (exit->probability));
+                                       exit->src->count,
+                                       exit->src->count - exit->count ());
 
       gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
       exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
@@ -1243,14 +1168,14 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       rescan_loop_exit (new_exit, true, false);
 
       /* Set the probability of new exit to the same of the old one.  Fix
-        the frequency of the latch block, by scaling it back by
-        1 - exit->probability.  */
+        the count of the latch block.  */
       new_exit->probability = exit->probability;
       edge new_nonexit = single_pred_edge (loop->latch);
       new_nonexit->probability = exit->probability.invert ();
       new_nonexit->flags = EDGE_TRUE_VALUE;
-      if (new_nonexit->probability.initialized_p ())
-       scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
+      set_edge_probability_and_rescale_others
+             (exit, profile_probability::never ());
+      loop->latch->count = new_nonexit->count ();
 
       edge old_entry = loop_preheader_edge (loop);
       edge new_entry = loop_preheader_edge (new_loop);
@@ -1296,12 +1221,21 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
        }
 
       remove_path (exit);
+      /* We will later redirect exit from vectorized loop to new_loop.  */
+      loop_preheader_edge (new_loop)->src->count = entry_count;
 
       /* The epilog loop latch executes at most factor - 1 times.
         Since the epilog is entered unconditionally it will need to handle
         up to factor executions of its body.  */
-      new_loop->any_upper_bound = 1;
+      new_loop->any_upper_bound = true;
       new_loop->nb_iterations_upper_bound = factor - 1;
+      /* We do not really know estimate on number of iterations, since we do not
+        track any estimates modulo unroll factor.
+        Drop estimate from loop_info and scale loop profile.
+        It may be more realistic to scale loop profile to factor / 2 - 1,
+        but vectorizer also uses factor - 1.  */
+      new_loop->any_estimate = false;
+      scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
     }
   else
     new_exit = single_dom_exit (loop);
@@ -1318,10 +1252,10 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
 
   auto_vec<edge> to_remove;
   bool ok
-    = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
-                                                factor - 1, wont_exit,
-                                                new_exit, &to_remove,
-                                                DLTHE_FLAG_UPDATE_FREQ);
+    = gimple_duplicate_loop_body_to_header_edge
+           (loop, loop_latch_edge (loop), factor - 1, wont_exit,
+            new_exit, &to_remove,
+            DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
   gcc_assert (ok);
 
   for (edge e : to_remove)
@@ -1332,36 +1266,25 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   update_ssa (TODO_update_ssa);
 
   new_exit = single_dom_exit (loop);
+
+  /* gimple_duplicate_loop_body_to_header_edge depending on
+     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+     or scales it down accordingly.
+     However exit edge probability is kept as original.  Fix it if needed
+     and compensate.  */
+  profile_probability new_prob
+         = loop_preheader_edge
+                 (loop)->count ().probability_in (new_exit->src->count);
+  if (!(new_prob == new_exit->probability))
+    {
+      profile_count old_count = new_exit->src->count - new_exit->count ();
+      set_edge_probability_and_rescale_others (new_exit, new_prob);
+      profile_count new_count = new_exit->src->count - new_exit->count ();
+      scale_dominated_blocks_in_loop (loop, new_exit->src,
+                                     new_count, old_count);
+    }
   if (!single_loop_p)
     {
-      /* Ensure that the frequencies in the loop match the new estimated
-        number of iterations, and change the probability of the new
-        exit edge.  */
-
-      profile_count freq_h = loop->header->count;
-      profile_count freq_e = (loop_preheader_edge (loop))->count ();
-      if (freq_h.nonzero_p ())
-       {
-         /* Avoid dropping loop body profile counter to 0 because of zero
-            count in loop's preheader.  */
-         if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
-           freq_e = freq_e.force_nonzero ();
-         scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
-       }
-
-      basic_block rest = new_exit->dest;
-      new_exit->probability
-       = (profile_probability::always () / (new_est_niter + 1));
-
-      rest->count += new_exit->count ();
-
-      edge new_nonexit = single_pred_edge (loop->latch);
-      profile_probability prob = new_nonexit->probability;
-      new_nonexit->probability = new_exit->probability.invert ();
-      prob = new_nonexit->probability / prob;
-      if (prob.initialized_p ())
-       scale_bbs_frequencies (&loop->latch, 1, prob);
-
       /* Finally create the new counter for number of iterations and add
         the new exit instruction.  */
       tree ctr_before, ctr_after;
@@ -1374,66 +1297,15 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       gimple_cond_set_rhs (exit_if, exit_bound);
       update_stmt (exit_if);
     }
-  else
-    {
-      /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
-        original profile counts in line with the unroll factor.  However,
-        the old counts might not have been consistent with the old
-        iteration count.
-
-        Therefore, if the iteration count is known exactly, make sure that the
-        profile counts of the loop header (and any other blocks that might be
-        executed in the final iteration) are consistent with the combination
-        of (a) the incoming profile count and (b) the new iteration count.  */
-      profile_count in_count = loop_preheader_edge (loop)->count ();
-      profile_count old_header_count = loop->header->count;
-      if (in_count.nonzero_p ()
-         && old_header_count.nonzero_p ()
-         && TREE_CODE (desc->niter) == INTEGER_CST)
-       {
-         /* The + 1 converts latch counts to iteration counts.  */
-         profile_count new_header_count = in_count * (new_est_niter + 1);
-         basic_block *body = get_loop_body (loop);
-         scale_bbs_frequencies_profile_count (body, loop->num_nodes,
-                                              new_header_count,
-                                              old_header_count);
-         free (body);
-       }
-
-      /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
-        exit edges and adjusted the loop body's profile counts for the
-        new probabilities of the remaining non-exit edges.  However,
-        the remaining exit edge still has the same probability as it
-        did before, even though it is now more likely.
-
-        Therefore, all blocks executed after a failed exit test now have
-        a profile count that is too high, and the sum of the profile counts
-        for the header's incoming edges is greater than the profile count
-        of the header itself.
-
-        Adjust the profile counts of all code in the loop body after
-        the exit test so that the sum of the counts on entry to the
-        header agree.  */
-      profile_count old_latch_count = loop_latch_edge (loop)->count ();
-      profile_count new_latch_count = loop->header->count - in_count;
-      if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
-       scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
-                                       old_latch_count);
-
-      /* Set the probability of the exit edge based on NEW_EST_NITER
-        (which estimates latch counts rather than iteration counts).
-        Update the probabilities of other edges to match.
-
-        If the profile counts are large enough to give the required
-        precision, the updates above will have made
-
-           e->dest->count / e->src->count ~= new e->probability
-
-        for every outgoing edge e of NEW_EXIT->src.  */
-      profile_probability new_exit_prob
-       = profile_probability::always () / (new_est_niter + 1);
-      change_edge_frequency (new_exit, new_exit_prob);
-    }
+  if (loop->any_upper_bound)
+    loop->nb_iterations_upper_bound = wi::udiv_floor
+               (loop->nb_iterations_upper_bound + 1, factor) - 1;
+  if (loop->any_likely_upper_bound)
+    loop->nb_iterations_likely_upper_bound = wi::udiv_floor
+               (loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
+  if (loop->any_estimate)
+    loop->nb_iterations_estimate = wi::udiv_floor
+               (loop->nb_iterations_estimate + 1, factor) - 1;
 
   checking_verify_flow_info ();
   checking_verify_loop_structure ();