For some time now, information about profile mismatches has been printed only with -details-blocks.
I think it should be printed even by default, to make it easier to spot when someone introduces
a new transform that breaks the profile, but I will send a separate RFC for that.
This patch enables details in all testcases that grep for "Invalid sum". There are 4 testcases
which fail:
gcc.dg/tree-ssa/loop-ch-profile-1.c
Here the problem is that loop header duplication introduces a loop-invariant conditional that is later
updated by tree-ssa-dom, but dom does not take care of updating the profile.
Since loop-ch knows when it duplicates a loop invariant, we may be able to get this right.
The test is still useful since it tests that the profile is consistent right after ch.
gcc.dg/tree-prof/update-cunroll-2.c
This is about the profile updating code in duplicate_loop_body_to_header_edge being wrong when the
optimized-out exit is not the last one in the loop. In that case the probability of later exits needs to be accounted for.
I will think about making this better - in general this does not seem to have an easy solution, but for
the special case of chained tests we can definitely account for the later exits.
gcc.dg/tree-ssa/update-unroll-1.c
This fails after the unrolling invoked by aprefetch. I have not looked into the details yet.
gcc.dg/tree-prof/update-unroll-2.c
This one seems similar to the previous one.
I decided to xfail these tests and deal with them incrementally, and filed PR110590.
gcc/testsuite/ChangeLog:
* g++.dg/tree-prof/indir-call-prof.C: Add blocks-details to dump flags.
* gcc.dg/pr43864-2.c: Likewise.
* gcc.dg/pr43864-3.c: Likewise.
* gcc.dg/pr43864-4.c: Likewise.
* gcc.dg/pr43864.c: Likewise.
* gcc.dg/tree-prof/cold_partition_label.c: Likewise.
* gcc.dg/tree-prof/indir-call-prof.c: Likewise.
* gcc.dg/tree-prof/update-cunroll-2.c: Likewise.
* gcc.dg/tree-prof/update-tailcall.c: Likewise.
* gcc.dg/tree-prof/val-prof-1.c: Likewise.
* gcc.dg/tree-prof/val-prof-2.c: Likewise.
* gcc.dg/tree-prof/val-prof-3.c: Likewise.
* gcc.dg/tree-prof/val-prof-4.c: Likewise.
* gcc.dg/tree-prof/val-prof-5.c: Likewise.
* gcc.dg/tree-ssa/fnsplit-1.c: Likewise.
* gcc.dg/tree-ssa/loop-ch-profile-2.c: Likewise.
* gcc.dg/tree-ssa/update-threading.c: Likewise.
* gcc.dg/tree-ssa/update-unswitch-1.c: Likewise.
* gcc.dg/unroll-7.c: Likewise.
* gcc.dg/unroll-8.c: Likewise.
* gfortran.dg/pr25623-2.f90: Likewise.
* gfortran.dg/pr25623.f90: Likewise.
* gcc.dg/tree-ssa/loop-ch-profile-1.c: Likewise; xfail.
* gcc.dg/tree-ssa/update-cunroll.c: Likewise; xfail.
* gcc.dg/tree-ssa/update-unroll-1.c: Likewise; xfail.
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-blocks-details -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
struct A {
A () {}
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre-details-blocks" } */
int
f (int c, int b, int d)
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -fdump-tree-pre-blocks-details" } */
/* Commutative case. */
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre-details-blocks" } */
/* Different stmt order. */
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-tail-merge -fdump-tree-pre" } */
+/* { dg-options "-O2 -fdump-tree-pre-details-blocks" } */
extern void foo (char*, int);
extern void mysprintf (char *, char *);
/* Test case to check if function foo gets split and the cold function
gets a label. */
/* { dg-require-effective-target freorder } */
-/* { dg-options "-O2 -freorder-blocks-and-partition -save-temps -fdump-tree-optimized" } */
+/* { dg-options "-O2 -freorder-blocks-and-partition -save-temps -fdump-tree-optimized-details-blocks" } */
#ifdef FOR_AUTOFDO_TESTING
#define MAXITER 1000000
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized -fdump-ipa-afdo-optimized" } */
static int a1 (void)
{
-/* { dg-options "-O2 -fdump-tree-optimized-blocks" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
int a[8];
__attribute__ ((noinline))
int t()
t ();
return 0;
}
-/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */
+/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized" {xfail *-*-*} } } */
-/* { dg-options "-O2 -fdump-tree-tailc -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-tailc-details-blocks -fdump-tree-optimized-details-blocks" } */
__attribute__ ((noinline))
int factorial(int x)
{
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized" } */
int a[1000];
int b = 256;
int c = 257;
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized" } */
unsigned int a[1000];
unsigned int b = 256;
unsigned int c = 1024;
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized" } */
unsigned int a[1000];
unsigned int b = 257;
unsigned int c = 1023;
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized" } */
unsigned int a[1000];
unsigned int b = 999;
unsigned int c = 1002;
-/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks -fdump-ipa-profile-optimized" } */
int a[1000];
int b=997;
int
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fnsplit" } */
+/* { dg-options "-O2 -fdump-tree-fnsplit-blocks-details" } */
#include <stdio.h>
int a[1000];
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized-blocks-details" } */
void foo ();
void test(int v, int q)
{
foo ();
}
/* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
-/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized"} } */
+/* dom2 optimizes out the redundant test for loop invariant v/q
+ which leads to inconsistent profile. */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { xfail *-*-* }} } */
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized" } */
+/* { dg-options "-O1 -fdump-tree-ch2-blocks-details -fdump-tree-optimized-blocks-details" } */
void foo ();
void test()
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-details-blocks" } */
int a[8];
int t()
{
break;
return i;
}
-/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "optimized"} } */
+/* Currently duplicate_loop_body_to_header_edge gets wrong computation of prob_pass_wont_exit
+ which assumes that the exit condition is last in the loop. */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "optimized" { xfail *-*-*}} } */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized-blocks-details" } */
typedef struct { unsigned short a; } A;
/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-options "-O1 -fprefetch-loop-arrays -march=amdfam10 -fdump-tree-aprefetch-blocks" } */
+/* { dg-options "-O1 -fprefetch-loop-arrays -march=amdfam10 -fdump-tree-aprefetch-blocks-details" } */
int a[10000];
/* We used to make the probability that the body of the loop (unrolled
to enable prefetching) is entered 0, which is not correct. */
-/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch" { xfail *-*-* }} } */
/* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
/* { dg-do compile } */
-/* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-blocks" } */
+/* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-blocks-details" } */
int bla(int p)
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-vectorize -fdump-rtl-loop2_unroll-details -funroll-loops" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fdump-rtl-loop2_unroll-blocks-details -funroll-loops" } */
/* { dg-require-effective-target int32plus } */
extern int *a;
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-rtl-loop2_unroll -funroll-loops" } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */
/* { dg-additional-options "-fno-tree-vectorize" { target amdgcn-*-* } } */
struct a {int a[7];};
! { dg-do compile }
-! { dg-options "-fdump-tree-optimized-blocks -O3" }
+! { dg-options "-fdump-tree-optimized-blocks-details -O3" }
SUBROUTINE S42(a,b,c,N)
IMPLICIT NONE
! { dg-do compile }
-! { dg-options "-fdump-tree-optimized-blocks -O2" }
+! { dg-options "-fdump-tree-optimized-blocks-details -O2" }
SUBROUTINE S42(a,b,c,N)
IMPLICIT NONE