This is a rewrite of the backwards threader with a ranger based solver.
The code is divided into two parts: the path solver in
gimple-range-path.*, and the path discovery bits in
tree-ssa-threadbackward.c.
The legacy code is still available with --param=threader-mode=legacy,
but will be removed shortly after.
gcc/ChangeLog:
* Makefile.in (tree-ssa-loop-im.o-warn): New.
* flag-types.h (enum threader_mode): New.
* params.opt: Add entry for --param=threader-mode.
* tree-ssa-threadbackward.c (THREADER_ITERATIVE_MODE): New.
(class back_threader): New.
(back_threader::back_threader): New.
(back_threader::~back_threader): New.
(back_threader::maybe_register_path): New.
(back_threader::find_taken_edge): New.
(back_threader::find_taken_edge_switch): New.
(back_threader::find_taken_edge_cond): New.
(back_threader::resolve_def): New.
(back_threader::resolve_phi): New.
(back_threader::find_paths_to_names): New.
(back_threader::find_paths): New.
(dump_path): New.
(debug): New.
(thread_jumps::find_jump_threads_backwards): Call ranger threader.
(thread_jumps::find_jump_threads_backwards_with_ranger): New.
(pass_thread_jumps::execute): Abstract out code...
(try_thread_blocks): ...here.
* tree-ssa-threadedge.c (jump_threader::thread_outgoing_edges):
Abstract out threading candidate code to...
(single_succ_to_potentially_threadable_block): ...here.
* tree-ssa-threadedge.h (single_succ_to_potentially_threadable_block):
New.
* tree-ssa-threadupdate.c (register_jump_thread): Return boolean.
* tree-ssa-threadupdate.h (class jump_thread_path_registry):
Return bool from register_jump_thread.
libgomp/ChangeLog:
* testsuite/libgomp.graphite/force-parallel-4.c: Adjust for
threader.
* testsuite/libgomp.graphite/force-parallel-8.c: Same.
gcc/testsuite/ChangeLog:
* g++.dg/debug/dwarf2/deallocator.C: Adjust for threader.
* gcc.c-torture/compile/pr83510.c: Same.
* dg.dg/analyzer/pr94851-2.c: Same.
* gcc.dg/loop-unswitch-2.c: Same.
* gcc.dg/old-style-asm-1.c: Same.
* gcc.dg/pr68317.c: Same.
* gcc.dg/pr97567-2.c: Same.
* gcc.dg/predict-9.c: Same.
* gcc.dg/shrink-wrap-loop.c: Same.
* gcc.dg/sibcall-1.c: Same.
* gcc.dg/tree-ssa/builtin-sprintf-3.c: Same.
* gcc.dg/tree-ssa/pr21001.c: Same.
* gcc.dg/tree-ssa/pr21294.c: Same.
* gcc.dg/tree-ssa/pr21417.c: Same.
* gcc.dg/tree-ssa/pr21458-2.c: Same.
* gcc.dg/tree-ssa/pr21563.c: Same.
* gcc.dg/tree-ssa/pr49039.c: Same.
* gcc.dg/tree-ssa/pr61839_1.c: Same.
* gcc.dg/tree-ssa/pr61839_3.c: Same.
* gcc.dg/tree-ssa/pr77445-2.c: Same.
* gcc.dg/tree-ssa/split-path-4.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-11.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-12.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-14.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-18.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-6.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Same.
* gcc.dg/tree-ssa/ssa-fre-48.c: Same.
* gcc.dg/tree-ssa/ssa-thread-11.c: Same.
* gcc.dg/tree-ssa/ssa-thread-12.c: Same.
* gcc.dg/tree-ssa/ssa-thread-14.c: Same.
* gcc.dg/tree-ssa/vrp02.c: Same.
* gcc.dg/tree-ssa/vrp03.c: Same.
* gcc.dg/tree-ssa/vrp05.c: Same.
* gcc.dg/tree-ssa/vrp06.c: Same.
* gcc.dg/tree-ssa/vrp07.c: Same.
* gcc.dg/tree-ssa/vrp09.c: Same.
* gcc.dg/tree-ssa/vrp19.c: Same.
* gcc.dg/tree-ssa/vrp20.c: Same.
* gcc.dg/tree-ssa/vrp33.c: Same.
* gcc.dg/uninit-pred-9_b.c: Same.
* gcc.dg/uninit-pr61112.c: Same.
* gcc.dg/vect/bb-slp-16.c: Same.
* gcc.target/i386/avx2-vect-aggressive.c: Same.
* gcc.dg/tree-ssa/ranger-threader-1.c: New test.
* gcc.dg/tree-ssa/ranger-threader-2.c: New test.
* gcc.dg/tree-ssa/ranger-threader-3.c: New test.
* gcc.dg/tree-ssa/ranger-threader-4.c: New test.
* gcc.dg/tree-ssa/ranger-threader-5.c: New test.
EVRP_MODE_RVRP_DEBUG = EVRP_MODE_RVRP_ONLY | EVRP_MODE_DEBUG
};
+/* Backwards threader mode. */
+enum threader_mode
+{
+ THREADER_MODE_LEGACY = 0,
+ THREADER_MODE_RANGER = 1
+};
+
/* Modes of OpenACC 'kernels' constructs handling. */
enum openacc_kernels
{
Common Joined UInteger Var(param_modref_max_escape_points) Init(256) Param Optimization
Maximum number of escape points tracked by modref per SSA-name.
+-param=threader-iterative=
+Common Joined UInteger Var(param_threader_iterative) Init(0) Param Optimization
+Run backwards threader in iterative mode.
+
+-param=threader-mode=
+Common Joined Var(param_threader_mode) Enum(threader_mode) Init(THREADER_MODE_RANGER) Param Optimization
+--param=threader-mode=[legacy|ranger] Specifies the mode the backwards threader should run in.
+
+Enum
+Name(threader_mode) Type(enum threader_mode) UnknownError(unknown threader mode %qs)
+
+EnumValue
+Enum(threader_mode) String(legacy) Value(THREADER_MODE_LEGACY)
+
+EnumValue
+Enum(threader_mode) String(ranger) Value(THREADER_MODE_RANGER)
+
-param=tm-max-aggregate-size=
Common Joined UInteger Var(param_tm_max_aggregate_size) Init(9) Param Optimization
Size in bytes after which thread-local aggregates should be instrumented with the logging functions instead of save/restore pairs.
return;
}
}
- if (i)
+ if (i) // Threader makes everything after here disappear.
{
t test;
if (i == 10)
}
// { dg-final { scan-assembler "deallocator.C:29" } }
// { dg-final { scan-assembler "deallocator.C:24" } }
-// { dg-final { scan-assembler "deallocator.C:34" } }
// { dg-final { scan-assembler "deallocator.C:21" } }
(PR tree-optimization/83510). */
/* { dg-options "-Warray-bounds" } */
+/* { dg-xfail-if "" { "*-*-*" } { "-Os" } } */
+
+
+/* This test is XFAILed because thread1 threads a switch statement
+ such that the various cases have been split into different
+ independent blocks. One of these blocks exposes an arr[i_27]
+ which is later propagated by VRP to be arr[10]. This is an
+ invalid access, but the array bounds code doesn't know it is an
+ unreachable path.
+
+ However, it is not until dom2 that we "know" that the value of the
+ switch index is such that the path to arr[10] is unreachable. For
+ that matter, it is not until dom3 that we remove the unreachable
+ path.
+
+
+ See:
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83510
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83312
+
+ It's not until here that ranger "knows" that the path is
+ unreachable:
+
+ thread1
+ vrp1 <-- array bounds checking
+ dce2
+ stdarg
+ cdce
+ cselim
+ copyprop
+ ifcombine
+ mergephi3 <-- too late
+*/
extern int get_flag (void);
if (curbp->b_amark == (AMARK *)NULL)
curbp->b_amark = p;
else
- last->m_next = p; /* { dg-warning "dereference of NULL 'last'" } */
+ last->m_next = p; /* { dg-warning "dereference of NULL 'last'" "deref" { xfail *-*-* } } */
}
p->m_name = (char)c; /* { dg-bogus "leak of 'p'" "bogus leak" } */
/* { dg-do compile } */
-/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details -fdisable-tree-thread2 -fdisable-tree-thread3" } */
void foo (float **a, float **b, float *c, int n, int m, int l)
{
/* PR inline-asm/8832 */
/* { dg-do compile } */
-/* { dg-options "-O2 -dP" } */
+/* { dg-options "-O2 -dP -fdisable-tree-ethread -fdisable-tree-thread1 -fdisable-tree-thread2 -fdisable-tree-thread3 -fdisable-tree-thread4" } */
+
+/* Note: Threader will duplicate BBs and replace one conditional branch by an
+ unconditional one. */
/* Verify that GCC doesn't optimize
old style asm instructions. */
/* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -fdisable-tree-ethread" } */
+
+/* Note: Threader will collapse loop. */
typedef int int32_t __attribute__((mode (__SI__)));
/* { dg-do compile} */
-/* { dg-options "-O2 -fdump-tree-evrp" } */
+/* { dg-options "-O2 -fdump-tree-evrp -fdisable-tree-ethread" } */
char a[2];
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate -fno-finite-loops" } */
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate -fno-finite-loops -fdisable-tree-ethread" } */
+
+/* Note: Threader causes removal of for loop. */
extern int global;
extern int global2;
/* { dg-do compile { target { { { i?86-*-* x86_64-*-* } && lp64 } || { arm_thumb2 } } } } */
/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+// { dg-additional-options "-fdisable-tree-ethread" }
+
+/*
+Our new threader is threading things a bit too early, and causing the
+testcase in gcc.dg/shrink-wrap-loop.c to fail.
+
+ The gist is this BB inside a loop:
+
+ <bb 6> :
+ # p_2 = PHI <p2_6(D)(2), p_12(5)>
+ if (p_2 != 0B)
+ goto <bb 3>; [INV]
+ else
+ goto <bb 7>; [INV]
+
+Our threader can move this check outside of the loop (good). This is
+done before branch probabilities are calculated and causes the probs
+to be calculated as:
+
+<bb 2> [local count: 216361238]:
+ if (p2_6(D) != 0B)
+ goto <bb 7>; [54.59%]
+ else
+ goto <bb 6>; [45.41%]
+
+Logically this seems correct to me. A simple check outside of a loop
+should slightly but not overwhelmingly favor a non-zero value.
+
+Interestingly however, the old threader couldn't get this, but the IL
+ended up identical, albeit with different probabilities. What happens
+is that, because the old code could not thread this, the p2 != 0 check
+would remain inside the loop and probs would be calculated thusly:
+
+ <bb 6> [local count: 1073741824]:
+ # p_2 = PHI <p2_6(D)(2), p_12(5)>
+ if (p_2 != 0B)
+ goto <bb 3>; [94.50%]
+ else
+ goto <bb 7>; [5.50%]
+
+Then when the loop header copying pass ("ch") shuffled things around,
+the IL would end up identical to my early threader code, but with the
+probabilities would remain as 94.5/5.5.
+
+The above discrepancy causes the RTL ifcvt pass to generate different
+code, and by the time we get to the shrink wrapping pass, things look
+sufficiently different such that the legacy code can actually shrink
+wrap, whereas our new code does not.
+
+IMO, if the loop-ch pass moves conditionals outside of a loop, the
+probabilities should be adjusted, but that does mean the shrink wrap
+won't happen for this contrived testcase.
+ */
int foo (int *p1, int *p2);
/* { dg-do run } */
/* { dg-options "-O2 -foptimize-sibling-calls" } */
+/* See note in recurser_void() as to why we disable threading. */
+/* { dg-additional-options "-fdisable-tree-thread1" } */
+
/* The option -foptimize-sibling-calls is the default, but serves as
marker. Self-recursion tail calls are optimized for all targets,
regardless of presence of sibcall patterns. */
void
recurser_void (int n)
{
+ /* In some architectures like ppc64*, jump threading may thread
+ paths such that there are two calls into track(), one for
+ track(0) and one for track(7). The track(7) call can be
+ transformed into a jump instead of a call, which means that
+ different calls into track() may end up with a different
+ &stackpos. This is the reason we disable jump threading for this
+ test. */
if (n == 0 || n == 7)
track (n);
extern void string_eq_0_fail ();
extern void string_gt_0_fail ();
-void test_string (char *d, const char *s)
+void test_string_eq_min (char *d, const char *s)
{
int n = __builtin_sprintf (d, "%-s", s);
or INT_MAX. (This is a white box test based on knowing that
the optimization computes its own values of the two constants.) */
if (n == INT_MIN) string_eq_min_fail ();
+}
+
+void test_string_eq_max (char *d, const char *s)
+{
+ int n = __builtin_sprintf (d, "%-s", s);
+
if (n == INT_MAX) string_eq_max_fail ();
+}
+
+void test_string_lt_0 (char *d, const char *s)
+{
+ int n = __builtin_sprintf (d, "%-s", s);
/* The return value could be negative when strlen(s) is in excess
of 4095 (the maximum number of bytes a single directive is required
to handle). */
if (n < 0) string_lt_0_fail ();
+}
+
+void test_string_eq_0 (char *d, const char *s)
+{
+ int n = __builtin_sprintf (d, "%-s", s);
+
if (n == 0) string_eq_0_fail ();
+}
+
+void test_string_gt_0 (char *d, const char *s)
+{
+ int n = __builtin_sprintf (d, "%-s", s);
+
if (n > 0) string_gt_0_fail ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -fno-tree-dominator-opts -fno-tree-fre -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
+/* { dg-additional-options "-fdisable-tree-ethread -fdisable-tree-thread1" } */
int
foo (int a)
/* { dg-do compile } */
/* { dg-options "-O2 -fno-tree-dominator-opts -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
+/* { dg-additional-options "-fdisable-tree-ethread -fdisable-tree-thread1" } */
struct f {
int i;
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread4-details" } */
+/* { dg-options "-O2 -fdisable-tree-thread3 -fdump-tree-thread4-details" } */
struct tree_common
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-evrp-details" } */
+/* { dg-options "-O2 -fdump-tree-evrp-details -fdisable-tree-ethread" } */
extern void g (void);
extern void bar (int);
Make sure VRP folds the second "if" statement. */
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-dominator-opts -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
+/* { dg-options "-O2 -fno-tree-dominator-opts -fdisable-tree-evrp -fdump-tree-vrp1-details -fdisable-tree-ethread -fdisable-tree-thread1" } */
int
foo (int a)
/* PR tree-optimization/49039 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fdisable-tree-ethread -fdisable-tree-thread1" } */
extern void bar (void);
/* PR tree-optimization/61839. */
/* { dg-do run } */
-/* { dg-options "-O2 -fdump-tree-vrp1 -fdisable-tree-evrp -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fdisable-tree-evrp -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-thread1" } */
/* { dg-require-effective-target int32plus } */
__attribute__ ((noinline))
/* PR tree-optimization/61839. */
/* { dg-do run } */
-/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-thread1" } */
__attribute__ ((noinline))
int foo (int a, unsigned b)
to change decisions in switch expansion which in turn can expose new
jump threading opportunities. Skip the later tests on aarch64. */
/* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */
-/* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 4 "thread1" } } */
/* { dg-final { scan-tree-dump-not "optimizing for size" "thread1" } } */
/* { dg-final { scan-tree-dump-not "optimizing for size" "thread2" } } */
/* { dg-final { scan-tree-dump-not "optimizing for size" "thread3" { target { ! aarch64*-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-thread1-details --param logical-op-non-short-circuit=1" } */
+
+// Copied from ssa-dom-thread-11.c
+
+static int *bb_ticks;
+extern void frob (void);
+void
+mark_target_live_regs (int b, int block, int bb_tick)
+{
+ if (b == block && b != -1 && bb_tick == bb_ticks[b])
+ return;
+ if (b != -1)
+ frob ();
+}
+
+/* When the first two conditionals in the first IF are true, but
+ the third conditional is false, then there's a jump threading
+ opportunity to bypass the second IF statement. */
+/* { dg-final { scan-tree-dump-times "Registering.*jump thread" 1 "thread1"} } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-thread2-details -w" } */
+
+// Copied from ssa-dom-thread-12.c.
+
+typedef long unsigned int size_t;
+union tree_node;
+typedef union tree_node *tree;
+typedef union gimple_statement_d *gimple;
+typedef const union gimple_statement_d *const_gimple;
+union gimple_statement_d
+{
+ unsigned num_ops;
+ tree exp;
+};
+
+unsigned int x;
+static inline tree
+gimple_op (const_gimple gs, unsigned i)
+{
+ if (!(i < gs->num_ops))
+ abort ();
+ return gs->exp;
+}
+
+unsigned char
+scan_function (gimple stmt)
+{
+ unsigned i;
+ for (i = 0; i < stmt->num_ops - 3 ; i++)
+ gimple_call_arg (stmt, i);
+ gimple_op (stmt, 1);
+}
+
+/* The test which bypasses the loop is simplified prior to DOM to check
+ that stmt->num_ops - 3 != 0. When that test is false, we can derive
+ a value for stmt->num_ops. That in turn allows us to thread the jump
+ for the conditional at the start of the call to gimple_op. */
+/* { dg-final { scan-tree-dump-times "Registering.*jump thread" 1 "thread2"} } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ethread-details -w --param logical-op-non-short-circuit=1" } */
+
+// Copied from ssa-dom-thread-14.c
+
+enum optab_methods
+{
+ OPTAB_DIRECT,
+ OPTAB_LIB,
+ OPTAB_WIDEN,
+ OPTAB_LIB_WIDEN,
+ OPTAB_MUST_WIDEN
+};
+struct optab_d { };
+typedef struct optab_d *optab;
+void
+expand_shift_1 (int code, int unsignedp, int rotate,
+ optab lshift_optab, optab rshift_arith_optab)
+{
+ int left = (code == 42 || code == 0xde);
+ int attempt;
+ enum optab_methods methods;
+ if (attempt == 0)
+ methods = OPTAB_DIRECT;
+ else if (attempt == 1)
+ methods = OPTAB_WIDEN;
+ if ((!unsignedp || (!left && methods == OPTAB_WIDEN)))
+ {
+ enum optab_methods methods1 = methods;
+ if (unsignedp)
+ methods1 = OPTAB_MUST_WIDEN;
+ expand_binop (left ? lshift_optab : rshift_arith_optab,
+ unsignedp, methods1);
+ }
+}
+
+/* When UNSIGNEDP is true, LEFT is false and METHOD == OPTAB_WIDEN
+ we will enter the TRUE arm of the conditional and we can thread
+ the test to compute the first first argument of the expand_binop
+ call if we look backwards through the boolean logicals. */
+/* { dg-final { scan-tree-dump-times "Registering.*jump thread" 1 "ethread"} } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-vrp-details -fdump-tree-thread1-details --param logical-op-non-short-circuit=1" } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 8 "thread1" } } */
+
+/* Copied from ssa-thread-14. */
+
+void foo (void);
+void bar (void);
+void blah (void);
+
+/* One jump threaded here. */
+
+void
+baz_1 (int a, int b, int c)
+{
+ if (a && b)
+ foo ();
+ if (!b && c)
+ bar ();
+}
+
+/* One jump threaded here. */
+
+void
+baz_2 (int a, int b, int c)
+{
+ if (a && b)
+ foo ();
+ if (b || c)
+ bar ();
+}
+
+/* One jump threaded here. */
+
+void
+baz_3 (int a, int b, int c)
+{
+ if (a && b > 10)
+ foo ();
+ if (b < 5 && c)
+ bar ();
+}
+
+/* Two jumps threaded here. */
+
+void
+baz_4 (int a, int b, int c)
+{
+ if (a && b)
+ {
+ foo ();
+ if (c)
+ bar ();
+ }
+ if (b && c)
+ blah ();
+}
+
+/* Two jumps threaded here. */
+
+void
+baz_5 (int a, int b, int c)
+{
+ if (a && b)
+ {
+ foo ();
+ if (c)
+ bar ();
+ }
+ if (!b || !c)
+ blah ();
+}
+
+/* One jump threaded here. */
+
+void
+baz_6 (int a, int b, int c)
+{
+ if (a == 39 && b == 41)
+ foo ();
+ if (c == 12 || b == 41)
+ bar ();
+}
--- /dev/null
+// { dg-do compile }
+// { dg-options "-fgimple -O2 -fdump-tree-thread1-details" }
+
+/* This tests that we can thread BB4->BB999 coming in through the
+ following path:
+
+ latch many insns
+ | |
+ V V
+ 6 -> 7 -> 3 -> 4 -> 999
+
+ The ranger based threader cannot thread this because BB4 has too
+ many instructions so it gives up looking back. However, if we were
+ able to looking further, we would notice that a profitable path
+ passing through the loop latch (BB7) exists.
+
+ That is, 3->4->N in isolation is not profitable, but 6->7->3->4->N is.
+
+ It is not clear whether handling this case in the backwards
+ threader is profitable, as it would increase the search space
+ considerably. The test is being added to note a regression from
+ the old backward threader code.
+
+ This test has been distilled from libphobos/src/std/net/isemail.d.
+
+ The ranger threader stops at the 3->4 subpath with: "did not thread
+ around loop and would copy too many statements". */
+
+
+extern void bar();
+extern int random();
+
+int __GIMPLE (ssa,startwith("thread1"))
+foo (int key)
+{
+ int context;
+ int _1454;
+
+ __BB(2):
+ goto __BB3;
+
+ // Loop header.
+ __BB(3):
+ context_448 = __PHI (__BB2: 0, __BB7: context_450);
+ if (key_5(D) > 0)
+ goto __BB999;
+ else
+ goto __BB4;
+
+ __BB(4):
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar(); bar();
+ switch (context_448) {default: L5; case 0: L999; }
+
+ __BB(5):
+ L5:
+ goto __BB6;
+
+ __BB(6):
+ context_450 = __PHI (__BB5: 0);
+ _1454 = random ();
+ if (_1454 > 0)
+ goto __BB999;
+ else
+ goto __BB7;
+
+ // Loop latch.
+ __BB(7):
+ goto __BB3;
+
+ __BB(999):
+ L999:
+ return 5;
+}
+
+// { dg-final { scan-tree-dump-times "Registering.*jump thread.*incoming edge; \\(6, 7\\) \\(7, 3\\) \\(3, 4\\) \\(4, 999\\) nocopy" 1 "thread1" { xfail *-*-* } } }
/* { dg-do compile } */
-/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details -w" } */
+/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details -w -fdisable-tree-thread1 -fdisable-tree-thread2" } */
+
+/* Note: Threader causes the infinite loop in val & 1 sooner. */
powi_cost (long n)
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dom2-details --param logical-op-non-short-circuit=1" } */
+/* { dg-options "-O2 -fdump-tree-dom2-details --param logical-op-non-short-circuit=1 -fdisable-tree-thread1 -fdisable-tree-thread2" } */
static int *bb_ticks;
extern void frob (void);
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-dom2-details -w" } */
+/* { dg-options "-O2 -fdump-tree-dom2-details -w -fdisable-tree-thread2" } */
typedef long unsigned int size_t;
union tree_node;
typedef union tree_node *tree;
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-dom2-details -w --param logical-op-non-short-circuit=1" } */
+/* { dg-additional-options "-fdisable-tree-thread1 -fdisable-tree-ethread -fdisable-tree-thread2" } */
enum optab_methods
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-dom2-details -std=gnu89 --param logical-op-non-short-circuit=0" } */
+/* { dg-options "-O2 -fdump-tree-vrp1-details -fdump-tree-thread1-details -std=gnu89 --param logical-op-non-short-circuit=0" } */
#include "ssa-dom-thread-4.c"
condition.
All the cases are picked up by VRP1 as jump threads. */
-/* { dg-final { scan-tree-dump-times "Threaded" 4 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 6 "thread1" } } */
+/* { dg-final { scan-tree-dump-times "Threaded" 2 "vrp1" } } */
SWITCH_BB -> BBx -> BBy -> BBz -> PHI
We now know the value of the switch index at PHI. */
-/* { dg-final { scan-tree-dump-times "FSM" 6 "thread1" } } */
-/* { dg-final { scan-tree-dump-times "FSM" 1 "thread2" } } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 6 "thread1" } } */
+/* { dg-final { scan-tree-dump-times "Registering FSM jump" 1 "thread2" } } */
int sum0, sum1, sum2, sum3;
int foo (char *s, char **ret)
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */
+/* { dg-additional-options "--param=threader-mode=legacy" } */
/* Here we have the same issue as was commented in ssa-dom-thread-6.c.
The PHI coming into the threader has a lot more constants, so the
/* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-fre1-details" } */
+/* { dg-options "-O -fdump-tree-fre1-details -fdisable-tree-ethread" } */
int foo (int i)
{
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-vrp2-details --param logical-op-non-short-circuit=1" } */
+/* { dg-additional-options "-fdisable-tree-ethread -fdisable-tree-thread1 -fdisable-tree-thread2" } */
/* { dg-final { scan-tree-dump-not "IRREDUCIBLE_LOOP" "vrp2" } } */
void abort (void);
/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops --param early-inlining-insns=14 -fno-inline-functions" } */
/* { dg-final { scan-tree-dump "FSM" "thread2" } } */
/* { dg-final { scan-tree-dump "FSM" "thread3" } } */
-/* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "FSM" "thread4" } } */
typedef struct bitmap_head_def *bitmap;
typedef const struct bitmap_head_def *const_bitmap;
/* { dg-do compile } */
/* { dg-additional-options "-O2 -fdump-tree-vrp-details --param logical-op-non-short-circuit=1" } */
+/* { dg-additional-options "-fdisable-tree-thread1" } */
/* { dg-final { scan-tree-dump-times "Threaded jump" 8 "vrp1" } } */
void foo (void);
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1 -fdelete-null-pointer-checks -fdisable-tree-evrp" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fdelete-null-pointer-checks -fdisable-tree-evrp -fdisable-tree-ethread -fdisable-tree-thread1" } */
struct A
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fdisable-tree-ethread -fdisable-tree-thread1" } */
struct A
{
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1 -fno-early-inlining" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fno-early-inlining -fdisable-tree-ethread -fdisable-tree-thread1" } */
inline int ten()
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fdisable-tree-ethread -fdisable-tree-thread1" } */
int baz (void);
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-fre -fdisable-tree-evrp -fdump-tree-vrp1-details -fdelete-null-pointer-checks" } */
+/* { dg-options "-O2 -fno-tree-fre -fdisable-tree-evrp -fdump-tree-vrp1-details -fdelete-null-pointer-checks -fdisable-tree-ethread -fdisable-tree-thread1" } */
int
foo (int i, int *p)
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-fre -fdisable-tree-evrp -fdump-tree-vrp1 -std=gnu89" } */
+/* { dg-options "-O2 -fno-tree-fre -fdisable-tree-evrp -fdump-tree-vrp1 -std=gnu89 -fdisable-tree-ethread -fdisable-tree-thread1" } */
foo (int *p)
{
/* { dg-do compile } */
-/* { dg-options "-fwrapv -O1 -ftree-vrp -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-fwrapv -O1 -ftree-vrp -fdisable-tree-evrp -fdump-tree-vrp1 -fdisable-tree-ethread -fdisable-tree-thread1" } */
#include <limits.h>
extern void abort ();
/* { dg-do compile } */
-/* { dg-options "-fwrapv -O1 -fno-tree-fre -fdisable-tree-evrp -ftree-vrp -fdump-tree-vrp1" } */
+/* { dg-options "-fwrapv -O1 -fno-tree-fre -fdisable-tree-evrp -ftree-vrp -fdump-tree-vrp1 -fdisable-tree-ethread -fdisable-tree-thread1" } */
extern void abort ();
extern void exit (int);
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-vrp1 -fno-tree-fre -fdisable-tree-evrp" } */
+/* { dg-options "-O2 -fdump-tree-vrp1 -fno-tree-fre -fdisable-tree-evrp -fdisable-tree-ethread -fdisable-tree-thread1" } */
/* This is from PR14052. */
w = __LINE__;
if (x || y || a)
- p = w; // { dg-bogus "-Wmaybe-uninitialized" "pr61112" { xfail *-*-* } }
+ p = w;
}
void foo_c5_1_2 (int x, int y, int z, int a)
w = __LINE__;
if (x || a || y)
- p = w; // { dg-bogus "-Wmaybe-uninitialized" "pr61112" { xfail *-*-* } }
+ p = w;
}
void foo_c5_1_3 (int x, int y, int z, int a)
w = __LINE__;
if (a || x || y)
- p = w; // { dg-bogus "-Wmaybe-uninitialized" "pr61112" { xfail *-*-* } }
+ p = w;
}
void foo_c5_2 (int x, int y, int z, int a)
/* { dg-do compile } */
/* { dg-options "-Wuninitialized -O2" } */
+/* { dg-xfail-if "threading shuffles things around" { ppc64*-*-* } } */
int g;
void bar();
/* { dg-require-effective-target vect_int } */
+/* See note below as to why we disable threading. */
+/* { dg-additional-options "-fdisable-tree-thread1" } */
+
#include <stdarg.h>
#include "tree-vect.h"
*pout++ = *pin++ + a;
*pout++ = *pin++ + a;
*pout++ = *pin++ + a;
+ /* In some architectures like ppc64, jump threading may thread
+ the iteration where i==0 such that we no longer optimize the
+ BB. Another alternative to disable jump threading would be
+ to wrap the read from `i' into a function returning i. */
if (arr[i] = i)
a = i;
else
/* { dg-do run } */
/* { dg-require-effective-target avx2 } */
-/* { dg-options "-mavx2 -O3 -fopenmp-simd -fdump-tree-vect-details" } */
+/* { dg-options "-mavx2 -O3 -fopenmp-simd -fdump-tree-vect-details -fdisable-tree-thread1" } */
#include "avx2-check.h"
#define N 64
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
+#include "value-range.h"
+#include "gimple-range.h"
+#include "tree-ssa-threadedge.h"
+#include "gimple-range-path.h"
+#include "ssa.h"
+#include "tree-cfgcleanup.h"
// Path registry for the backwards threader. After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
const bool m_speed_p;
};
+// Ranger based backwards threader.
+
+class back_threader
+{
+ // Temporary until we remove old code.
+ friend bool path_is_unreachable_p (const vec<jump_thread_edge *> &);
+
+public:
+ back_threader (back_threader_profitability &, back_threader_registry &);
+ ~back_threader ();
+ void find_paths (basic_block bb, tree name);
+
+private:
+ void maybe_register_path (edge taken_edge);
+ bool find_paths_to_names (basic_block bb, bitmap imports);
+ bool resolve_def (tree name, bitmap interesting, vec<tree> worklist);
+ bool resolve_phi (gphi *phi, bitmap imports);
+ edge find_taken_edge (const vec<basic_block> &path);
+ edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
+ edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
+
+ back_threader_registry &m_registry;
+ back_threader_profitability &m_profit;
+ gimple_ranger m_ranger;
+ path_range_query m_solver;
+
+ // Current path being analyzed.
+ auto_vec<basic_block> m_path;
+ // Hash to mark visited BBs while analyzing a path.
+ hash_set<basic_block> m_visited_bbs;
+ // The set of SSA names, any of which could potentially change the
+ // value of the final conditional in a path.
+ bitmap m_imports;
+ // The last statement in the path.
+ gimple *m_last_stmt;
+ // This is a bit of a wart. It's used to pass the LHS SSA name to
+ // the profitability engine.
+ tree m_name;
+ // Marker to differentiate unreachable edges.
+ static const edge UNREACHABLE_EDGE;
+};
+
+// Used to differentiate unreachable edges, so we may stop the search
+// in a the given direction.
+const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
+
+back_threader::back_threader (back_threader_profitability &profit,
+ back_threader_registry ®istry)
+ : m_registry (registry),
+ m_profit (profit),
+ m_solver (m_ranger)
+{
+ m_last_stmt = NULL;
+ m_imports = BITMAP_ALLOC (NULL);
+}
+
+back_threader::~back_threader ()
+{
+ m_path.release ();
+ BITMAP_FREE (m_imports);
+}
+
+// Register the current path for jump threading if it's profitable to
+// do so. TAKEN_EDGE is the known edge out of the path.
+
+void
+back_threader::maybe_register_path (edge taken_edge)
+{
+ bool irreducible = false;
+ bool profitable
+ = m_profit.profitable_path_p (m_path, m_name, taken_edge, &irreducible);
+
+ if (profitable)
+ {
+ m_registry.register_path (m_path, taken_edge);
+
+ if (irreducible)
+ vect_free_loop_info_assumptions (m_path[0]->loop_father);
+ }
+}
+
+// Return the known taken edge out of a path. If the path can be
+// determined to be unreachable, return UNREACHABLE_EDGE. If no
+// outgoing edge can be calculated, return NULL.
+
+edge
+back_threader::find_taken_edge (const vec<basic_block> &path)
+{
+ gcc_checking_assert (path.length () > 1);
+ switch (gimple_code (m_last_stmt))
+ {
+ case GIMPLE_COND:
+ return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
+
+ case GIMPLE_SWITCH:
+ return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
+
+ default:
+ return NULL;
+ }
+}
+
+// Same as find_taken_edge, but for paths ending in a switch.
+
+edge
+back_threader::find_taken_edge_switch (const vec<basic_block> &path,
+ gswitch *sw)
+{
+ tree name = gimple_switch_index (sw);
+ int_range_max r;
+
+ m_solver.precompute_ranges (path, m_imports);
+ m_solver.range_of_expr (r, name, sw);
+
+ if (r.undefined_p ())
+ return UNREACHABLE_EDGE;
+
+ if (r.varying_p ())
+ return NULL;
+
+ tree val;
+ if (r.singleton_p (&val))
+ return ::find_taken_edge (gimple_bb (sw), val);
+
+ return NULL;
+}
+
+// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
+
+edge
+back_threader::find_taken_edge_cond (const vec<basic_block> &path,
+ gcond *cond)
+{
+ m_solver.precompute_ranges (path, m_imports);
+
+ // Check if either operand is unreachable since this knowledge could
+ // help the caller cut down the search space.
+ int_range_max r;
+ m_solver.range_of_expr (r, gimple_cond_lhs (cond));
+ if (r.undefined_p ())
+ return UNREACHABLE_EDGE;
+ m_solver.range_of_expr (r, gimple_cond_rhs (cond));
+ if (r.undefined_p ())
+ return UNREACHABLE_EDGE;
+
+ m_solver.range_of_stmt (r, cond);
+
+ int_range<2> true_range (boolean_true_node, boolean_true_node);
+ int_range<2> false_range (boolean_false_node, boolean_false_node);
+
+ if (r == true_range || r == false_range)
+ {
+ edge e_true, e_false;
+ basic_block bb = gimple_bb (cond);
+ extract_true_false_edges_from_block (bb, &e_true, &e_false);
+ return r == true_range ? e_true : e_false;
+ }
+ return NULL;
+}
+
+// Populate a vector of trees from a bitmap.
+
+static inline void
+populate_worklist (vec<tree> worklist, bitmap bits)
+{
+ bitmap_iterator bi;
+ unsigned i;
+
+ EXECUTE_IF_SET_IN_BITMAP (bits, 0, i, bi)
+ {
+ tree name = ssa_name (i);
+ worklist.quick_push (name);
+ }
+}
+
+// If taking any of the incoming edges to a PHI causes the final
+// conditional of the current path to be constant, register the
+// path(s), and return TRUE.
+
+bool
+back_threader::resolve_phi (gphi *phi, bitmap interesting)
+{
+ if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (phi)))
+ return true;
+
+ bool done = false;
+ for (size_t i = 0; i < gimple_phi_num_args (phi); ++i)
+ {
+ edge e = gimple_phi_arg_edge (phi, i);
+
+ // This is like path_crosses_loops in profitable_path_p but more
+ // restrictive, since profitable_path_p allows threading the
+ // first block because it would be redirected anyhow.
+ //
+ // If we loosened the restriction and used profitable_path_p()
+ // here instead, we would peel off the first iterations of loops
+ // in places like tree-ssa/pr14341.c.
+ bool profitable_p = m_path[0]->loop_father == e->src->loop_father;
+ if (!profitable_p)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ " FAIL: path through PHI in bb%d (incoming bb:%d) crosses loop\n",
+ e->dest->index, e->src->index);
+ continue;
+ }
+
+ tree arg = gimple_phi_arg_def (phi, i);
+ if (TREE_CODE (arg) == SSA_NAME)
+ {
+ unsigned v = SSA_NAME_VERSION (arg);
+
+ // Avoid loops as in: x_5 = PHI <x_5(2), ...>.
+ if (bitmap_bit_p (interesting, v))
+ continue;
+
+ bitmap_set_bit (interesting, v);
+ bitmap_set_bit (m_imports, v);
+ done |= find_paths_to_names (e->src, interesting);
+ bitmap_clear_bit (interesting, v);
+ }
+ else if (TREE_CODE (arg) == INTEGER_CST)
+ {
+ m_path.safe_push (e->src);
+ edge taken_edge = find_taken_edge (m_path);
+ if (taken_edge && taken_edge != UNREACHABLE_EDGE)
+ {
+ maybe_register_path (taken_edge);
+ done = true;
+ }
+ m_path.pop ();
+ }
+ }
+ return done;
+}
+
+// If the definition of NAME causes the final conditional of the
+// current path to be constant, register the path, and return TRUE.
+
+bool
+back_threader::resolve_def (tree name, bitmap interesting, vec<tree> worklist)
+{
+ gimple *def_stmt = SSA_NAME_DEF_STMT (name);
+
+ // Handle PHIs.
+ if (is_a<gphi *> (def_stmt)
+ && resolve_phi (as_a<gphi *> (def_stmt), interesting))
+ return true;
+
+ // Defer copies of SSAs by adding the source to the worklist.
+ if (gimple_assign_single_p (def_stmt)
+ && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+ {
+ tree rhs = gimple_assign_rhs1 (def_stmt);
+ bitmap_set_bit (m_imports, SSA_NAME_VERSION (rhs));
+ bitmap_set_bit (interesting, SSA_NAME_VERSION (rhs));
+ worklist.safe_push (rhs);
+ }
+ return false;
+}
+
+// Find jump threading paths to any of the SSA names in the
+// INTERESTING bitmap, and register any such paths.
+//
+// Return TRUE if no further processing past this block is necessary.
+// This is because we've either registered a path, or because there is
+// nothing of interesting beyond this block.
+//
+// BB is the current path being processed.
+
+bool
+back_threader::find_paths_to_names (basic_block bb, bitmap interesting)
+{
+ if (m_visited_bbs.add (bb))
+ return true;
+
+ m_path.safe_push (bb);
+
+ if (m_path.length () > 1
+ && !m_profit.profitable_path_p (m_path, m_name, NULL))
+ {
+ m_path.pop ();
+ m_visited_bbs.remove (bb);
+ return false;
+ }
+
+ auto_bitmap processed;
+ unsigned i;
+ bool done = false;
+
+ // We use a worklist instead of iterating through the bitmap,
+ // because we may add new items in-flight.
+ auto_vec<tree> worklist (bitmap_count_bits (interesting));
+ populate_worklist (worklist, interesting);
+ while (!worklist.is_empty ())
+ {
+ tree name = worklist.pop ();
+ unsigned i = SSA_NAME_VERSION (name);
+ basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
+
+ // Process any names defined in this block.
+ if (def_bb == bb)
+ {
+ bitmap_set_bit (processed, i);
+
+ if (resolve_def (name, interesting, worklist))
+ {
+ done = true;
+ goto leave_bb;
+ }
+ }
+ // Examine blocks that define or export an interesting SSA,
+ // since they may compute a range which resolve this path.
+ if ((def_bb == bb
+ || bitmap_bit_p (m_ranger.gori ().exports (bb), i))
+ && m_path.length () > 1)
+ {
+ edge taken_edge = find_taken_edge (m_path);
+ if (taken_edge)
+ {
+ if (taken_edge != UNREACHABLE_EDGE)
+ maybe_register_path (taken_edge);
+
+ done = true;
+ goto leave_bb;
+ }
+ }
+ }
+
+ // If there are interesting names not yet processed, keep looking.
+ bitmap_and_compl_into (interesting, processed);
+ if (!bitmap_empty_p (interesting))
+ {
+ edge_iterator iter;
+ edge e;
+ FOR_EACH_EDGE (e, iter, bb->preds)
+ if ((e->flags & EDGE_ABNORMAL) == 0)
+ done |= find_paths_to_names (e->src, interesting);
+ }
+
+ leave_bb:
+ bitmap_iterator bi;
+ EXECUTE_IF_SET_IN_BITMAP (processed, 0, i, bi)
+ bitmap_set_bit (interesting, i);
+
+ m_path.pop ();
+ m_visited_bbs.remove (bb);
+ return done;
+}
+
+// Search backwards from BB looking for paths where the final
+// conditional out of BB can be determined. NAME is the LHS of the
+// final conditional. Register such paths for jump threading.
+
+void
+back_threader::find_paths (basic_block bb, tree name)
+{
+ gimple *stmt = last_stmt (bb);
+ if (!stmt
+ || (gimple_code (stmt) != GIMPLE_COND
+ && gimple_code (stmt) != GIMPLE_SWITCH))
+ return;
+
+ if (EDGE_COUNT (bb->succs) > 1
+ || single_succ_to_potentially_threadable_block (bb))
+ {
+ m_last_stmt = stmt;
+ m_visited_bbs.empty ();
+ m_path.truncate (0);
+ m_name = name;
+ bitmap_clear (m_imports);
+
+ auto_bitmap interesting;
+ bitmap_copy (m_imports, m_ranger.gori ().imports (bb));
+ bitmap_copy (interesting, m_imports);
+ find_paths_to_names (bb, interesting);
+ }
+}
+
+// Dump a sequence of BBs through the CFG.
+
+DEBUG_FUNCTION void
+dump_path (FILE *dump_file, const vec<basic_block> &path)
+{
+ for (size_t i = 0; i < path.length (); ++i)
+ {
+ fprintf (dump_file, "BB%d", path[i]->index);
+ if (i + 1 < path.length ())
+ fprintf (dump_file, " <- ");
+ }
+ fprintf (dump_file, "\n");
+}
+
+DEBUG_FUNCTION void
+debug (const vec <basic_block> &path)
+{
+ dump_path (stderr, path);
+}
+
class thread_jumps
{
public:
thread_jumps (bool speed_p = true)
- : m_profit (speed_p), m_registry (param_max_fsm_thread_paths)
+ : m_profit (speed_p),
+ m_registry (param_max_fsm_thread_paths),
+ m_back_threader (m_profit, m_registry)
{ }
void find_jump_threads_backwards (basic_block bb);
+ void find_jump_threads_backwards_with_ranger (basic_block bb);
bool thread_through_all_blocks ();
private:
tree m_name;
back_threader_profitability m_profit;
back_threader_registry m_registry;
+ back_threader m_back_threader;
};
// Perform the actual jump threading for the all queued paths.
EDGE_NO_COPY_SRC_BLOCK);
jump_thread_path->safe_push (x);
- m_lowlevel_registry.register_jump_thread (jump_thread_path);
- ++m_threaded_paths;
+ if (m_lowlevel_registry.register_jump_thread (jump_thread_path))
+ ++m_threaded_paths;
return true;
}
void
thread_jumps::find_jump_threads_backwards (basic_block bb)
{
+ if (param_threader_mode & THREADER_MODE_RANGER)
+ {
+ find_jump_threads_backwards_with_ranger (bb);
+ return;
+ }
+
gimple *stmt = get_gimple_control_stmt (bb);
if (!stmt)
return;
fsm_find_control_statement_thread_paths (name);
}
+// Like find_jump_threads_backwards(), but using ranger.
+
+void
+thread_jumps::find_jump_threads_backwards_with_ranger (basic_block bb)
+{
+ gimple *stmt = get_gimple_control_stmt (bb);
+ if (!stmt)
+ return;
+
+ enum gimple_code code = gimple_code (stmt);
+ tree name = NULL;
+ if (code == GIMPLE_SWITCH)
+ name = gimple_switch_index (as_a <gswitch *> (stmt));
+ else if (code == GIMPLE_GOTO)
+ name = gimple_goto_dest (stmt);
+ else if (code == GIMPLE_COND)
+ name = gimple_cond_lhs (stmt);
+
+ m_name = name;
+ m_back_threader.find_paths (bb, name);
+}
+
namespace {
const pass_data pass_data_thread_jumps =
return flag_expensive_optimizations;
}
+// Try to thread blocks in FUN. Return TRUE if any jump thread paths were
+// registered.
-unsigned int
-pass_thread_jumps::execute (function *fun)
+static bool
+try_thread_blocks (function *fun)
{
- loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
-
/* Try to thread each block with more than one successor. */
thread_jumps threader;
basic_block bb;
if (EDGE_COUNT (bb->succs) > 1)
threader.find_jump_threads_backwards (bb);
}
- bool changed = threader.thread_through_all_blocks ();
+ return threader.thread_through_all_blocks ();
+}
+
+unsigned int
+pass_thread_jumps::execute (function *fun)
+{
+ loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
+
+ // Iterative mode is a testing construct and is not meant for public
+ // consumption. It is OFF by default.
+ bool iterative = param_threader_iterative;
+
+ bool changed = false;
+ while (try_thread_blocks (fun))
+ {
+ changed = true;
+
+ if (!iterative)
+ break;
+
+ if ((param_threader_mode & THREADER_MODE_RANGER) == 0)
+ break;
+ cleanup_tree_cfg (TODO_update_ssa);
+ }
loop_optimizer_finalize ();
return changed ? TODO_cleanup_cfg : 0;
m_state->pop ();
}
+/* Return TRUE if BB has a single successor to a block with multiple
+ incoming and outgoing edges. */
+
+bool
+single_succ_to_potentially_threadable_block (basic_block bb)
+{
+ int flags = (EDGE_IGNORE | EDGE_COMPLEX | EDGE_ABNORMAL);
+ return (single_succ_p (bb)
+ && (single_succ_edge (bb)->flags & flags) == 0
+ && potentially_threadable_block (single_succ (bb)));
+}
+
/* Examine the outgoing edges from BB and conditionally
try to thread them. */
outgoing edges, then we may be able to thread the edge, i.e., we
may be able to statically determine which of the outgoing edges
will be traversed when the incoming edge from BB is traversed. */
- if (single_succ_p (bb)
- && (single_succ_edge (bb)->flags & flags) == 0
- && potentially_threadable_block (single_succ (bb)))
- {
- thread_across_edge (single_succ_edge (bb));
- }
+ if (single_succ_to_potentially_threadable_block (bb))
+ thread_across_edge (single_succ_edge (bb));
else if ((last = last_stmt (bb))
&& gimple_code (last) == GIMPLE_COND
&& EDGE_COUNT (bb->succs) == 2
};
extern void propagate_threaded_block_debug_into (basic_block, basic_block);
+extern bool single_succ_to_potentially_threadable_block (basic_block);
// ?? All this ssa_name_values stuff is the store of values for
// avail_exprs_stack and const_and_copies, so it really belongs in the
#include "dbgcnt.h"
#include "tree-cfg.h"
#include "tree-vectorizer.h"
+#include "tree-pass.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
one or more in-edges to B to instead reach the destination of an
E is the edge we can thread, E2 is the new target edge, i.e., we
are effectively recording that E->dest can be changed to E2->dest
- after fixing the SSA graph. */
+ after fixing the SSA graph.
-void
+ Return TRUE if PATH was successfully threaded. */
+
+bool
jump_thread_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
{
if (!dbg_cnt (registered_jump_thread))
{
path->release ();
- return;
+ return false;
}
/* First make sure there are no NULL outgoing edges on the jump threading
}
path->release ();
- return;
+ return false;
}
/* Only the FSM threader is allowed to thread across
dump_jump_thread_path (dump_file, *path, true);
m_paths.safe_push (path);
+ return true;
}
/* Return how many uses of T there are within BB, as long as there
public:
jump_thread_path_registry ();
~jump_thread_path_registry ();
- void register_jump_thread (vec<jump_thread_edge *> *);
+ bool register_jump_thread (vec<jump_thread_edge *> *);
void remove_jump_threads_including (edge);
bool thread_through_all_blocks (bool);
jump_thread_edge *allocate_thread_edge (edge e, jump_thread_edge_type t);
/* Autopar with IF conditions. */
+/* { dg-additional-options "-fdisable-tree-thread1" } */
void abort();
+/* { dg-additional-options "-fdisable-tree-thread1" } */
+
#define N 1500
int x[N][N], y[N];