/* SSA Jump Threading
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
#include "cfghooks.h"
#include "dbgcnt.h"

// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};

// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  bool profitable_path_p (const vec<basic_block> &,
                          edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};

back_threader_profitability::back_threader_profitability (bool speed_p,
                                                           gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
                                || gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts, in
  // particular it estimates further DCE from eliminating the exit
  // control stmt.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}

// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
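// For reference, the passes defined at the end of this file combine
// these as follows: "ethread" uses BT_NONE, "thread[12]" use BT_SPEED,
// and "threadfull[12]" use BT_SPEED | BT_RESOLVE.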

class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
                            back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // The BT_* flags for this threader.  If BT_RESOLVE is set, unknown
  // SSA names along a path are resolved with the ranger.  Otherwise,
  // unknown SSA names are assumed to be VARYING.  Resolving is more
  // precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};

// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;

back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}

// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.
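// For example, "-fdbg-cnt=back_thread1:10" allows only the first ten
// candidates of the first "thread" pass to be registered, which helps
// bisect a miscompilation down to a single threading candidate.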

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
        return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
        return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
        return false;

      if (!m_first && !dbg_cnt (back_thread2))
        return false;
    }
  return true;
}

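// Dump PATH to DUMP_FILE from the path entry down to the final block,
// e.g. "5->7->9" (block indices made up; the path vector itself is
// stored in reverse order).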
static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
        fprintf (dump_file, "->");
    }
}

// Dump details of an attempt to register a path.
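// The output looks like "path: 5->7->9->3 SUCCESS" or
// "path: 5->7->9->xx REJECTED", again with made-up block indices.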

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}

// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
          && debug_counter ()
          && m_registry.register_path (m_path, taken_edge))
        {
          if (irreducible)
            vect_free_loop_info_assumptions (m_path[0]->loop_father);
        }
      else
        taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}

// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}

// Same as find_taken_edge, but for paths ending in a switch.
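// For example, if the path constrains the switch index to [3, 5] and a
// single case label covers that entire range, the edge to that label's
// block is returned; otherwise NULL.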

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
                                       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}

// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
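// For example, if on the given path the condition "if (_1 > 10)" is
// known to be true because _1 has been resolved to 42 on entry, the
// true edge out of the block is returned.  Name and values are made up
// for illustration.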

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
                                     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range = range_true ();
  int_range<2> false_range = range_false ();

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}

// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current block being processed.
//
// OVERALL_PATHS is the search space up to this block.
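// For example, when the final conditional tests _1 and _1 is defined
// by a PHI in BB, each incoming edge contributes its PHI argument as a
// new interesting name, and the search recurses into that predecessor.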

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
                                    unsigned overall_paths,
                                    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as a Finite State
  // Machine.
  // ??? Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
          || (!large_non_fsm
              && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
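  // For example, with --param max-jump-thread-paths=64, a chain of
  // blocks with two predecessors each can be extended about six levels
  // deep before the product of edge counts exceeds the cap.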
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
           <= (unsigned) param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add the names that interesting PHIs
      // in BB receive on the respective incoming edges, amending
      // imports from those stmts.  We do this by starting with all
      // names not defined in BB as interesting and collecting a list
      // of interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding each interesting PHI's argument on
      // that edge to the set of interesting names to consider when
      // processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
        {
          tree name = ssa_name (i);
          gimple *def_stmt = SSA_NAME_DEF_STMT (name);
          /* Imports remain interesting.  */
          if (gimple_bb (def_stmt) != bb)
            {
              bitmap_set_bit (new_interesting, i);
              continue;
            }
          worklist.quick_push (name);
          while (!worklist.is_empty ())
            {
              tree name = worklist.pop ();
              gimple *def_stmt = SSA_NAME_DEF_STMT (name);
              /* Newly discovered imports are interesting.  */
              if (gimple_bb (def_stmt) != bb)
                {
                  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
                  continue;
                }
              /* Local PHIs participate in renaming below.  */
              if (gphi *phi = dyn_cast<gphi *> (def_stmt))
                {
                  tree res = gimple_phi_result (phi);
                  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
                    interesting_phis.safe_push (phi);
                }
              /* For other local defs process their uses, amending
                 imports on the way.  */
              else
                {
                  tree ssa[3];
                  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
                  for (unsigned j = 0; j < lim; ++j)
                    {
                      tree rhs = ssa[j];
                      if (rhs
                          && bitmap_set_bit (m_imports,
                                             SSA_NAME_VERSION (rhs)))
                        {
                          new_imports.safe_push (SSA_NAME_VERSION (rhs));
                          worklist.safe_push (rhs);
                        }
                    }
                }
            }
        }
      if (!bitmap_empty_p (new_interesting)
          || !interesting_phis.is_empty ())
        {
          auto_vec<int, 4> unwind (interesting_phis.length ());
          auto_vec<int, 4> imports_unwind (interesting_phis.length ());
          edge_iterator iter;
          edge e;
          FOR_EACH_EDGE (e, iter, bb->preds)
            {
              if (e->flags & EDGE_ABNORMAL
                  // This is like path_crosses_loops in profitable_path_p but
                  // more restrictive to avoid peeling off loop iterations (see
                  // tree-ssa/pr14341.c for an example).
                  // ??? Note this restriction only applied when visiting an
                  // interesting PHI with the former resolve_phi.
                  || (!interesting_phis.is_empty ()
                      && m_path[0]->loop_father != e->src->loop_father))
                continue;
              for (gphi *phi : interesting_phis)
                {
                  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
                  if (TREE_CODE (def) == SSA_NAME)
                    {
                      int ver = SSA_NAME_VERSION (def);
                      if (bitmap_set_bit (new_interesting, ver))
                        {
                          if (bitmap_set_bit (m_imports, ver))
                            imports_unwind.quick_push (ver);
                          unwind.quick_push (ver);
                        }
                    }
                }
              find_paths_to_names (e->src, new_interesting, overall_paths,
                                   profit);
              // Restore new_interesting.
              for (int def : unwind)
                bitmap_clear_bit (new_interesting, def);
              unwind.truncate (0);
              // Restore m_imports.
              for (int def : imports_unwind)
                bitmap_clear_bit (m_imports, def);
              imports_unwind.truncate (0);
            }
        }
      /* m_imports tracks all interesting names on the path, so when
         backtracking we have to restore it.  */
      for (int j : new_imports)
        bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
             param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}

// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such paths
// for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  if (code != GIMPLE_SWITCH
      && code != GIMPLE_COND)
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  tree name;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
        return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }

  // Interesting is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the set of
  // interesting defs dwindles and once empty we can stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}

DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

void
back_threader::debug ()
{
  dump (stderr);
}

/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might still not be satisfied, but
   when the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   ??? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following; when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we'll have to unwind these on
  // backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
         the PATH array.  The last entry in the array represents the
         block with an outgoing edge that we will redirect to the jump
         threading path.  Thus we don't care how many statements are
         in that block because it will not be copied or whether or not
         it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
        {
          int orig_n_insns = m_n_insns;
          if (!m_contains_hot_bb && m_speed_p)
            m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
          for (gsi = gsi_after_labels (bb);
               !gsi_end_p (gsi);
               gsi_next_nondebug (&gsi))
            {
              /* Do not allow OpenACC loop markers and __builtin_constant_p on
                 threading paths.  The latter is disallowed, because an
                 expression might be constant on two threading paths, and
                 become non-constant (i.e.: phi) when they merge.  */
              gimple *stmt = gsi_stmt (gsi);
              if (gimple_call_internal_p (stmt, IFN_UNIQUE)
                  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fputc ('\n', dump_file);
                  return false;
                }
              /* Do not count empty statements and labels.  */
              if (gimple_code (stmt) != GIMPLE_NOP
                  && !is_gimple_debug (stmt))
                m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);

          /* We do not look at the block with the threaded branch
             in this loop.  So if any block with a last statement that
             is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
             multiway branch on our path.

             The block in PATH[0] is special, it's the block where we're
             going to be able to eliminate its branch.  */
          if (j > 0)
            {
              gimple *last = *gsi_last_bb (bb);
              if (last
                  && (gimple_code (last) == GIMPLE_SWITCH
                      || gimple_code (last) == GIMPLE_GOTO))
                m_multiway_branch_in_path = true;
            }
        }

      /* Note if we thread through the latch, we will want to include
         the last entry in the array when determining if we thread
         through the loop latch.  */
      if (loop->latch == bb)
        {
          m_threaded_through_latch = true;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (latch)");
        }
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
             "  Overall: %i insns\n",
             m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot, but also
     in the case where we separate the cold path from the hot path and
     permit optimization of the hot path later.  Be on the aggressive
     side here.  In some testcases, as in PR 78407, this leads to
     noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "the number of instructions on the path "
                     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
          return false;
        }
      edge entry = find_edge (m_path[m_path.length () - 1],
                              m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path entry is probably never executed.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for size.\n",
                 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
                    && (m_n_insns * param_fsm_scale_path_stmts
                        >= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ??? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
          == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot, but also
     in the case where we separate the cold path from the hot path and
     permit optimization of the hot path later.  Be on the aggressive
     side here.  In some testcases, as in PR 78407, this leads to
     noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path leads to probably never executed edge.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for size.\n",
                 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statements is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
          || (m_n_insns * param_fsm_scale_path_stmts
              >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Would create irreducible loop early without "
                 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through multiway branch without threading "
                 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through latch before loop opts would create "
                 "non-empty latch.\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread
   and register it.

   Return TRUE if successful or FALSE otherwise.  */
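/* For example, with PATH == { B9, B7, B5 } (stored in reverse, so B5 is
   the path entry), we push the edges B5->B7 and B7->B9 as
   EDGE_COPY_SRC_BLOCK and finally TAKEN_EDGE out of B9 as
   EDGE_NO_COPY_SRC_BLOCK.  The block numbers are made up for
   illustration.  */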

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
                                       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}

// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return 0;
}

namespace {
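// The pass_data fields are, in order: type, name, optinfo_flags, tv_id,
// properties_required, properties_provided, properties_destroyed,
// todo_flags_start, and todo_flags_finish (see tree-pass.h).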
const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};
977
978// Early jump threading pass optimizing for size.
b720e919
JH
979class pass_early_thread_jumps : public gimple_opt_pass
980{
981public:
982 pass_early_thread_jumps (gcc::context *ctxt)
983 : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
984 {}
985
5d4d64fa
AH
986 opt_pass * clone () override
987 {
988 return new pass_early_thread_jumps (m_ctxt);
989 }
bc5baac5
AH
990 void set_pass_param (unsigned int, bool param) override
991 {
992 m_first = param;
993 }
5d4d64fa
AH
994 bool gate (function *) override
995 {
996 return flag_thread_jumps;
997 }
998 unsigned int execute (function *fun) override
999 {
bc5baac5 1000 back_threader threader (fun, BT_NONE, m_first);
4e0f56d7 1001 return threader.thread_blocks ();
5d4d64fa 1002 }
bc5baac5
AH
1003private:
1004 bool m_first;
b720e919
JH
1005};

// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

} // anonymous namespace

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}