]>
Commit | Line | Data |
---|---|---|
a8046f60 | 1 | /* Thread edges through blocks and update the control flow and SSA graphs. |
711789cc | 2 | Copyright (C) 2004-2013 Free Software Foundation, Inc. |
a8046f60 | 3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8c4c00c1 | 8 | the Free Software Foundation; either version 3, or (at your option) |
a8046f60 | 9 | any later version. |
10 | ||
11 | GCC is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ | |
a8046f60 | 19 | |
20 | #include "config.h" | |
21 | #include "system.h" | |
22 | #include "coretypes.h" | |
23 | #include "tm.h" | |
24 | #include "tree.h" | |
25 | #include "flags.h" | |
a8046f60 | 26 | #include "tm_p.h" |
a8046f60 | 27 | #include "basic-block.h" |
a8046f60 | 28 | #include "function.h" |
69ee5dbb | 29 | #include "tree-ssa.h" |
b9ed1410 | 30 | #include "dumpfile.h" |
388d1fc1 | 31 | #include "cfgloop.h" |
2b15d2ba | 32 | #include "hash-table.h" |
a8046f60 | 33 | |
34 | /* Given a block B, update the CFG and SSA graph to reflect redirecting | |
35 | one or more in-edges to B to instead reach the destination of an | |
36 | out-edge from B while preserving any side effects in B. | |
37 | ||
0c6d8c36 | 38 | i.e., given A->B and B->C, change A->B to be A->C yet still preserve the |
a8046f60 | 39 | side effects of executing B. |
40 | ||
41 | 1. Make a copy of B (including its outgoing edges and statements). Call | |
42 | the copy B'. Note B' has no incoming edges or PHIs at this time. | |
43 | ||
44 | 2. Remove the control statement at the end of B' and all outgoing edges | |
45 | except B'->C. | |
46 | ||
47 | 3. Add a new argument to each PHI in C with the same value as the existing | |
48 | argument associated with edge B->C. Associate the new PHI arguments | |
49 | with the edge B'->C. | |
50 | ||
51 | 4. For each PHI in B, find or create a PHI in B' with an identical | |
7a635e9c | 52 | PHI_RESULT. Add an argument to the PHI in B' which has the same |
a8046f60 | 53 | value as the PHI in B associated with the edge A->B. Associate |
54 | the new argument in the PHI in B' with the edge A->B. | |
55 | ||
56 | 5. Change the edge A->B to A->B'. | |
57 | ||
58 | 5a. This automatically deletes any PHI arguments associated with the | |
59 | edge A->B in B. | |
60 | ||
61 | 5b. This automatically associates each new argument added in step 4 | |
62 | with the edge A->B'. | |
63 | ||
64 | 6. Repeat for other incoming edges into B. | |
65 | ||
66 | 7. Put the duplicated resources in B and all the B' blocks into SSA form. | |
67 | ||
68 | Note that block duplication can be minimized by first collecting the | |
f0b5f617 | 69 | set of unique destination blocks that the incoming edges should |
255a8494 | 70 | be threaded to. |
71 | ||
72 | Block duplication can be further minimized by using B instead of | |
73 | creating B' for one destination if all edges into B are going to be | |
74 | threaded to a successor of B. We had code to do this at one time, but | |
75 | I'm not convinced it is correct with the changes to avoid mucking up | |
76 | the loop structure (which may cancel threading requests, thus a block | |
77 | which we thought was going to become unreachable may still be reachable). | |
78 | This code was also going to get ugly with the introduction of the ability | |
79 | for a single jump thread request to bypass multiple blocks. | |
a8046f60 | 80 | |
778182c1 | 81 | We further reduce the number of edges and statements we create by |
82 | not copying all the outgoing edges and the control statement in | |
83 | step #1. We instead create a template block without the outgoing | |
84 | edges and duplicate the template. */ | |
85 | ||
86 | ||
87 | /* Steps #5 and #6 of the above algorithm are best implemented by walking | |
88 | all the incoming edges which thread to the same destination edge at | |
89 | the same time. That avoids lots of table lookups to get information | |
90 | for the destination edge. | |
91 | ||
92 | To realize that implementation we create a list of incoming edges | |
93 | which thread to the same outgoing edge. Thus to implement steps | |
94 | #5 and #6 we traverse our hash table of outgoing edge information. | |
95 | For each entry we walk the list of incoming edges which thread to | |
96 | the current outgoing edge. */ | |
97 | ||
struct el
{
  /* An incoming edge that is to be threaded.  */
  edge e;

  /* Next element in the singly-linked list of incoming edges which
     thread to the same outgoing edge.  */
  struct el *next;
};
a8046f60 | 103 | |
/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : typed_free_remove<redirection_data>
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
     its single successor.  */
  edge outgoing_edge;

  /* When threading through a joiner block, the edge into the joiner
     (THREAD_TARGET of the incoming edge); NULL for a simple thread.
     See lookup_redirection_data.  */
  edge intermediate_edge;

  /* A list of incoming edges which we want to thread to
     OUTGOING_EDGE->dest.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  typedef redirection_data value_type;
  typedef redirection_data compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};
134 | ||
494bbaae | 135 | inline hashval_t |
c580da87 | 136 | redirection_data::hash (const value_type *p) |
494bbaae | 137 | { |
138 | edge e = p->outgoing_edge; | |
139 | return e->dest->index; | |
140 | } | |
141 | ||
142 | inline int | |
c580da87 | 143 | redirection_data::equal (const value_type *p1, const compare_type *p2) |
494bbaae | 144 | { |
145 | edge e1 = p1->outgoing_edge; | |
146 | edge e2 = p2->outgoing_edge; | |
147 | edge e3 = p1->intermediate_edge; | |
148 | edge e4 = p2->intermediate_edge; | |
149 | return e1 == e2 && e3 == e4; | |
150 | } | |
151 | ||
/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* A template copy of BB with no outgoing edges or control statement that
     we use for creating copies.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  Set by
     ssa_redirect_edges and reported back to thread_block's caller.  */
  bool jumps_threaded;
};
a3d0fd80 | 165 | |
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge pairs
   (original_edge, target_edge).  */
static vec<edge> threaded_edges;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.
   THREAD_TARGET is the first (possibly only) edge to thread through;
   THREAD_TARGET2 is non-NULL when threading through a joiner block and
   names the final target edge.  */
#define THREAD_TARGET(E) ((edge *)(E)->aux)[0]
#define THREAD_TARGET2(E) ((edge *)(E)->aux)[1]

/* Jump threading statistics.  */

struct thread_stats_d
{
  /* Number of edges successfully threaded so far.  */
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;
186 | ||
187 | ||
/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  /* Walk the successors with ei_safe_edge so that removing an edge
     during the walk is safe; advance the iterator only when the edge
     is kept.  */
  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	remove_edge (e);
      else
	ei_next (&ei);
    }
}
221 | ||
/* Create a duplicate of BB.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL, NULL);

  /* Clear the AUX fields on the duplicate's outgoing edges so no jump
     threading data attached to BB's edges leaks into the copy.  */
  FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_block->frequency = 0;
  rd->dup_block->count = 0;
}
241 | ||
/* Main data structure to hold information for duplicates of BB.  */

static hash_table <redirection_data> redirection_data;

/* Given an incoming edge E, whose AUX field records the edge(s) it is to
   be threaded through, lookup and return the entry for E's jump thread
   destination in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  E is added to the list of incoming
   edges associated with the entry in the hash table.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;

  /* Build a hash table element so we can see if E is already
     in the table.

     For a thread through a joiner block THREAD_TARGET is the edge into
     the joiner and THREAD_TARGET2 the final outgoing edge; otherwise
     the lone THREAD_TARGET is the outgoing edge.  */
  elt = XNEW (struct redirection_data);
  elt->intermediate_edge = THREAD_TARGET2 (e) ? THREAD_TARGET (e) : NULL;
  elt->outgoing_edge = THREAD_TARGET2 (e) ? THREAD_TARGET2 (e)
					  : THREAD_TARGET (e);
  elt->dup_block = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data.find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}
310 | ||
da81e0c5 | 311 | /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E. */ |
312 | ||
313 | static void | |
314 | copy_phi_args (basic_block bb, edge src_e, edge tgt_e) | |
315 | { | |
316 | gimple_stmt_iterator gsi; | |
317 | int src_indx = src_e->dest_idx; | |
318 | ||
319 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
320 | { | |
321 | gimple phi = gsi_stmt (gsi); | |
322 | source_location locus = gimple_phi_arg_location (phi, src_indx); | |
60d535d2 | 323 | add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus); |
da81e0c5 | 324 | } |
325 | } | |
326 | ||
327 | /* We have recently made a copy of ORIG_BB, including its outgoing | |
328 | edges. The copy is NEW_BB. Every PHI node in every direct successor of | |
329 | ORIG_BB has a new argument associated with edge from NEW_BB to the | |
330 | successor. Initialize the PHI argument so that it is equal to the PHI | |
331 | argument associated with the edge from ORIG_BB to the successor. */ | |
332 | ||
333 | static void | |
334 | update_destination_phis (basic_block orig_bb, basic_block new_bb) | |
335 | { | |
336 | edge_iterator ei; | |
337 | edge e; | |
338 | ||
339 | FOR_EACH_EDGE (e, ei, orig_bb->succs) | |
340 | { | |
341 | edge e2 = find_edge (new_bb, e->dest); | |
342 | copy_phi_args (e->dest, e, e2); | |
343 | } | |
344 | } | |
345 | ||
/* Given a duplicate block and its single destination (both stored
   in RD).  Create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb)
{
  edge e = make_edge (bb, rd->outgoing_edge->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  /* BB unconditionally transfers control across E.  */
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  /* If the outgoing edge carries jump threading data in its AUX field,
     duplicate that data onto the new edge.  */
  if (rd->outgoing_edge->aux)
    {
      e->aux = XNEWVEC (edge, 2);
      THREAD_TARGET(e) = THREAD_TARGET (rd->outgoing_edge);
      THREAD_TARGET2(e) = THREAD_TARGET2 (rd->outgoing_edge);
    }
  else
    {
      e->aux = NULL;
    }

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->outgoing_edge, e);
}
380 | ||
/* Wire up the outgoing edges from the duplicate block and
   update any PHIs as needed.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  /* If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (THREAD_TARGET2 (rd->incoming_edges->e))
    {
      edge victim;
      edge e2;
      edge e = rd->incoming_edges->e;

      /* This updates the PHIs at the destination of the duplicate
	 block.  */
      update_destination_phis (local_info->bb, rd->dup_block);

      /* Find the edge from the duplicate block to the block we're
	 threading through.  That's the edge we want to redirect.  */
      victim = find_edge (rd->dup_block, THREAD_TARGET (e)->dest);
      e2 = redirect_edge_and_branch (victim, THREAD_TARGET2 (e)->dest);

      /* If we redirected the edge, then we need to copy PHI arguments
	 at the target.  If the edge already existed (e2 != victim case),
	 then the PHIs in the target already have the correct arguments.  */
      if (e2 == victim)
	copy_phi_args (e2->dest, THREAD_TARGET2 (e), e2);
    }
  else
    {
      remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
      create_edge_and_update_destination_phis (rd, rd->dup_block);
    }
}
/* Hash table traversal callback routine to create duplicate blocks.
   Returns 1 so the traversal visits every entry.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading (local_info->bb, rd);
      local_info->template_block = rd->dup_block;

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}
449 | ||
/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
			  ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_block && rd->dup_block == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  /* Keep walking the hash table.  */
  return 1;
}
475 | ||
/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

int
ssa_redirect_edges (struct redirection_data **slot,
		    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this
     hash table entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;
      /* If we are threading through a joiner block, then we have to
	 find the edge we want to redirect and update some PHI nodes.  */
      if (THREAD_TARGET2 (e))
	{
	  edge e2;

	  /* We want to redirect the incoming edge to the joiner block (E)
	     to instead reach the duplicate of the joiner block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_block);
	  flush_pending_stmts (e2);
	}
      else if (rd->dup_block)
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_block->index);

	  rd->dup_block->count += e->count;

	  /* Excessive jump threading may make frequencies large enough so
	     the computation overflows.  */
	  if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
	    rd->dup_block->frequency += EDGE_FREQUENCY (e);
	  EDGE_SUCC (rd->dup_block, 0)->count += e->count;
	  /* Redirect the incoming edge to the appropriate duplicate
	     block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_block);
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      free (e->aux);
      e->aux = NULL;

    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}
545 | ||
aed95130 | 546 | /* Return true if this block has no executable statements other than |
547 | a simple ctrl flow instruction. When the number of outgoing edges | |
548 | is one, this is equivalent to a "forwarder" block. */ | |
549 | ||
550 | static bool | |
47aaf6e6 | 551 | redirection_block_p (basic_block bb) |
aed95130 | 552 | { |
75a70cf9 | 553 | gimple_stmt_iterator gsi; |
aed95130 | 554 | |
555 | /* Advance to the first executable statement. */ | |
75a70cf9 | 556 | gsi = gsi_start_bb (bb); |
557 | while (!gsi_end_p (gsi) | |
558 | && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL | |
9845d120 | 559 | || is_gimple_debug (gsi_stmt (gsi)) |
75a70cf9 | 560 | || gimple_nop_p (gsi_stmt (gsi)))) |
561 | gsi_next (&gsi); | |
48e1416a | 562 | |
aed95130 | 563 | /* Check if this is an empty block. */ |
75a70cf9 | 564 | if (gsi_end_p (gsi)) |
aed95130 | 565 | return true; |
566 | ||
567 | /* Test that we've reached the terminating control statement. */ | |
75a70cf9 | 568 | return gsi_stmt (gsi) |
569 | && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND | |
570 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO | |
571 | || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH); | |
aed95130 | 572 | } |
573 | ||
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   Returns true if one or more jumps were threaded.  */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data.create (EDGE_COUNT (bb->succs));

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);

      if (e->aux)
	e2 = THREAD_TARGET (e);
      else
	e2 = NULL;

      if (e2 && loop_exit_edge_p (loop, e2))
	{
	  loop->header = NULL;
	  loop->latch = NULL;
	  loops_state_set (LOOPS_NEED_FIXUP);
	}
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
	continue;

      /* E2 is the final destination edge of the jump thread; for a
	 joiner thread that is the second recorded target.  */
      if (THREAD_TARGET2 (e))
	e2 = THREAD_TARGET2 (e);
      else
	e2 = THREAD_TARGET (e);

      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.

	     There are two cases to consider.  The first when BB is the
	     loop header.  We will attempt to thread this elsewhere, so
	     we can just continue here.  */

	  if (bb == bb->loop_father->header
	      && (!loop_exit_edge_p (bb->loop_father, e2)
		  || THREAD_TARGET2 (e)))
	    continue;


	  /* The second occurs when there was loop header buried in a jump
	     threading path.  We do not try and thread this elsewhere, so
	     just cancel the jump threading request by clearing the AUX
	     field now.  */
	  if ((bb->loop_father != e2->src->loop_father
	       && !loop_exit_edge_p (e2->src->loop_father, e2))
	      || (e2->src->loop_father != e2->dest->loop_father
		  && !loop_exit_edge_p (e2->src->loop_father, e2)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      free (e->aux);
	      e->aux = NULL;
	      continue;
	    }
	}

      if (e->dest == e2->src)
	update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
					 e->count, THREAD_TARGET (e));

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
	    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes as the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
	    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
	    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  redirection_data.dispose ();

  /* Undo the loop-copy marking done before duplication.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}
735 | ||
/* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
   copy of E->dest created during threading, or E->dest if it was not necessary
   to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  edge eto = THREAD_TARGET (e);
  struct redirection_data rd;

  /* The threading request attached to E is consumed here.  */
  free (e->aux);
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
	 control statements at its end, and successors except for ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  rd.outgoing_edge = eto;

  create_block_for_threading (bb, &rd);
  remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_block);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
	     e->src->index, e->dest->index, rd.dup_block->index);

  /* The duplicate inherits E's profile since only E reaches it.  */
  rd.dup_block->count = e->count;
  rd.dup_block->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_block)->count = e->count;
  redirect_edge_and_branch (e, rd.dup_block);
  flush_pending_stmts (e);

  return rd.dup_block;
}
787 | ||
788 | /* Callback for dfs_enumerate_from. Returns true if BB is different | |
789 | from STOP and DBDS_CE_STOP. */ | |
790 | ||
791 | static basic_block dbds_ce_stop; | |
792 | static bool | |
7ecb5bb2 | 793 | dbds_continue_enumeration_p (const_basic_block bb, const void *stop) |
7e0311ae | 794 | { |
7ecb5bb2 | 795 | return (bb != (const_basic_block) stop |
7e0311ae | 796 | && bb != dbds_ce_stop); |
797 | } | |
798 | ||
/* Evaluates the dominance relationship of latch of the LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};
811 | ||
/* Helper implementing the evaluation described above: classify the
   dominance relationship between BB and the latch of LOOP.  */

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
    {
      bool ok = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->src == loop->header)
	    {
	      ok = true;
	      break;
	    }
	}

      if (!ok)
	return DOMST_NONDOMINATING;
    }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
				bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
	/* The header was reached from the latch without passing through
	   BB, therefore BB does not dominate the latch.  */
	if (e->src == loop->header)
	  {
	    free (bblocks);
	    return DOMST_NONDOMINATING;
	  }
	if (e->src == bb)
	  bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}
865 | ||
6eb99d8a | 866 | /* Return true if BB is part of the new pre-header that is created |
867 | when threading the latch to DATA. */ | |
868 | ||
869 | static bool | |
870 | def_split_header_continue_p (const_basic_block bb, const void *data) | |
871 | { | |
872 | const_basic_block new_header = (const_basic_block) data; | |
a934d302 | 873 | const struct loop *l; |
874 | ||
875 | if (bb == new_header | |
876 | || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father)) | |
877 | return false; | |
878 | for (l = bb->loop_father; l; l = loop_outer (l)) | |
879 | if (l == new_header->loop_father) | |
880 | return true; | |
881 | return false; | |
6eb99d8a | 882 | } |
883 | ||
/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      /* A joiner-block request (two recorded targets) on the latch edge
	 is not handled here.  */
      if (THREAD_TARGET2 (latch))
	goto fail;
      tgt_edge = THREAD_TARGET (latch);
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
	   && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      /* Collect the common destination of all threaded entry edges;
	 any disagreement (or a non-threaded entry edge) would create
	 a multiple-entry loop.  */
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create loop
		 with multiple entries.  */
	      goto fail;
	    }

	  if (THREAD_TARGET2 (e))
	    goto fail;
	  tgt_edge = THREAD_TARGET (e);
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
	 original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      loops_state_set (LOOPS_NEED_FIXUP);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
	 to create a preheader for it, so that the headers of the two loops
	 do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
	{
	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
	  gcc_assert (tgt_bb != NULL);
	}
      else
	tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case latch edge is redirected.  We are copying
	 the loop header but not creating a multiple entry loop.  Make the
	 cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
				    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
	if (bblocks[i]->loop_father == loop)
	  {
	    remove_bb_from_loops (bblocks[i]);
	    add_bb_to_loop (bblocks[i], loop_outer (loop));
	  }
      free (bblocks);

      /* If the new header has multiple latches mark it so.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src->loop_father == loop
	    && e->src != loop->latch)
	  {
	    loop->latch = NULL;
	    loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
	  }

      /* Cancel remaining threading requests that would make the
	 loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  edge e2;

	  if (e->aux == NULL)
	    continue;

	  if (THREAD_TARGET2 (e))
	    e2 = THREAD_TARGET2 (e);
	  else
	    e2 = THREAD_TARGET (e);

	  if (e->src->loop_father != e2->dest->loop_father
	      && e2->dest != loop->header)
	    {
	      free (e->aux);
	      e->aux = NULL;
	    }
	}

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
	 block.  Remember one entry edge, so that we can find the new
	 preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (e->aux)
	    break;
	}

      /* The duplicate of the header is the new preheader of the loop.  Ensure
	 that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
	 must have only a single successor, but the original header had at
	 least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      free (e->aux);
      e->aux = NULL;
    }
  return false;
}
1149 | ||
b99a7d6d | 1150 | /* E1 and E2 are edges into the same basic block. Return TRUE if the |
1151 | PHI arguments associated with those edges are equal or there are no | |
1152 | PHI arguments, otherwise return FALSE. */ | |
1153 | ||
1154 | static bool | |
1155 | phi_args_equal_on_edges (edge e1, edge e2) | |
1156 | { | |
1157 | gimple_stmt_iterator gsi; | |
1158 | int indx1 = e1->dest_idx; | |
1159 | int indx2 = e2->dest_idx; | |
1160 | ||
1161 | for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1162 | { | |
1163 | gimple phi = gsi_stmt (gsi); | |
1164 | ||
1165 | if (!operand_equal_p (gimple_phi_arg_def (phi, indx1), | |
1166 | gimple_phi_arg_def (phi, indx2), 0)) | |
1167 | return false; | |
1168 | } | |
1169 | return true; | |
1170 | } | |
1171 | ||
/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCK set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.

   Note: threading requests live in THREADED_EDGES as consecutive
   triples (incoming edge, intermediate edge, final edge-or-NULL).  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* It is possible to have jump threads in which one is a subpath
     of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
     block and (B, C), (C, D) where no joiner block exists.

     When this occurs ignore the jump thread request with the joiner
     block.  It's totally subsumed by the simpler jump thread request.

     This results in less block copying, simpler CFGs.  More importantly,
     when we duplicate the joiner block, B, in this case we will create
     a new threading opportunity that we wouldn't be able to optimize
     until the next jump threading iteration.

     So first convert the jump thread requests which do not require a
     joiner block.  */
  for (i = 0; i < threaded_edges.length (); i += 3)
    {
      edge e = threaded_edges[i];

      if (threaded_edges[i + 2] == NULL)
	{
	  edge *x = XNEWVEC (edge, 2);

	  e->aux = x;
	  THREAD_TARGET (e) = threaded_edges[i + 1];
	  THREAD_TARGET2 (e) = NULL;
	  bitmap_set_bit (tmp, e->dest->index);
	}
    }


  /* Now iterate again, converting cases where we threaded through
     a joiner block, but ignoring those where we have already
     threaded through the joiner block.  */
  for (i = 0; i < threaded_edges.length (); i += 3)
    {
      edge e = threaded_edges[i];

      if (threaded_edges[i + 2] != NULL
	  && threaded_edges[i + 1]->aux == NULL)
	{
	  edge *x = XNEWVEC (edge, 2);

	  e->aux = x;
	  THREAD_TARGET (e) = threaded_edges[i + 1];
	  THREAD_TARGET2 (e) = threaded_edges[i + 2];
	  bitmap_set_bit (tmp, e->dest->index);
	}
    }

  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading though S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK (i);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->aux)
	    {
	      bool have_joiner = THREAD_TARGET2 (e) != NULL;

	      if (have_joiner)
		{
		  basic_block joiner = e->dest;
		  edge final_edge = THREAD_TARGET2 (e);
		  basic_block final_dest = final_edge->dest;
		  edge e2 = find_edge (joiner, final_dest);

		  /* PHI arguments differ between the direct edge and the
		     threaded path; cancel this request.  */
		  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
		    {
		      free (e->aux);
		      e->aux = NULL;
		    }
		}
	    }
	}
    }


  /* If optimizing for size, only thread through block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
	{
	  bb = BASIC_BLOCK (i);
	  if (EDGE_COUNT (bb->preds) > 1
	      && !redirection_block_p (bb))
	    {
	      FOR_EACH_EDGE (e, ei, bb->preds)
		{
		  free (e->aux);
		  e->aux = NULL;
		}
	    }
	  else
	    bitmap_set_bit (threaded_blocks, i);
	}
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  BITMAP_FREE(tmp);
}
1310 | ||
1311 | ||
/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each edge pair recorded in THREADED_EDGES.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;
  loop_iterator li;

  /* We must know about loops in order to preserve them.  */
  gcc_assert (current_loops != NULL);

  /* No threading requests were registered; nothing to do.  */
  if (!threaded_edges.exists ())
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);

      if (EDGE_COUNT (bb->preds) > 0)
	retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
	continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  statistics_counter_event (cfun, "Jumps threaded",
			    thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  threaded_edges.release ();

  /* The CFG surgery above invalidates the recorded loop structures.  */
  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}
3cebc9d2 | 1382 | |
/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   E is the edge we can thread, E2 is the new target edge, i.e., we
   are effectively recording that E->dest can be changed to E2->dest
   after fixing the SSA graph.  */

void
register_jump_thread (vec<edge> path, bool through_joiner)
{
  /* Convert PATH into 3 edge representation we've been using.  This
     is temporary until we convert this file to use a path representation
     throughout.  */
  edge e = path[0];
  edge e2 = path[1];
  edge e3;

  /* E3 is the final edge when threading through a joiner block, or
     NULL for the simple (no joiner) case.  */
  if (!through_joiner)
    e3 = NULL;
  else
    e3 = path.last ();

  /* This can occur if we're jumping to a constant address or
     something similar.  Just get out now.  */
  if (e2 == NULL)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned int i;

      fprintf (dump_file,
	       "  Registering jump thread %s:",
	       through_joiner ? "(through joiner block)" : "");

      for (i = 0; i < path.length (); i++)
	fprintf (dump_file, "  (%d, %d); ",
		 path[i]->src->index, path[i]->dest->index);
      fputc ('\n', dump_file);
    }

  /* Lazily create the request vector on first use.  */
  if (!threaded_edges.exists ())
    threaded_edges.create (15);

  /* Requests are stored as consecutive triples; mark_threaded_blocks
     consumes them in the same (E, E2, E3) order.  */
  threaded_edges.safe_push (e);
  threaded_edges.safe_push (e2);
  threaded_edges.safe_push (e3);
}