/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "flags.h"
#include "basic-block.h"
#include "function.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa.h"
#include "tree-ssa-threadupdate.h"
#include "ssa-iterators.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "hash-table.h"
#include "dbgcnt.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements). Call
        the copy B'. Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C. Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT. Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B. Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   We reduce the number of edges and statements we create by not copying all
   the outgoing edges and the control statement in step #1. We instead create
   a template block without the outgoing edges and duplicate the template.

   Another case this code handles is threading through a "joiner" block. In
   this case, we do not know the destination of the joiner block, but one
   of the outgoing edges from the joiner block leads to a threadable path. This
   case largely works as outlined above, except the duplicate of the joiner
   block still contains a full set of outgoing edges and its control statement.
   We just redirect one of its outgoing edges to our jump threading path. */
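
/* For illustration, consider the smallest instance of the transformation
   described above. A, B and C are as in the steps above; D is a second
   predecessor of B that is not threaded and E is B's other successor
   (D and E are illustrative names, not used elsewhere):

     Before:  A->B, D->B, B->C, B->E, and A->B is known to always reach C.

     After:   A->B', B'->C, D->B, B->C, B->E.

   B' is the duplicate of B with its control statement removed and C as
   its only successor; B survives unchanged for the unthreaded edge D->B. */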

/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time. That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge. Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge. */

struct el
{
  edge e;
  struct el *next;
};

/* Main data structure recording information regarding B's duplicate
   blocks. */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations. We
   may have many incoming edges threaded to the same outgoing edge. This
   can be naturally implemented with a hash table. */

struct redirection_data : typed_free_remove<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path. */
  basic_block dup_blocks[2];

  /* The jump threading path. */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path. */
  struct el *incoming_edges;

  /* hash_table support. */
  typedef redirection_data value_type;
  typedef redirection_data compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};

/* Simple hashing function. For any given incoming edge E, we're going
   to be most concerned with the final destination of its jump thread
   path. So hash on the block index of the final edge in the path. */

inline hashval_t
redirection_data::hash (const value_type *p)
{
  vec<jump_thread_edge *> *path = p->path;
  return path->last ()->e->dest->index;
}

/* Given two hash table entries, return true if they have the same
   jump threading path. */
inline int
redirection_data::equal (const value_type *p1, const compare_type *p2)
{
  vec<jump_thread_edge *> *path1 = p1->path;
  vec<jump_thread_edge *> *path2 = p2->path;

  if (path1->length () != path2->length ())
    return false;

  for (unsigned int i = 1; i < path1->length (); i++)
    {
      if ((*path1)[i]->type != (*path2)[i]->type
          || (*path1)[i]->e != (*path2)[i]->e)
        return false;
    }

  return true;
}

/* Data structure of information to pass to hash table traversal routines. */
struct ssa_local_info_t
{
  /* The current block we are working on. */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path. Creating
     and sharing a template for that block is considerably more difficult. */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise. */
  bool jumps_threaded;
};

/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered. We keep the registered
   jump threading opportunities in this vector; each entry is a
   jump thread path, whose first edge is the incoming edge to be
   threaded. */
static vec<vec<jump_thread_edge *> *> paths;

/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge. Use these
   macros to access the underlying structure attached to the AUX field. */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)

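/* For example, given an incoming edge E whose AUX field carries a
   thread request (a usage sketch):

     vec<jump_thread_edge *> *path = THREAD_PATH (E);
     edge start = (*path)[0]->e;
     edge final = path->last ()->e;

   START is E itself, and the destination of FINAL is the final
   destination of the jump thread. */
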
/* Jump threading statistics. */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;

/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges. */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it. */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
          || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
        remove_edge (e);
      else
        ei_next (&ei);
    }
}

/* Create a duplicate of BB. Record the duplicate block in RD
   at index COUNT. */

static void
create_block_for_threading (basic_block bb,
                            struct redirection_data *rd,
                            unsigned int count)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need. */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now. */
  rd->dup_blocks[count]->frequency = 0;
  rd->dup_blocks[count]->count = 0;
}

/* Main data structure to hold information for duplicates of BB. */

static hash_table <redirection_data> redirection_data;

/* Given an incoming edge E, whose jump thread path determines an
   outgoing edge, lookup and return the entry for that path in our
   hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present. E is added to the list of incoming
   edges associated with the entry. */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table. */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = redirection_data.find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table. */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true. */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table. */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
         relevant entry from the hash table itself. */
      free (elt);

      /* Get the entry stored in the hash table. */
      elt = *slot;

      /* If insertion was requested, then we need to add E
         to the list of incoming edges associated with the entry. */
      if (insert)
        {
          struct el *el = XNEW (struct el);
          el->next = elt->incoming_edges;
          el->e = e;
          elt->incoming_edges = el;
        }

      return elt;
    }
}

/* Similar to copy_phi_args, except that the PHI arg exists, it just
   does not have a value associated with it. */

static void
copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
{
  int src_idx = src_e->dest_idx;
  int tgt_idx = tgt_e->dest_idx;

  /* Iterate over each PHI in SRC_E->dest. */
  for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest),
                            gsi2 = gsi_start_phis (tgt_e->dest);
       !gsi_end_p (gsi);
       gsi_next (&gsi), gsi_next (&gsi2))
    {
      gimple src_phi = gsi_stmt (gsi);
      gimple dest_phi = gsi_stmt (gsi2);
      tree val = gimple_phi_arg_def (src_phi, src_idx);
      source_location locus = gimple_phi_arg_location (src_phi, src_idx);

      SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
      gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
    }
}

/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E. */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
{
  gimple_stmt_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      source_location locus = gimple_phi_arg_location (phi, src_indx);
      add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
    }
}

/* We have recently made a copy of ORIG_BB, including its outgoing
   edges. The copy is NEW_BB. Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor. Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor. */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2);
    }
}

/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination. */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
                                         basic_block bb)
{
  edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  /* We have to copy path -- which means creating a new vector as well
     as all the jump_thread_edge entries. */
  if (rd->path->last ()->e->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (rd->path->last ()->e);
      vec<jump_thread_edge *> *copy = new vec<jump_thread_edge *> ();

      /* Sadly, the elements of the vector are pointers and need to
         be copied as well. */
      for (unsigned int i = 0; i < path->length (); i++)
        {
          jump_thread_edge *x
            = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type);
          copy->safe_push (x);
        }
      e->aux = (void *)copy;
    }
  else
    {
      e->aux = NULL;
    }

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them. The argument should have the same value as the argument
     associated with the outgoing edge stored in RD. */
  copy_phi_args (e->dest, rd->path->last ()->e, e);
}

/* Look through PATH beginning at START and return TRUE if there are
   any additional blocks that need to be duplicated. Otherwise,
   return FALSE. */
static bool
any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
                                 unsigned int start)
{
  for (unsigned int i = start + 1; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
          || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
        return true;
    }
  return false;
}

/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed. */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
                               ssa_local_info_t *local_info)
{
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      /* If we were threading through a joiner block, then we want
         to keep its control statement and redirect an outgoing edge.
         Else we want to remove the control statement & edges, then create
         a new outgoing edge. In both cases we may need to update PHIs. */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          edge victim;
          edge e2;

          /* This updates the PHIs at the destination of the duplicate
             block. */
          update_destination_phis (local_info->bb, rd->dup_blocks[count]);

          /* Find the edge from the duplicate block to the block we're
             threading through. That's the edge we want to redirect. */
          victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

          /* If there are no remaining blocks on the path to duplicate,
             then redirect VICTIM to the final destination of the jump
             threading path. */
          if (!any_remaining_duplicated_blocks (path, i))
            {
              e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
              e2->count = path->last ()->e->count;
              /* If we redirected the edge, then we need to copy PHI arguments
                 at the target. If the edge already existed (e2 != victim
                 case), then the PHIs in the target already have the correct
                 arguments. */
              if (e2 == victim)
                copy_phi_args (e2->dest, path->last ()->e, e2);
            }
          else
            {
              /* Redirect VICTIM to the next duplicated block in the path. */
              e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

              /* We need to update the PHIs in the next duplicated block. We
                 want the new PHI args to have the same value as they had
                 in the source of the next duplicate block.

                 Thus, we need to know which edge we traversed into the
                 source of the duplicate. Furthermore, we may have
                 traversed many edges to reach the source of the duplicate.

                 Walk through the path starting at element I until we
                 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
                 the edge from the prior element. */
              for (unsigned int j = i + 1; j < path->length (); j++)
                {
                  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
                    {
                      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
                      break;
                    }
                }
            }
          count++;
        }
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
        {
          remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
          create_edge_and_update_destination_phis (rd, rd->dup_blocks[count]);
          if (count == 1)
            single_succ_edge (rd->dup_blocks[1])->aux = NULL;
          count++;
        }
    }
}

/* Hash table traversal callback routine to create duplicate blocks. */

int
ssa_create_duplicates (struct redirection_data **slot,
                       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path. So it gets stored in RD rather than in LOCAL_INFO.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path. The first
     edge is the incoming edge, the second edge always has its source
     duplicated. Thus we start our search with the third edge. */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
          || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          create_block_for_threading ((*path)[i]->e->src, rd, 1);
          break;
        }
    }

  /* Create a template block if we have not done so already. Otherwise
     use the template to create a new block. */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0);
      local_info->template_block = rd->dup_blocks[0];

      /* We do not create any outgoing edges for the template. We will
         take care of that in a later traversal. That way we do not
         create edges that are going to just be deleted. */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd, 0);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
         block. */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table. */
  return 1;
}

/* We did not create any outgoing edges for the template block during
   block creation. This hash table traversal callback creates the
   outgoing edge for the template block. */

inline int
ssa_fixup_template_block (struct redirection_data **slot,
                          ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* If this is the template block, halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge. In both cases we may need to update PHIs. */
  if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    {
      ssa_fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}

/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination. */

int
ssa_redirect_edges (struct redirection_data **slot,
                    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this
     hash table entry. */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list. Doing this now
         avoids the need for another list walk when we destroy the hash
         table. */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;

      if (rd->dup_blocks[0])
        {
          edge e2;

          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

          rd->dup_blocks[0]->count += e->count;

          /* Excessive jump threading may make frequencies large enough so
             the computation overflows. */
          if (rd->dup_blocks[0]->frequency < BB_FREQ_MAX * 2)
            rd->dup_blocks[0]->frequency += EDGE_FREQUENCY (e);

          /* In the case of threading through a joiner block, the outgoing
             edges from the duplicate block were updated when they were
             redirected during ssa_fix_duplicate_block_edges. */
          if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
            EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;

          /* Redirect the incoming edge (possibly to the joiner block) to the
             appropriate duplicate block. */
          e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
          gcc_assert (e == e2);
          flush_pending_stmts (e2);
        }

      /* Go ahead and clear E->aux. It's not needed anymore and failure
         to clear it will cause all kinds of unpleasant problems later. */
      delete_jump_thread_path (path);
      e->aux = NULL;

    }

  /* Indicate that we actually threaded one or more jumps. */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}

/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction. When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block. */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement. */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
             || is_gimple_debug (gsi_stmt (gsi))
             || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block. */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement. */
  return gsi_stmt (gsi)
         && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
             || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}
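
/* For example, redirection_block_p holds for a block containing only
   labels, debug statements or nops followed by a single GIMPLE_COND,
   GIMPLE_GOTO or GIMPLE_SWITCH, and for a completely empty block. */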

/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge. Doing so avoids a conditional branch
   and may expose new optimization opportunities. Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB. We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well. */

static bool
thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB. */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need, we instead
     use a hash table. For normal code there should be no noticeable
     difference. However, if we have a block with a large number of
     incoming and outgoing edges, such linear searches can get expensive. */
  redirection_data.create (EDGE_COUNT (bb->succs));

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist. Make sure we do not restrict ourselves in order to preserve
     this loop. */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path
          && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
              || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
        {
          for (unsigned int i = 1; i < path->length (); i++)
            {
              edge e2 = (*path)[i]->e;

              if (loop_exit_edge_p (loop, e2))
                {
                  loop->header = NULL;
                  loop->latch = NULL;
                  loops_state_set (LOOPS_NEED_FIXUP);
                }
            }
        }
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups. */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
        continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
          || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
        continue;

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
        {
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges. */

          /* One case occurs when there was a loop header buried in a jump
             threading path that crosses loop boundaries. We do not try
             to thread this elsewhere, so just cancel the jump threading
             request by clearing the AUX field now. */
          if ((bb->loop_father != e2->src->loop_father
               && !loop_exit_edge_p (e2->src->loop_father, e2))
              || (e2->src->loop_father != e2->dest->loop_father
                  && !loop_exit_edge_p (e2->src->loop_father, e2)))
            {
              /* Since this case is not handled by our special code
                 to thread through a loop header, we must explicitly
                 cancel the threading request here. */
              delete_jump_thread_path (path);
              e->aux = NULL;
              continue;
            }

          /* Another case occurs when trying to thread through our
             own loop header, possibly from inside the loop.

             If our loop header is buried in the path, then go ahead
             and cancel the jump threading request here. This likely
             will need updating for the FSA/FSM coremark case.

             Other cases (BB is the loop header) are handled elsewhere. */
          unsigned int i;
          for (i = 1; i < path->length (); i++)
            {
              if ((*path)[i]->e->src == bb->loop_father->header
                  && (!loop_exit_edge_p (bb->loop_father, e2)
                      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
                {
                  /* If i != 1, then it's a buried header that will not
                     be handled elsewhere. */
                  if (i != 1)
                    {
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                    }
                  break;
                }
            }

          if (i != path->length ())
            continue;
        }

      if (e->dest == e2->src)
        update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
                                         e->count, (*THREAD_PATH (e))[1]->e);

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table. */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info. */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop. */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate. We then use that duplicate block as a template for
     the rest of the duplicates. */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
                            (&local_info);

  /* The template does not have an outgoing edge. Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges. */
  redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
                            (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks). This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB. */
  redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
                            (&local_info);

  /* Done with this block. Clear REDIRECTION_DATA. */
  redirection_data.dispose ();

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  /* Indicate to our caller whether or not any jumps were threaded. */
  return local_info.jumps_threaded;
}

/* Wrapper for thread_block_1 so that we can first handle jump
   thread paths which do not involve copying joiner blocks, then
   handle jump thread paths which have joiner blocks.

   By doing things this way we can be as aggressive as possible and
   not worry that copying a joiner block will create a jump threading
   opportunity. */

static bool
thread_block (basic_block bb, bool noloop_only)
{
  bool retval;
  retval = thread_block_1 (bb, noloop_only, false);
  retval |= thread_block_1 (bb, noloop_only, true);
  return retval;
}

/* Threads edge E through E->dest to the edge (*THREAD_PATH (E))[1]->e.
   Returns the copy of E->dest created during threading, or E->dest if
   it was not necessary to copy it (E is its single predecessor). */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  struct redirection_data rd;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  edge eto = (*path)[1]->e;

  for (unsigned int i = 0; i < path->length (); i++)
    delete (*path)[i];
  delete path;
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
         control statement at its end and all successors except ETO. */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge. */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy. */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
  jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
  npath->safe_push (x);

  x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
  npath->safe_push (x);
  rd.path = npath;

  create_block_for_threading (bb, &rd, 0);
  remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0]);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
             e->src->index, e->dest->index, rd.dup_blocks[0]->index);

  rd.dup_blocks[0]->count = e->count;
  rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_blocks[0])->count = e->count;
  redirect_edge_and_branch (e, rd.dup_blocks[0]);
  flush_pending_stmts (e);

  return rd.dup_blocks[0];
}

/* Callback for dfs_enumerate_from. Returns true if BB is different
   from STOP and DBDS_CE_STOP. */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
          && bb != dbds_ce_stop);
}

/* Evaluates the dominance relationship of the latch of LOOP and BB, and
   returns the state. */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP. */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch). */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP. */
  DOMST_DOMINATING
};

static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe. */
  {
    bool ok = false;

    FOR_EACH_EDGE (e, ei, bb->preds)
      {
        if (e->src == loop->header)
          {
            ok = true;
            break;
          }
      }

    if (!ok)
      return DOMST_NONDOMINATING;
  }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it. */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
                                bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
        if (e->src == loop->header)
          {
            free (bblocks);
            return DOMST_NONDOMINATING;
          }
        if (e->src == bb)
          bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}

/* Return true if BB is part of the new pre-header that is created
   when threading the latch to DATA. */

static bool
def_split_header_continue_p (const_basic_block bb, const void *data)
{
  const_basic_block new_header = (const_basic_block) data;
  const struct loop *l;

  if (bb == new_header
      || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
    return false;
  for (l = bb->loop_father; l; l = loop_outer (l))
    if (l == new_header->loop_father)
      return true;
  return false;
}

/* Thread jumps through the header of LOOP. Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop. */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop. We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here. However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop. This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop. This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor. */
  if (single_succ_p (header))
    goto fail;

  /* If we threaded the latch using a joiner block, we cancel the
     threading opportunity out of an abundance of caution. However,
     still allow threading from outside to inside the loop. */
  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          delete_jump_thread_path (path);
          latch->aux = NULL;
        }
    }

  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      tgt_edge = (*path)[1]->e;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If latch is not threaded, and there is a header
                 edge that is not threaded, we would create a loop
                 with multiple entries. */
              goto fail;
            }

          vec<jump_thread_edge *> *path = THREAD_PATH (e);

          if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
            goto fail;
          tgt_edge = (*path)[1]->e;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create a loop
             with multiple entries. */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests. */
          return false;
        }

      /* Redirecting to empty loop latch is useless. */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop. */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header. */
      loop->header = NULL;
      loop->latch = NULL;
      loops_state_set (LOOPS_NEED_FIXUP);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge. */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case where the latch edge is redirected. We are
         copying the loop header but not creating a multiple entry loop.
         Make the cfg manipulation code aware of that fact. */
1266 | set_loop_copy (loop, loop); | |
7e0311ae | 1267 | loop->latch = thread_single_edge (latch); |
35c67c83 | 1268 | set_loop_copy (loop, NULL); |
7e0311ae | 1269 | gcc_assert (single_succ (loop->latch) == tgt_bb); |
1270 | loop->header = tgt_bb; | |
1271 | ||
6eb99d8a | 1272 | /* Remove the new pre-header blocks from our loop. */ |
1273 | bblocks = XCNEWVEC (basic_block, loop->num_nodes); | |
1274 | nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p, | |
1275 | bblocks, loop->num_nodes, tgt_bb); | |
1276 | for (i = 0; i < nblocks; i++) | |
c99897b6 | 1277 | if (bblocks[i]->loop_father == loop) |
1278 | { | |
1279 | remove_bb_from_loops (bblocks[i]); | |
1280 | add_bb_to_loop (bblocks[i], loop_outer (loop)); | |
1281 | } | |
6eb99d8a | 1282 | free (bblocks); |
1283 | ||
      /* If the new header has multiple latches, mark it so. */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src->loop_father == loop
            && e->src != loop->latch)
          {
            loop->latch = NULL;
            loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
          }

      /* Cancel remaining threading requests that would make the
         loop a multiple entry loop. */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          edge e2;

          if (e->aux == NULL)
            continue;

          vec<jump_thread_edge *> *path = THREAD_PATH (e);
          e2 = path->last ()->e;

          if (e->src->loop_father != e2->dest->loop_father
              && e2->dest != loop->header)
            {
              delete_jump_thread_path (path);
              e->aux = NULL;
            }
        }

      /* Thread the remaining edges through the former header. */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case where entry edges are redirected to the new
         entry block. Remember one entry edge, so that we can find the new
         preheader (its destination after threading). */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop. Ensure
         that it is placed correctly in the loop hierarchy. */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block. This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors. */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything. Cancel the requests. */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          delete_jump_thread_path (path);
          e->aux = NULL;
        }
    }
  return false;
}

b99a7d6d | 1365 | /* E1 and E2 are edges into the same basic block. Return TRUE if the |
1366 | PHI arguments associated with those edges are equal or there are no | |
1367 | PHI arguments, otherwise return FALSE. */ | |
1368 | ||
1369 | static bool | |
1370 | phi_args_equal_on_edges (edge e1, edge e2) | |
1371 | { | |
1372 | gimple_stmt_iterator gsi; | |
1373 | int indx1 = e1->dest_idx; | |
1374 | int indx2 = e2->dest_idx; | |
1375 | ||
1376 | for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1377 | { | |
1378 | gimple phi = gsi_stmt (gsi); | |
1379 | ||
1380 | if (!operand_equal_p (gimple_phi_arg_def (phi, indx1), | |
1381 | gimple_phi_arg_def (phi, indx2), 0)) | |
1382 | return false; | |
1383 | } | |
1384 | return true; | |
1385 | } | |
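
/* For illustration (hypothetical GIMPLE, not produced by this pass):
   with E1 and E2 both entering a block containing

       x_1 = PHI <a_2 (E1), a_2 (E2), b_3 (E3)>

   the function returns true, since both edges select a_2.  If E2's
   argument were b_3 instead, operand_equal_p would reject the pair
   and the function would return false.  A block without PHIs
   trivially compares equal.  */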
1386 | ||
3cebc9d2 | 1387 | /* Walk through the registered jump threads and convert them into a |
334ec2d8 | 1388 | form convenient for this pass. |
3cebc9d2 | 1389 | |
1390 | Any block which has incoming edges threaded to outgoing edges | |
1391 | will have its entry in THREADED_BLOCK set. | |
a8046f60 | 1392 | |
3cebc9d2 | 1393 | Any edge which starts a jump thread path will have that path
1394 | stored in its AUX field.
a8046f60 | 1395 | |
3cebc9d2 | 1396 | This form avoids the need to walk all the edges in the CFG to |
1397 | discover blocks which need processing and avoids unnecessary | |
1398 | hash table lookups to map from threaded edge to new target. */ | |
a8046f60 | 1399 | |
3cebc9d2 | 1400 | static void |
1401 | mark_threaded_blocks (bitmap threaded_blocks) | |
1402 | { | |
1403 | unsigned int i; | |
7e0311ae | 1404 | bitmap_iterator bi; |
1405 | bitmap tmp = BITMAP_ALLOC (NULL); | |
1406 | basic_block bb; | |
1407 | edge e; | |
1408 | edge_iterator ei; | |
3cebc9d2 | 1409 | |
ed4feca1 | 1410 | /* Move the jump threading requests from PATHS to each edge |
1411 | which starts a jump thread path. */ | |
f2981b08 | 1412 | for (i = 0; i < paths.length (); i++) |
3cebc9d2 | 1413 | { |
f2981b08 | 1414 | vec<jump_thread_edge *> *path = paths[i]; |
ed4feca1 | 1415 | edge e = (*path)[0]->e; |
1416 | e->aux = (void *)path; | |
1417 | bitmap_set_bit (tmp, e->dest->index); | |
1f3976e7 | 1418 | } |
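
/* Schematically (illustrative only), after the loop above each queued
   request is recoverable from its first edge and each candidate block
   is marked:

       e->aux = path;                          first edge of the path
       bitmap_set_bit (tmp, e->dest->index);   block the edge enters

   Later walks need only visit the bits in TMP and the preds of those
   blocks; no hash table lookup is required to find a request.  */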
1419 | ||
7e0311ae | 1422 | /* If optimizing for size, only thread through a block if we don't have
1423 | to duplicate it or it's an otherwise empty redirection block. */ | |
0bfd8d5c | 1424 | if (optimize_function_for_size_p (cfun)) |
7e0311ae | 1425 | { |
1426 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1427 | { | |
1428 | bb = BASIC_BLOCK (i); | |
1429 | if (EDGE_COUNT (bb->preds) > 1 | |
1430 | && !redirection_block_p (bb)) | |
1431 | { | |
1432 | FOR_EACH_EDGE (e, ei, bb->preds) | |
eb31063a | 1433 | { |
f2981b08 | 1434 | if (e->aux) |
1435 | { | |
1436 | vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
6d1fdbf9 | 1437 | delete_jump_thread_path (path); |
f2981b08 | 1438 | e->aux = NULL; |
1439 | } | |
eb31063a | 1440 | } |
7e0311ae | 1441 | } |
1442 | else | |
1443 | bitmap_set_bit (threaded_blocks, i); | |
1444 | } | |
3cebc9d2 | 1445 | } |
7e0311ae | 1446 | else |
1447 | bitmap_copy (threaded_blocks, tmp); | |
1448 | ||
6328e25d | 1449 | /* Look for jump threading paths which cross multiple loop headers. |
1450 | ||
1451 | The code to thread through loop headers will change the CFG in ways | |
1452 | that break assumptions made by the loop optimization code. | |
1453 | ||
1454 | We don't want to blindly cancel the requests. We can instead do better | |
1455 | by trimming off the end of the jump thread path. */ | |
1456 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1457 | { | |
1458 | basic_block bb = BASIC_BLOCK (i); | |
1459 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1460 | { | |
1461 | if (e->aux) | |
1462 | { | |
1463 | vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1464 | ||
1465 | /* Basically we're looking for a situation where we can see | |
1466 | 3 or more loop structures on a jump threading path. */ | |
1467 | ||
1468 | struct loop *first_father = (*path)[0]->e->src->loop_father; | |
1469 | struct loop *second_father = NULL; | |
1470 | for (unsigned int i = 0; i < path->length (); i++) | |
1471 | { | |
1472 | /* See if this is a loop father we have not seen before. */ | |
1473 | if ((*path)[i]->e->dest->loop_father != first_father | |
1474 | && (*path)[i]->e->dest->loop_father != second_father) | |
1475 | { | |
1476 | /* We've already seen two loop fathers, so we | |
1477 | need to trim this jump threading path. */ | |
1478 | if (second_father != NULL) | |
1479 | { | |
1480 | /* Trim from entry I onwards. */ | |
1481 | for (unsigned int j = i; j < path->length (); j++) | |
1482 | delete (*path)[j]; | |
1483 | path->truncate (i); | |
1484 | ||
1485 | /* Now that we've truncated the path, make sure | |
1486 | what's left is still valid. We need at least | |
1487 | two edges on the path and the last edge cannot
1488 | be a joiner. This should never happen, but let's | |
1489 | be safe. */ | |
1490 | if (path->length () < 2 | |
1491 | || (path->last ()->type | |
1492 | == EDGE_COPY_SRC_JOINER_BLOCK)) | |
1493 | { | |
6d1fdbf9 | 1494 | delete_jump_thread_path (path); |
6328e25d | 1495 | e->aux = NULL; |
1496 | } | |
1497 | break; | |
1498 | } | |
1499 | else | |
1500 | { | |
1501 | second_father = (*path)[i]->e->dest->loop_father; | |
1502 | } | |
1503 | } | |
1504 | } | |
1505 | } | |
1506 | } | |
1507 | } | |
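
/* A worked example of the trimming above (hypothetical loop fathers):
   if the blocks along a path belong to loops L1, L1, L2, L2, L3 in
   that order, FIRST_FATHER is L1, SECOND_FATHER becomes L2, and the
   entry that first lands in L3 triggers truncation, leaving only the
   L1/L2 prefix.  A truncated path with fewer than two edges, or one
   that now ends in a joiner, is cancelled outright.  */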
1508 | ||
af6b6631 | 1509 | /* If we have a joiner block (J) which has two successors S1 and S2 and |
1510 | we are threading through S1 and the final destination of the thread
1511 | is S2, then we must verify that any PHI nodes in S2 have the same | |
1512 | PHI arguments for the edge J->S2 and J->S1->...->S2. | |
1513 | ||
1514 | We used to detect this prior to registering the jump thread, but | |
1515 | that prohibits propagation of edge equivalences into non-dominated | |
1516 | PHI nodes as the equivalency test might occur before propagation. | |
1517 | ||
1518 | This must also occur after we truncate any jump threading paths | |
1519 | as this scenario may only show up after truncation. | |
1520 | ||
1521 | This works for now, but will need improvement as part of the FSA | |
1522 | optimization. | |
1523 | ||
1524 | Note since we've moved the thread request data to the edges, | |
1525 | we have to iterate on those rather than the threaded_edges vector. */ | |
1526 | EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) | |
1527 | { | |
1528 | bb = BASIC_BLOCK (i); | |
1529 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1530 | { | |
1531 | if (e->aux) | |
1532 | { | |
1533 | vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1534 | bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK); | |
1535 | ||
1536 | if (have_joiner) | |
1537 | { | |
1538 | basic_block joiner = e->dest; | |
1539 | edge final_edge = path->last ()->e; | |
1540 | basic_block final_dest = final_edge->dest; | |
1541 | edge e2 = find_edge (joiner, final_dest); | |
1542 | ||
1543 | if (e2 && !phi_args_equal_on_edges (e2, final_edge)) | |
1544 | { | |
1545 | delete_jump_thread_path (path); | |
1546 | e->aux = NULL; | |
1547 | } | |
1548 | } | |
1549 | } | |
1550 | } | |
1551 | } | |
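
/* The shape validated by the loop above, with hypothetical blocks:

        J ------------------> S2
        |                     ^
        v                     |
        S1 ---> ... ----------+

   The registered thread runs J->S1->...->S2 while the direct edge
   J->S2 also exists.  If a PHI in S2 selects different arguments for
   those two incoming edges, the threaded copy would deliver the wrong
   value to S2, so such requests are dropped here.  */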
1552 | ||
9af5ce0c | 1553 | BITMAP_FREE (tmp); |
3cebc9d2 | 1554 | } |
1555 | ||
1556 | ||
1557 | /* Walk through all blocks and thread incoming edges to the appropriate | |
1558 | outgoing edge for each jump thread path recorded in PATHS.
a8046f60 | 1559 | |
1560 | It is the caller's responsibility to fix the dominance information | |
1561 | and rewrite duplicated SSA_NAMEs back into SSA form. | |
1562 | ||
7e0311ae | 1563 | If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through |
1564 | loop headers if it does not simplify the loop. | |
1565 | ||
dac49aa5 | 1566 | Returns true if one or more edges were threaded, false otherwise. */ |
a8046f60 | 1567 | |
1568 | bool | |
7e0311ae | 1569 | thread_through_all_blocks (bool may_peel_loop_headers) |
a8046f60 | 1570 | { |
a8046f60 | 1571 | bool retval = false; |
7ea47fbd | 1572 | unsigned int i; |
1573 | bitmap_iterator bi; | |
3cebc9d2 | 1574 | bitmap threaded_blocks; |
7e0311ae | 1575 | struct loop *loop; |
3cebc9d2 | 1576 | |
7a3bf727 | 1577 | /* We must know about loops in order to preserve them. */ |
1578 | gcc_assert (current_loops != NULL); | |
1579 | ||
f2981b08 | 1580 | if (!paths.exists ()) |
3cebc9d2 | 1581 | return false; |
a8046f60 | 1582 | |
3cebc9d2 | 1583 | threaded_blocks = BITMAP_ALLOC (NULL); |
5236b8bb | 1584 | memset (&thread_stats, 0, sizeof (thread_stats)); |
388d1fc1 | 1585 | |
3cebc9d2 | 1586 | mark_threaded_blocks (threaded_blocks); |
1587 | ||
96c90e5e | 1588 | initialize_original_copy_tables (); |
7e0311ae | 1589 | |
1590 | /* First perform the threading requests that do not affect | |
1591 | loop structure. */ | |
7ea47fbd | 1592 | EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi) |
a8046f60 | 1593 | { |
7ea47fbd | 1594 | basic_block bb = BASIC_BLOCK (i); |
1595 | ||
1596 | if (EDGE_COUNT (bb->preds) > 0) | |
7e0311ae | 1597 | retval |= thread_block (bb, true); |
1598 | } | |
1599 | ||
1600 | /* Then perform the threading through loop headers. We start with the | |
1601 | innermost loop, so that the changes in cfg we perform won't affect | |
1602 | further threading. */ | |
f21d4d00 | 1603 | FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) |
7e0311ae | 1604 | { |
7a3bf727 | 1605 | if (!loop->header |
1606 | || !bitmap_bit_p (threaded_blocks, loop->header->index)) | |
1607 | continue; | |
7e0311ae | 1608 | |
7a3bf727 | 1609 | retval |= thread_through_loop_header (loop, may_peel_loop_headers); |
a8046f60 | 1610 | } |
388d1fc1 | 1611 | |
ed4feca1 | 1612 | /* Suppose we had one jump thread path which went from the latch to the
1b83778e | 1613 | exit and another which went from outside to inside the same loop.
ed4feca1 | 1614 | |
1615 | If the latch to exit was handled first, we will thread it and clear | |
1616 | loop->header. | |
1617 | ||
1618 | The second path will be ignored by thread_block because we're going | |
1619 | through a loop header. It will also be ignored by the loop above | |
1620 | because loop->header is NULL. | |
1621 | ||
1622 | This results in the second path never being threaded. The failure | |
1623 | mode is a dangling AUX field. | |
1624 | ||
1625 | This is inherently a bit of a pain to fix, so we just walk all the | |
1626 | blocks and all the incoming edges to those blocks and clear their | |
1627 | AUX fields. */ | |
1628 | basic_block bb; | |
1629 | edge_iterator ei; | |
1630 | edge e; | |
1631 | FOR_EACH_BB (bb) | |
1632 | { | |
1633 | FOR_EACH_EDGE (e, ei, bb->preds) | |
1634 | if (e->aux) | |
1635 | { | |
1636 | vec<jump_thread_edge *> *path = THREAD_PATH (e); | |
1637 | ||
6d1fdbf9 | 1638 | delete_jump_thread_path (path); |
ed4feca1 | 1639 | e->aux = NULL; |
1640 | } | |
1641 | } | |
1642 | ||
581f8050 | 1643 | statistics_counter_event (cfun, "Jumps threaded", |
1644 | thread_stats.num_threaded_edges); | |
5236b8bb | 1645 | |
96c90e5e | 1646 | free_original_copy_tables (); |
1647 | ||
3cebc9d2 | 1648 | BITMAP_FREE (threaded_blocks); |
1649 | threaded_blocks = NULL; | |
f2981b08 | 1650 | paths.release (); |
7e0311ae | 1651 | |
eb2a640e | 1652 | if (retval) |
f24ec26f | 1653 | loops_state_set (LOOPS_NEED_FIXUP); |
eb2a640e | 1654 | |
a8046f60 | 1655 | return retval; |
1656 | } | |
3cebc9d2 | 1657 | |
6d1fdbf9 | 1658 | /* Delete the jump threading path PATH.  We have to explicitly delete
1659 | each entry in the vector, then the container. */ | |
1660 | ||
1661 | void | |
1662 | delete_jump_thread_path (vec<jump_thread_edge *> *path) | |
1663 | { | |
1664 | for (unsigned int i = 0; i < path->length (); i++) | |
1665 | delete (*path)[i]; | |
1666 | path->release ();
1667 | } | |
1668 | ||
0c5b289a | 1669 | /* Dump a jump threading path, including annotations about each |
1670 | edge in the path. */ | |
1671 | ||
1672 | static void | |
1673 | dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path) | |
1674 | { | |
1675 | fprintf (dump_file, | |
1676 | " Registering jump thread: (%d, %d) incoming edge; ", | |
1677 | path[0]->e->src->index, path[0]->e->dest->index); | |
1678 | ||
1679 | for (unsigned int i = 1; i < path.length (); i++) | |
1680 | { | |
1681 | /* We can get paths with a NULL edge when the final destination | |
1682 | of a jump thread turns out to be a constant address. We dump | |
1683 | those paths when debugging, so we have to be prepared for that | |
1684 | possibility here. */ | |
1685 | if (path[i]->e == NULL) | |
1686 | continue; | |
1687 | ||
1688 | if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK) | |
1689 | fprintf (dump_file, " (%d, %d) joiner; ", | |
1690 | path[i]->e->src->index, path[i]->e->dest->index); | |
1691 | if (path[i]->type == EDGE_COPY_SRC_BLOCK) | |
1692 | fprintf (dump_file, " (%d, %d) normal; ",
1693 | path[i]->e->src->index, path[i]->e->dest->index);
1694 | if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
1695 | fprintf (dump_file, " (%d, %d) nocopy; ",
1696 | path[i]->e->src->index, path[i]->e->dest->index); | |
1697 | } | |
1698 | fputc ('\n', dump_file); | |
1699 | } | |
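
/* Given the format strings above, a typical dump line would read
   (hypothetical block numbers):

     Registering jump thread: (3, 5) incoming edge;  (5, 7) joiner;  (7, 9) normal;

   one (src, dest) pair per edge on the path, each annotated with how
   its source block will be handled while copying.  */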
1700 | ||
3cebc9d2 | 1701 | /* Register a jump threading opportunity.  We queue up all the jump
1702 | threading opportunities discovered by a pass and update the CFG
1703 | and SSA form all at once.
1704 |
f0b5f617 | 1705 | PATH describes the sequence of edges to thread: we are effectively
3cebc9d2 | 1706 | recording that the destination of PATH's first edge can be changed
1707 | to the destination of its last edge after fixing the SSA graph. */
1708 | ||
1709 | void | |
f2981b08 | 1710 | register_jump_thread (vec<jump_thread_edge *> *path) |
3cebc9d2 | 1711 | { |
a3724f9d | 1712 | if (!dbg_cnt (registered_jump_thread)) |
1713 | { | |
6d1fdbf9 | 1714 | delete_jump_thread_path (path); |
a3724f9d | 1715 | return; |
1716 | } | |
1717 | ||
0c5b289a | 1718 | /* First make sure there are no NULL outgoing edges on the jump threading |
1719 | path.  That can happen when jumping to a constant address. */
f2981b08 | 1720 | for (unsigned int i = 0; i < path->length (); i++) |
1721 | if ((*path)[i]->e == NULL) | |
0c5b289a | 1722 | { |
1723 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1724 | { | |
1725 | fprintf (dump_file, | |
1726 | "Found NULL edge in jump threading path. Cancelling jump thread:\n"); | |
f2981b08 | 1727 | dump_jump_thread_path (dump_file, *path); |
0c5b289a | 1728 | } |
f2981b08 | 1729 | |
6d1fdbf9 | 1730 | delete_jump_thread_path (path); |
0c5b289a | 1731 | return; |
1732 | } | |
5411af4e | 1733 | |
631d940c | 1734 | if (dump_file && (dump_flags & TDF_DETAILS)) |
f2981b08 | 1735 | dump_jump_thread_path (dump_file, *path); |
631d940c | 1736 | |
f2981b08 | 1737 | if (!paths.exists ()) |
1738 | paths.create (5); | |
631d940c | 1739 | |
f2981b08 | 1740 | paths.safe_push (path); |
3cebc9d2 | 1741 | } |
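
/* A usage sketch for callers; the edge variables and the particular
   thread types are illustrative rather than taken from a real pass:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH transfers to this file: the path is freed here if
   the request is cancelled, and otherwise once the queued requests
   have been processed by thread_through_all_blocks.  */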