]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lcm.c
alias.c [...]: Remove unnecessary casts.
[thirdparty/gcc.git] / gcc / lcm.c
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* These routines are meant to be used by various optimization
23 passes which can be modeled as lazy code motion problems.
24 Including, but not limited to:
25
26 * Traditional partial redundancy elimination.
27
   * Placement of caller/callee register save/restores.
29
30 * Load/store motion.
31
32 * Copy motion.
33
34 * Conversion of flat register files to a stacked register
35 model.
36
37 * Dead load/store elimination.
38
39 These routines accept as input:
40
41 * Basic block information (number of blocks, lists of
42 predecessors and successors). Note the granularity
43 does not need to be basic block, they could be statements
44 or functions.
45
46 * Bitmaps of local properties (computed, transparent and
47 anticipatable expressions).
48
49 The output of these routines is bitmap of redundant computations
50 and a bitmap of optimal placement points. */
51
52
53 #include "config.h"
54 #include "system.h"
55 #include "coretypes.h"
56 #include "tm.h"
57 #include "rtl.h"
58 #include "regs.h"
59 #include "hard-reg-set.h"
60 #include "flags.h"
61 #include "real.h"
62 #include "insn-config.h"
63 #include "recog.h"
64 #include "basic-block.h"
65 #include "output.h"
66 #include "tm_p.h"
67 #include "function.h"
68
69 /* We want target macros for the mode switching code to be able to refer
70 to instruction attribute values. */
71 #include "insn-attr.h"
72
73 /* Edge based LCM routines. */
74 static void compute_antinout_edge (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
75 static void compute_earliest (struct edge_list *, int, sbitmap *, sbitmap *,
76 sbitmap *, sbitmap *, sbitmap *);
77 static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
78 sbitmap *, sbitmap *);
79 static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
80 sbitmap *, sbitmap *, sbitmap *, sbitmap *);
81
82 /* Edge based LCM routines on a reverse flowgraph. */
83 static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
84 sbitmap*, sbitmap *, sbitmap *);
85 static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
86 sbitmap *, sbitmap *);
87 static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
88 sbitmap *, sbitmap *, sbitmap *,
89 sbitmap *);
90 \f
91 /* Edge based lcm routines. */
92
/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.

   ANTLOC[bb] - expressions locally anticipatable in BB.
   TRANSP[bb] - expressions transparent through BB (not killed).
   ANTIN/ANTOUT - output vectors, one sbitmap per basic block.

   Solves the backward dataflow equations
     ANTOUT[bb] = intersection of ANTIN over bb's successors (empty at EXIT)
     ANTIN[bb]  = ANTLOC[bb] | (TRANSP[bb] & ANTOUT[bb])
   by iterating a worklist to a fixed point.  */

static void
compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
		       sbitmap *antout)
{
  basic_block bb;
  edge e;
  /* The worklist is a circular queue: QIN is the insertion point, QOUT
     the removal point, QEND the end of the underlying array.  */
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  A non-NULL aux field
     marks a block as being on the worklist.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  Nothing is anticipatable out of such a block.  */
	sbitmap_zero (antout[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
	}

      /* ANTIN[bb] = ANTLOC[bb] | (TRANSP[bb] & ANTOUT[bb]); the _cg
	 variant returns nonzero iff the destination changed.  */
      if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
				   transp[bb->index], antout[bb->index]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->pred; e; e = e->pred_next)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
175
176 /* Compute the earliest vector for edge based lcm. */
177
178 static void
179 compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
180 sbitmap *antout, sbitmap *avout, sbitmap *kill,
181 sbitmap *earliest)
182 {
183 sbitmap difference, temp_bitmap;
184 int x, num_edges;
185 basic_block pred, succ;
186
187 num_edges = NUM_EDGES (edge_list);
188
189 difference = sbitmap_alloc (n_exprs);
190 temp_bitmap = sbitmap_alloc (n_exprs);
191
192 for (x = 0; x < num_edges; x++)
193 {
194 pred = INDEX_EDGE_PRED_BB (edge_list, x);
195 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
196 if (pred == ENTRY_BLOCK_PTR)
197 sbitmap_copy (earliest[x], antin[succ->index]);
198 else
199 {
200 if (succ == EXIT_BLOCK_PTR)
201 sbitmap_zero (earliest[x]);
202 else
203 {
204 sbitmap_difference (difference, antin[succ->index],
205 avout[pred->index]);
206 sbitmap_not (temp_bitmap, antout[pred->index]);
207 sbitmap_a_and_b_or_c (earliest[x], difference,
208 kill[pred->index], temp_bitmap);
209 }
210 }
211 }
212
213 sbitmap_free (temp_bitmap);
214 sbitmap_free (difference);
215 }
216
/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

   laterin(ENTRY) is defined as all 0's
   later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
   laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, anytime we change later(bb), we need to add
   succs(bb) to the worklist if they are not already on the worklist.

   Boundary conditions:

   We prime the worklist all the normal basic blocks.  The ENTRY block can
   never be added to the worklist since it is never the successor of any
   block.  We explicitly prevent the EXIT block from being added to the
   worklist.

   We optimistically initialize LATER.  That is the only time this routine
   will compute LATER for an edge out of the entry block since the entry
   block is never on the worklist.  Thus, LATERIN is neither used nor
   computed for the ENTRY block.

   Since the EXIT block is never added to the worklist, we will neither
   use nor compute LATERIN for the exit block.  Edges which reach the
   EXIT block are handled in the normal fashion inside the loop.  However,
   the insertion/deletion computation needs LATERIN(EXIT), so we have
   to compute it.

   EARLIEST and ANTLOC are the input vectors; LATER (per edge) and
   LATERIN (per block, with one extra slot at index LAST_BASIC_BLOCK
   for the EXIT block) are the outputs.  Each edge's index into LATER
   is stashed in the edge's aux field.  */

static void
compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
		 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
{
  int num_edges, i;
  edge e;
  /* Circular worklist queue; QIN inserts, QOUT removes.  */
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }
  qin = worklist;
  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      sbitmap_ones (laterin[bb->index]);
      for (e = bb->pred; e != NULL; e = e->pred_next)
	sbitmap_a_and_b (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]);

      /* Calculate LATER for all outgoing edges:
	 LATER[e] = EARLIEST[e] | (LATERIN[src] - ANTLOC[src]);
	 the _cg variant reports whether the destination changed.  */
      for (e = bb->succ; e != NULL; e = e->succ_next)
	if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
				      earliest[(size_t) e->aux],
				      laterin[e->src->index],
				      antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[last_basic_block]);
  for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
    sbitmap_a_and_b (laterin[last_basic_block],
		     laterin[last_basic_block],
		     later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}
345
346 /* Compute the insertion and deletion points for edge based LCM. */
347
348 static void
349 compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
350 sbitmap *later, sbitmap *laterin, sbitmap *insert,
351 sbitmap *delete)
352 {
353 int x;
354 basic_block bb;
355
356 FOR_EACH_BB (bb)
357 sbitmap_difference (delete[bb->index], antloc[bb->index], laterin[bb->index]);
358
359 for (x = 0; x < NUM_EDGES (edge_list); x++)
360 {
361 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
362
363 if (b == EXIT_BLOCK_PTR)
364 sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
365 else
366 sbitmap_difference (insert[x], later[x], laterin[b->index]);
367 }
368 }
369
/* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and
   delete vectors for edge based LCM.  Returns an edgelist which is used to
   map the insert vector to what edge an expression should be inserted on.

   N_EXPRS is the number of expressions (bits per sbitmap).  *INSERT and
   *DELETE are allocated here (one sbitmap per edge / per block
   respectively); the caller owns them and the returned edge list.
   If LCM_DEBUG_INFO is defined and FILE is non-NULL, intermediate
   dataflow vectors are dumped to FILE.  */

struct edge_list *
pre_edge_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
	      sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
	      sbitmap **insert, sbitmap **delete)
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
      dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
      dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
    }
#endif

  /* Compute global availability.  AVIN is only needed internally.  */
  avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (avloc, kill, avout, avin);
  sbitmap_vector_free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
      dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  sbitmap_vector_free (antout);
  sbitmap_vector_free (antin);
  sbitmap_vector_free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  sbitmap_vector_free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  sbitmap_vector_free (laterin);
  sbitmap_vector_free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif

  return edge_list;
}
467
/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors,
   iterating a worklist algorithm to a fixed point.  (A stale version of
   this comment claimed a pass count was returned; the function is void.)

   Forward dataflow equations:
     AVIN[bb]  = intersection of AVOUT over bb's predecessors
		 (empty for successors of ENTRY)
     AVOUT[bb] = AVLOC[bb] | (AVIN[bb] - KILL[bb]).  */

void
compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
		   sbitmap *avin)
{
  edge e;
  /* Circular worklist queue; QIN inserts, QOUT removes.  */
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  A non-NULL aux field
     marks a block as being on the worklist.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
	}

      /* AVOUT[bb] = AVLOC[bb] | (AVIN[bb] - KILL[bb]); the _cg variant
	 returns nonzero iff the destination changed.  */
      if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index], avin[bb->index], kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->succ; e; e = e->succ_next)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}
549
550 /* Compute the farthest vector for edge based lcm. */
551
552 static void
553 compute_farthest (struct edge_list *edge_list, int n_exprs,
554 sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
555 sbitmap *kill, sbitmap *farthest)
556 {
557 sbitmap difference, temp_bitmap;
558 int x, num_edges;
559 basic_block pred, succ;
560
561 num_edges = NUM_EDGES (edge_list);
562
563 difference = sbitmap_alloc (n_exprs);
564 temp_bitmap = sbitmap_alloc (n_exprs);
565
566 for (x = 0; x < num_edges; x++)
567 {
568 pred = INDEX_EDGE_PRED_BB (edge_list, x);
569 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
570 if (succ == EXIT_BLOCK_PTR)
571 sbitmap_copy (farthest[x], st_avout[pred->index]);
572 else
573 {
574 if (pred == ENTRY_BLOCK_PTR)
575 sbitmap_zero (farthest[x]);
576 else
577 {
578 sbitmap_difference (difference, st_avout[pred->index],
579 st_antin[succ->index]);
580 sbitmap_not (temp_bitmap, st_avin[succ->index]);
581 sbitmap_a_and_b_or_c (farthest[x], difference,
582 kill[succ->index], temp_bitmap);
583 }
584 }
585 }
586
587 sbitmap_free (temp_bitmap);
588 sbitmap_free (difference);
589 }
590
591 /* Compute nearer and nearerout vectors for edge based lcm.
592
593 This is the mirror of compute_laterin, additional comments on the
594 implementation can be found before compute_laterin. */
595
596 static void
597 compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
598 sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
599 {
600 int num_edges, i;
601 edge e;
602 basic_block *worklist, *tos, bb;
603
604 num_edges = NUM_EDGES (edge_list);
605
606 /* Allocate a worklist array/queue. Entries are only added to the
607 list if they were not already on the list. So the size is
608 bounded by the number of basic blocks. */
609 tos = worklist = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
610
611 /* Initialize NEARER for each edge and build a mapping from an edge to
612 its index. */
613 for (i = 0; i < num_edges; i++)
614 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
615
616 /* We want a maximal solution. */
617 sbitmap_vector_ones (nearer, num_edges);
618
619 /* Note that even though we want an optimistic setting of NEARER, we
620 do not want to be overly optimistic. Consider an incoming edge to
621 the exit block. That edge should always have a NEARER value the
622 same as FARTHEST for that edge. */
623 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
624 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
625
626 /* Add all the blocks to the worklist. This prevents an early exit
627 from the loop given our optimistic initialization of NEARER. */
628 FOR_EACH_BB (bb)
629 {
630 *tos++ = bb;
631 bb->aux = bb;
632 }
633
634 /* Iterate until the worklist is empty. */
635 while (tos != worklist)
636 {
637 /* Take the first entry off the worklist. */
638 bb = *--tos;
639 bb->aux = NULL;
640
641 /* Compute the intersection of NEARER for each outgoing edge from B. */
642 sbitmap_ones (nearerout[bb->index]);
643 for (e = bb->succ; e != NULL; e = e->succ_next)
644 sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
645 nearer[(size_t) e->aux]);
646
647 /* Calculate NEARER for all incoming edges. */
648 for (e = bb->pred; e != NULL; e = e->pred_next)
649 if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
650 farthest[(size_t) e->aux],
651 nearerout[e->dest->index],
652 st_avloc[e->dest->index])
653 /* If NEARER for an incoming edge was changed, then we need
654 to add the source of the incoming edge to the worklist. */
655 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
656 {
657 *tos++ = e->src;
658 e->src->aux = e;
659 }
660 }
661
662 /* Computation of insertion and deletion points requires computing NEAREROUT
663 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
664 for just this purpose. */
665 sbitmap_ones (nearerout[last_basic_block]);
666 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
667 sbitmap_a_and_b (nearerout[last_basic_block],
668 nearerout[last_basic_block],
669 nearer[(size_t) e->aux]);
670
671 clear_aux_for_edges ();
672 free (tos);
673 }
674
675 /* Compute the insertion and deletion points for edge based LCM. */
676
677 static void
678 compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
679 sbitmap *nearer, sbitmap *nearerout,
680 sbitmap *insert, sbitmap *delete)
681 {
682 int x;
683 basic_block bb;
684
685 FOR_EACH_BB (bb)
686 sbitmap_difference (delete[bb->index], st_avloc[bb->index], nearerout[bb->index]);
687
688 for (x = 0; x < NUM_EDGES (edge_list); x++)
689 {
690 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
691 if (b == ENTRY_BLOCK_PTR)
692 sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
693 else
694 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
695 }
696 }
697
/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
   insert and delete vectors for edge based reverse LCM.  Returns an
   edgelist which is used to map the insert vector to what edge
   an expression should be inserted on.

   N_EXPRS is the number of expressions (bits per sbitmap).  *INSERT and
   *DELETE are allocated here and owned by the caller, as is the returned
   edge list.  If LCM_DEBUG_INFO is defined and FILE is non-NULL,
   intermediate vectors are dumped to FILE.  */

struct edge_list *
pre_edge_rev_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
		  sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
		  sbitmap **insert, sbitmap **delete)
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  /* NOTE(review): this zeroing looks redundant, since
     compute_antinout_edge reinitializes ANTIN optimistically and fills
     ANTOUT for every block — confirm before removing.  */
  sbitmap_vector_zero (st_antin, last_basic_block);
  sbitmap_vector_zero (st_antout, last_basic_block);
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global anticipatability.  */
  st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
      dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
      dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
      dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
#endif

  sbitmap_vector_free (st_antin);
  sbitmap_vector_free (st_antout);

  sbitmap_vector_free (st_avin);
  sbitmap_vector_free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "nearerout", "", nearerout,
			   last_basic_block + 1);
      dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
    }
#endif

  sbitmap_vector_free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			     *insert, *delete);

  sbitmap_vector_free (nearerout);
  sbitmap_vector_free (nearer);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif
  return edge_list;
}
802
803 /* Mode switching:
804
805 The algorithm for setting the modes consists of scanning the insn list
806 and finding all the insns which require a specific mode. Each insn gets
807 a unique struct seginfo element. These structures are inserted into a list
808 for each basic block. For each entity, there is an array of bb_info over
809 the flow graph basic blocks (local var 'bb_info'), and contains a list
810 of all insns within that basic block, in the order they are encountered.
811
812 For each entity, any basic block WITHOUT any insns requiring a specific
813 mode are given a single entry, without a mode. (Each basic block
814 in the flow graph must have at least one entry in the segment table.)
815
816 The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority value with respect to the first
   insn in any one block.  Any adjustments required to the transparency
819 vectors are made, then the next iteration starts for the next-lower
820 priority mode, till for each entity all modes are exhausted.
821
822 More details are located in the code for optimize_mode_switching(). */
823
/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.
   REGS_LIVE is the set of hard registers live at this point (captured by
   new_seginfo via COPY_HARD_REG_SET).  */
struct seginfo
{
  int mode;			/* Required mode for the insn.  */
  rtx insn_ptr;			/* The insn (or block-start note).  */
  int bbnum;			/* Flow-graph basic block number.  */
  struct seginfo *next;		/* Next segment in the same block.  */
  HARD_REG_SET regs_live;	/* Hard regs live at this point.  */
};

/* Per-basic-block state for one mode-switching entity.  */
struct bb_info
{
  struct seginfo *seginfo;	/* Head of the list of mode segments.  */
  int computing;		/* NOTE(review): written by
				   optimize_mode_switching; appears to hold
				   the mode computed in this block — confirm
				   against that function.  */
};
845
846 /* These bitmaps are used for the LCM algorithm. */
847
848 #ifdef OPTIMIZE_MODE_SWITCHING
849 static sbitmap *antic;
850 static sbitmap *transp;
851 static sbitmap *comp;
852 static sbitmap *delete;
853 static sbitmap *insert;
854
855 static struct seginfo * new_seginfo (int, rtx, int, HARD_REG_SET);
856 static void add_seginfo (struct bb_info *, struct seginfo *);
857 static void reg_dies (rtx, HARD_REG_SET);
858 static void reg_becomes_live (rtx, rtx, void *);
859 static void make_preds_opaque (basic_block, int);
860 #endif
861 \f
862 #ifdef OPTIMIZE_MODE_SWITCHING
863
864 /* This function will allocate a new BBINFO structure, initialized
865 with the MODE, INSN, and basic block BB parameters. */
866
867 static struct seginfo *
868 new_seginfo (int mode, rtx insn, int bb, HARD_REG_SET regs_live)
869 {
870 struct seginfo *ptr;
871 ptr = xmalloc (sizeof (struct seginfo));
872 ptr->mode = mode;
873 ptr->insn_ptr = insn;
874 ptr->bbnum = bb;
875 ptr->next = NULL;
876 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
877 return ptr;
878 }
879
880 /* Add a seginfo element to the end of a list.
881 HEAD is a pointer to the list beginning.
882 INFO is the structure to be linked in. */
883
884 static void
885 add_seginfo (struct bb_info *head, struct seginfo *info)
886 {
887 struct seginfo *ptr;
888
889 if (head->seginfo == NULL)
890 head->seginfo = info;
891 else
892 {
893 ptr = head->seginfo;
894 while (ptr->next != NULL)
895 ptr = ptr->next;
896 ptr->next = info;
897 }
898 }
899
900 /* Make all predecessors of basic block B opaque, recursively, till we hit
901 some that are already non-transparent, or an edge where aux is set; that
902 denotes that a mode set is to be done on that edge.
903 J is the bit number in the bitmaps that corresponds to the entity that
904 we are currently handling mode-switching for. */
905
906 static void
907 make_preds_opaque (basic_block b, int j)
908 {
909 edge e;
910
911 for (e = b->pred; e; e = e->pred_next)
912 {
913 basic_block pb = e->src;
914
915 if (e->aux || ! TEST_BIT (transp[pb->index], j))
916 continue;
917
918 RESET_BIT (transp[pb->index], j);
919 make_preds_opaque (pb, j);
920 }
921 }
922
923 /* Record in LIVE that register REG died. */
924
925 static void
926 reg_dies (rtx reg, HARD_REG_SET live)
927 {
928 int regno, nregs;
929
930 if (GET_CODE (reg) != REG)
931 return;
932
933 regno = REGNO (reg);
934 if (regno < FIRST_PSEUDO_REGISTER)
935 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
936 nregs--)
937 CLEAR_HARD_REG_BIT (live, regno + nregs);
938 }
939
940 /* Record in LIVE that register REG became live.
941 This is called via note_stores. */
942
943 static void
944 reg_becomes_live (rtx reg, rtx setter ATTRIBUTE_UNUSED, void *live)
945 {
946 int regno, nregs;
947
948 if (GET_CODE (reg) == SUBREG)
949 reg = SUBREG_REG (reg);
950
951 if (GET_CODE (reg) != REG)
952 return;
953
954 regno = REGNO (reg);
955 if (regno < FIRST_PSEUDO_REGISTER)
956 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
957 nregs--)
958 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
959 }
960
/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.

   FILE is the dump file (may be NULL) passed through to pre_edge_lcm.
   The global bitmap vectors TRANSP, COMP and ANTIC (declared elsewhere
   in this file) are (re)allocated here, one bit per (block, entity).  */

int
optimize_mode_switching (FILE *file)
{
  rtx insn;
  int e;
  basic_block bb;
  int need_commit = 0;
  sbitmap *kill;
  struct edge_list *edge_list;
  /* One entry per mode-switched entity, supplied by the target.  */
  static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES ARRAY_SIZE (num_modes)
  int entity_map[N_ENTITIES];
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;
  int max_num_modes = 0;
  bool emited = false;
  basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;

  clear_bb_flags ();

  /* Collect the entities the target actually wants optimized, densely
     packed into entity_map[0 .. n_entities-1].  */
  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	int entry_exit_extra = 0;

	/* Create the list of segments within each basic block.
	   If NORMAL_MODE is defined, allow for two extra
	   blocks split from the entry and exit block.  */
#ifdef NORMAL_MODE
	entry_exit_extra = 2;
#endif
	bb_info[n_entities]
	  = xcalloc (last_basic_block + entry_exit_extra, sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

  /* Nothing to do if no entity needs mode switching.  */
  if (! n_entities)
    return 0;

#ifdef NORMAL_MODE
  {
    /* Split the edge from the entry block and the fallthrough edge to the
       exit block, so that we can note that there NORMAL_MODE is supplied /
       required.  */
    edge eg;
    post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
    /* The only non-call predecessor at this stage is a block with a
       fallthrough edge; there can be at most one, but there could be
       none at all, e.g. when exit is called.  */
    for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
      if (eg->flags & EDGE_FALLTHRU)
	{
	  regset live_at_end = eg->src->global_live_at_end;

	  if (pre_exit)
	    abort ();
	  pre_exit = split_edge (eg);
	  /* The new pre-exit block inherits the liveness of the edge's
	     source end on both sides.  */
	  COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
	  COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
	}
  }
#endif

  /* Create the bitmap vectors.  */

  antic = sbitmap_vector_alloc (last_basic_block, n_entities);
  transp = sbitmap_vector_alloc (last_basic_block, n_entities);
  comp = sbitmap_vector_alloc (last_basic_block, n_entities);

  /* Every block starts out fully transparent for every entity; bits are
     cleared below wherever a mode requirement is found.  */
  sbitmap_vector_ones (transp, last_basic_block);

  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine what the first use (if any) need for a mode of entity E is.
	 This will be the mode that is anticipatable for this block.
	 Also compute the initial transparency settings.  */
      FOR_EACH_BB (bb)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  REG_SET_TO_HARD_REG_SET (live_now,
				   bb->global_live_at_start);
	  /* Scan the block's insns, tracking hard-register liveness and
	     recording a segment each time the required mode changes.  */
	  for (insn = bb->head;
	       insn != NULL && insn != NEXT_INSN (bb->end);
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		{
		  int mode = MODE_NEEDED (e, insn);
		  rtx link;

		  if (mode != no_mode && mode != last_mode)
		    {
		      last_mode = mode;
		      ptr = new_seginfo (mode, insn, bb->index, live_now);
		      add_seginfo (info + bb->index, ptr);
		      RESET_BIT (transp[bb->index], j);
		    }

		  /* Update LIVE_NOW.  */
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_DEAD)
		      reg_dies (XEXP (link, 0), live_now);

		  note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_UNUSED)
		      reg_dies (XEXP (link, 0), live_now);
		}
	    }

	  info[bb->index].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      /* Record a no_mode placeholder segment so seginfo is never
		 NULL for a block.  */
	      ptr = new_seginfo (no_mode, bb->end, bb->index, live_now);
	      add_seginfo (info + bb->index, ptr);
	    }
	}
#ifdef NORMAL_MODE
      {
	int mode = NORMAL_MODE (e);

	if (mode != no_mode)
	  {
	    bb = post_entry;

	    /* By always making this nontransparent, we save
	       an extra check in make_preds_opaque.  We also
	       need this to avoid confusing pre_edge_lcm when
	       antic is cleared but transp and comp are set.  */
	    RESET_BIT (transp[bb->index], j);

	    /* Insert a fake computing definition of MODE into entry
	       blocks which compute no mode.  This represents the mode on
	       entry.  */
	    info[bb->index].computing = mode;

	    if (pre_exit)
	      info[pre_exit->index].seginfo->mode = mode;
	  }
      }
#endif /* NORMAL_MODE */
    }

  kill = sbitmap_vector_alloc (last_basic_block, n_entities);
  /* Process modes in priority order: a mode set for a higher-priority
     mode makes blocks non-transparent for lower-priority ones.  */
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, last_basic_block);
      sbitmap_vector_zero (comp, last_basic_block);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  FOR_EACH_BB (bb)
	    {
	      /* ANTIC: the block's first segment needs mode M.
		 COMP: the block's last requirement was mode M.  */
	      if (info[bb->index].seginfo->mode == m)
		SET_BIT (antic[bb->index], j);

	      if (info[bb->index].computing == m)
		SET_BIT (comp[bb->index], j);
	    }
	}

      /* Calculate the optimal locations for the
	 placement mode switches to modes with priority I.  */

      /* KILL is simply the complement of TRANSP for the lcm problem.  */
      FOR_EACH_BB (bb)
	sbitmap_not (kill[bb->index], transp[bb->index]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);

      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that have been inserted by lcm.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation ceases to be transparent for any lower-priority
	     mode of the same entity.  First set the aux field of each
	     insertion site edge non-transparent, then propagate the new
	     non-transparency from the redundant computation upwards till
	     we hit an insertion site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      /* Mark this edge as an insertion site for make_preds_opaque.  */
	      eg->aux = (void *)1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      /* Let the target emit the mode-setting insns into a
		 detached sequence.  */
	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = get_insns ();
	      end_sequence ();

	      /* Do not bother to insert empty sequence.  */
	      if (mode_set == NULL_RTX)
		continue;

	      /* If this is an abnormal edge, we'll insert at the end
		 of the previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  emited = true;
		  if (GET_CODE (src_bb->end) == JUMP_INSN)
		    emit_insn_before (mode_set, src_bb->end);
		  /* It doesn't make sense to switch to normal mode
		     after a CALL_INSN, so we're going to abort if we
		     find one.  The cases in which a CALL_INSN may
		     have an abnormal edge are sibcalls and EH edges.
		     In the case of sibcalls, the dest basic-block is
		     the EXIT_BLOCK, that runs in normal mode; it is
		     assumed that a sibcall insn requires normal mode
		     itself, so no mode switch would be required after
		     the call (it wouldn't make sense, anyway).  In
		     the case of EH edges, EH entry points also start
		     in normal mode, so a similar reasoning applies.  */
		  else if (GET_CODE (src_bb->end) == INSN)
		    emit_insn_after (mode_set, src_bb->end);
		  else
		    abort ();
		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  /* Normal edge: queue the insns for commit at the end.  */
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  FOR_EACH_BB_REVERSE (bb)
	    if (TEST_BIT (delete[bb->index], j))
	      {
		make_preds_opaque (bb, j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb->index].seginfo->mode = no_mode;
	      }
	}

      clear_aux_for_edges ();
      free_edge_list (edge_list);
    }

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

      FOR_EACH_BB_REVERSE (bb)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = get_insns ();
		  end_sequence ();

		  /* Do not bother to insert empty sequence.  */
		  if (mode_set == NULL_RTX)
		    continue;

		  emited = true;
		  /* Insert after a basic-block note (so the mode set is
		     the block's first real insn), otherwise before the
		     recorded insn.  */
		  if (GET_CODE (ptr->insn_ptr) == NOTE
		      && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			  == NOTE_INSN_BASIC_BLOCK))
		    emit_insn_after (mode_set, ptr->insn_ptr);
		  else
		    emit_insn_before (mode_set, ptr->insn_ptr);
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }

  /* Finished. Free up all the things we've allocated.  */

  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);

  if (need_commit)
    commit_edge_insertions ();

#ifdef NORMAL_MODE
  cleanup_cfg (CLEANUP_NO_INSN_DEL);
#else
  if (!need_commit && !emited)
    return 0;
#endif

  /* New insns were emitted; recompute liveness information.  */
  max_regno = max_reg_num ();
  allocate_reg_info (max_regno, FALSE, FALSE);
  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
				    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
				     | PROP_SCAN_DEAD_CODE));

  return 1;
}
1303 #endif /* OPTIMIZE_MODE_SWITCHING */