/* Loop unrolling.
   Copyright (C) 2002-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "params.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from the greater
   sequentiality of the code, better code for further passes to
   optimize, and in some cases from fewer tests of exit conditions.
   The main problem is code growth, which impacts performance
   negatively due to cache effects.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of the remaining loops; this is performed only if
      we are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate functions below.

   There are a lot of parameters (defined and described in params.def)
   that control how much we unroll.

   ??? A great problem is that we don't have a good way to determine how
   many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of
   several percent.  */

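/* For illustration (not part of the pass): unrolling here is driven either
   by the -funroll-loops / -funroll-all-loops flags or by a per-loop request
   recorded in loop->unroll, e.g. from the user-level pragma

     #pragma GCC unroll 4
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   A recorded value of 1 means the user asked for the loop not to be
   unrolled at all, which decide_unrolling below honors.  */
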
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	     /* The insn in which the induction variable
				occurs.  */
  rtx orig_var;		     /* The variable (register) for the IV before
				split.  */
  rtx base_var;		     /* The variable on which the values in further
				iterations are based.  */
  rtx step;		     /* Step of the induction variable.  */
  struct iv_to_split *next;  /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	     /* The insn in which the variable expansion
				occurs.  */
  rtx reg;		     /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;   /* The copies of the expanded accumulator.  */
  struct var_to_expand *next;/* Next entry in walking order.  */
  enum rtx_code op;	     /* The type of the accumulation - addition,
				subtraction or multiplication.  */
  int expansion_count;	     /* Count the number of expansions generated
				so far.  */
  int reuse_expansion;	     /* The expansion we intend to reuse to expand
				the accumulator.  If REUSE_EXPANSION is 0
				reuse the original accumulator.  Else use
				var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}

/* Information about the optimizations applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block; /* The first basic block that was
			       duplicated.  */
  basic_block loop_exit; /* The loop exit basic block.  */
  basic_block loop_preheader; /* The loop preheader basic block.  */
};

static void decide_unroll_stupid (class loop *, int);
static void decide_unroll_constant_iterations (class loop *, int);
static void decide_unroll_runtime_iterations (class loop *, int);
static void unroll_loop_stupid (class loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (class loop *);
static void unroll_loop_runtime_iterations (class loop *);
static struct opt_info *analyze_insns_in_loop (class loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (class loop *loop, dump_location_t locus)
{
  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_metadata_t metadata (report_flags, locus.get_impl_location ());
  dump_printf_loc (metadata, locus.get_user_location (),
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info && loop->header->count.initialized_p ())
    dump_printf (metadata,
		 " (header execution count %d)",
		 (int)loop->header->count.to_gcov_type ());

  dump_printf (metadata, "\n");
}
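
/* Sample output of the function above under -fopt-info (illustrative
   values; the exact location prefix depends on the GCC version):

     test.c:12:3: optimized: loop unrolled 3 times (header execution count 84)
*/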

/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  class loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  class loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (class loop *loop)
{
  class niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}
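
/* For intuition (an illustrative sketch, not taken from the source): the
   distinction made above roughly corresponds to the two source-level loop
   shapes

     while (i < n) { body; i++; }      // exit test before the body
     do { body; i++; } while (i < n);  // exit test after the body

   The RTL shape can differ after earlier passes, so the predicate inspects
   the actual CFG: the exit is "at the end" when the exit test's in-edge
   leads to the latch and the latch contains no active insns.  The unrollers
   below keep the remaining exit test in the first or last copy
   accordingly.  */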

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check the number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 a constant number of iterations; it should have been peeled
	 instead.  */
      if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also the loop bounds and profile; in case the loop has more
     than one exit it may well loop fewer times than the determined
     maximal number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few copies of the loop body as possible are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
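
/* A worked example of the selection loop above (illustrative numbers, not
   taken from the source): suppose desc->niter == 100, nunroll == 8, and the
   exit is not at the end.  For a candidate factor i + 1, the total number of
   body copies is exit_mod + i + 1, so

     i + 1 == 8   ->  100 % 8  == 4 peeled + 8 in-loop  == 12 copies
     i + 1 == 10  ->  100 % 10 == 0 peeled + 10 in-loop == 10 copies

   and the loop therefore prefers unrolling 10x over 8x: a slightly larger
   factor that divides the iteration count needs fewer copies overall.  */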

/* Unroll LOOP with a constant number of iterations
   LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (class loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test
	 of the exit condition have a contiguous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as
	 above, if the loop tests the condition at the end of the loop
	 body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care
	 about the case where we would exit before reaching the loop.  So
	 just peel exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number
   of times and how much.  */
static void
decide_unroll_runtime_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with runtime-"
		 "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as the code generation
     requires it; otherwise we would be unable to cope with overflows in
     the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
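
/* For example (illustrative values): if the size limits allow nunroll == 7,
   the loop above leaves i == 4, the largest power of two not exceeding 7,
   so LOOP->LPT_DECISION.TIMES becomes 3 and the loop body will appear 4
   times in the unrolled loop.  TIMES + 1 being a power of two is what later
   lets the preconditioning code compute niter % (TIMES + 1) with a simple
   AND.  */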

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by the # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}

/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES times.  The value of TIMES + 1 must be
   a power of two (TIMES == 3 below gives the factor 4).  The transformation
   does this (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
	 body; i++;
       case 2:
	 body; i++;
       case 1:
	 body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_runtime_iterations (class loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation why, see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation why, see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get an expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at the end and whose number of iterations is
     reliable, add one to niter to account for the first pass through the
     loop body before reaching the exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count the modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
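
  /* For example (illustrative values): with max_unroll == 3 the unroll
     factor is 4, and the AND above computes niter & 3 == niter % 4.
     Because 4 divides 2^bits, the result stays correct even if the
     increment of niter above wrapped around; a modulo by a factor that is
     not a power of two would not survive that overflow, which is why
     decide_unroll_runtime_iterations forces a power-of-two factor.  */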

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of the loop body.  Leave the exit test if the
	 number of iterations is not reliable.  Also record the place of
	 the extra zero check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize the
     innermost switch block.  Switch blocks and peeled loop copies are
     built from the innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create an item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized
	 out, and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute the count adjustments, since the initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at the
     entry of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate the loop
     body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
  */
static void
unroll_loop_stupid (class loop *loop)
{
  unsigned nunroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  auto_sbitmap wont_exit (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      /* We may indeed get here, provided that there are nontrivial
	 assumptions for the loop to be really simple.  We could update the
	 counts, but the problem is that we are unable to decide which exit
	 will be taken (not really true in case the number of iterations is
	 constant, but no one will do anything with this information, so we
	 do not worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of an
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */
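
/* An illustrative source-level sketch of the complete transformation
   (hypothetical names; the pass works on RTL, with the initialization and
   the final combine performed by insert_var_expansion_initialization and
   combine_var_copies_in_loop_exit below):

     double sum = init, sum1 = 0.0, sum2 = 0.0;
     for (i = 0; i + 2 < n; i += 3)
       {
	 sum  += a[i];		// original accumulator
	 sum1 += a[i + 1];	// expansion 1
	 sum2 += a[i + 2];	// expansion 2
       }
     sum = sum + sum1 + sum2;	// combined at the loop exit

   This breaks the dependence chain on SUM, so the additions can execute in
   parallel; for floating point it reorders the operations, which is why
   the analysis below requires -fassociative-math in that case.  */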

static struct var_to_expand *
analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which initializes the
	 expansions with zero and sums them at the end of the computation,
	 will yield wrong results for (x = something - x); thus avoid
	 using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace chains of the form

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   with

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and
   return a pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  class rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}

/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (class loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records the start of the
   duplicated area in OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs
	 has number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
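
/* For example (illustrative values): with 3 new copies, unrolling yields
   delta 0 for the original body (where the base variable is initialized)
   and deltas 1, 2, 3 for copies 1..3.  When peeling, copy 1 holds the
   initialization, so copies 1..3 get deltas 0, 1, 2 and the original body,
   executed last, gets delta 3.  */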

/* Allocate the basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert an initialization of the basic variable of IVTS before INSN,
   taking the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}


/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}


/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}

/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus, to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

     for (i = 0; i < n; i++)
       sum += something;

   ==>

     sum += something;
     ....
     i = i + 1;
     sum1 += something;
     ....
     i = i + 1;
     sum2 += something;
     ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero; thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */
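/* For MULT accumulators the neutral element of multiplication is used
   instead: each expansion is initialized with 1 (CONST1_RTX), so the
   combined product at the loop exit is unaffected.  */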

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
                                     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          if (honor_signed_zero_p)
            zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
          else
            zero_init = CONST0_RTX (mode);
          emit_move_insn (var, zero_init);
        }
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        {
          zero_init = CONST1_RTX (GET_MODE (var));
          emit_move_insn (var, zero_init);
        }
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}

/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */
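/* Continuing the sum1/sum2 example above, for two recorded expansions
   the emitted sequence computes

     sum = sum2 + (sum1 + sum);

   MULT accumulators are combined with multiplication in the same
   fashion.  */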

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
     it both here and as the destination of the assignment.  */
  sum = copy_rtx (sum);
  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

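  /* Skip past the NOTE_INSN_BASIC_BLOCK so that the combining sequence
     is emitted inside PLACE rather than ahead of the block note.  */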
  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}

/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: we
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */
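/* As an illustrative example (the IV name is hypothetical): an insn
   carrying a note such as

     (expr_list:REG_EQUAL (plus:SI (reg iv) (const_int 1)) ...)

   has that note removed whenever IV is one of the registers recorded
   in OPT_INFO->iv_to_split_head, since we cannot prove the note still
   holds in the rewritten copies.  */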

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
        remove_note (insn, note);
        return;
      }
}

/* Apply loop optimizations in loop copies, using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */
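/* In outline: the first walk below visits every basic block created by
   duplicate_loop_to_header_edge, pairs each copied insn with its
   original via get_bb_original, and applies the recorded IV splittings
   and accumulator expansions; a second walk then rewrites the original
   loop body itself when REWRITE_ORIGINAL_LOOP.  */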

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
                                        unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
        {
          if (!INSN_P (insn)
              || (DEBUG_BIND_INSN_P (insn)
                  && INSN_VAR_LOCATION_DECL (insn)
                  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
            continue;

          while (!INSN_P (orig_insn)
                 || (DEBUG_BIND_INSN_P (orig_insn)
                     && INSN_VAR_LOCATION_DECL (orig_insn)
                     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
                         == LABEL_DECL)))
            orig_insn = NEXT_INSN (orig_insn);

          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, insn);

              ivts = opt_info->insns_to_split->find (&ivts_templ);

              if (ivts)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));

                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = (struct var_to_expand *)
                opt_info->insns_with_var_to_expand->find (&ve_templ);
              if (ves)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }
  /* Also rewrite the original loop body.  Its blocks are found as the
     originals of the blocks in the last copied iteration, i.e. those
     that have get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
        continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
            continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

              ivts = (struct iv_to_split *)
                opt_info->insns_to_split->find (&ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }
        }
    }
}

/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}