gcc/loop-unroll.c
c836de3f 1/* Loop unrolling.
d353bf18 2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
ad5201d0 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8c4c00c1 8Software Foundation; either version 3, or (at your option) any later
ad5201d0 9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
ad5201d0 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
24#include "rtl.h"
b20a8bb4 25#include "hash-set.h"
26#include "machmode.h"
27#include "vec.h"
28#include "double-int.h"
29#include "input.h"
30#include "alias.h"
31#include "symtab.h"
32#include "wide-int.h"
33#include "inchash.h"
41a8aa41 34#include "tree.h"
ad5201d0 35#include "hard-reg-set.h"
42fe97ed 36#include "obstack.h"
886c1262 37#include "profile.h"
94ea8568 38#include "predict.h"
94ea8568 39#include "function.h"
40#include "dominance.h"
41#include "cfg.h"
42#include "cfgrtl.h"
ad5201d0 43#include "basic-block.h"
44#include "cfgloop.h"
ad5201d0 45#include "params.h"
34517c64 46#include "insn-codes.h"
47#include "optabs.h"
d53441c8 48#include "hashtab.h"
49#include "flags.h"
50#include "statistics.h"
51#include "real.h"
52#include "fixed-value.h"
53#include "insn-config.h"
54#include "expmed.h"
55#include "dojump.h"
56#include "explow.h"
57#include "calls.h"
58#include "emit-rtl.h"
59#include "varasm.h"
60#include "stmt.h"
ad5201d0 61#include "expr.h"
d9dd21a8 62#include "hash-table.h"
48e1416a 63#include "recog.h"
9ccaa774 64#include "target.h"
b9ed1410 65#include "dumpfile.h"
ad5201d0 66
c836de3f 67/* This pass performs loop unrolling.  We only perform this
 68 optimization on innermost loops (with a single exception) because
ad5201d0 69 the impact on performance is greatest here, and we want to avoid
 70 unnecessary code size growth.  The gain comes from greater sequentiality
917bbcab 71 of code, better code for further passes to optimize and in some cases
ad5201d0 72 from fewer tests of exit conditions.  The main problem is code growth,
 73 which impacts performance negatively due to cache effects.
74
75 What we do:
76
ad5201d0 77 -- unrolling of loops that roll a constant number of times; this is
 78 almost always a win, as we get rid of the exit condition tests.
 79 -- unrolling of loops that roll a number of times that we can compute
 80 at runtime; we also get rid of the exit condition tests here, but there
 81 is the extra expense of calculating the number of iterations
 82 -- simple unrolling of the remaining loops; this is performed only if we
 83 are asked to, as the gain is questionable in this case and often
 84 it may even slow down the code
 85 For more detailed descriptions of each of those, see the comments at
 86 the appropriate functions below (a source-level sketch follows this comment).
87
 88 There are a number of parameters (defined and described in params.def) that
c836de3f 89 control how much we unroll.
ad5201d0 90
 91 ??? A big problem is that we do not have a good way to determine
 92 how many times we should unroll the loop; the experiments I have made
 93 showed that this choice may affect performance by several percent.
94 */
95
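/* As a rough source-level sketch of the first case (assuming a simple
   summation over an array a[] with 100 iterations and an unroll factor
   of 4):

     for (i = 0; i < 100; i++)          for (i = 0; i < 100; i += 4)
       sum += a[i];              ==>      {
                                            sum += a[i];
                                            sum += a[i + 1];
                                            sum += a[i + 2];
                                            sum += a[i + 3];
                                          }

   The exit condition is tested 25 times instead of 100, at the cost of a
   larger body; the runtime and "stupid" variants achieve a similar shape
   but must keep extra exit tests or preconditioning code.  */
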
a9989fb4 96/* Information about induction variables to split. */
97
98struct iv_to_split
99{
222dc1d3 100 rtx_insn *insn; /* The insn in which the induction variable occurs. */
ef770610 101 rtx orig_var; /* The variable (register) for the IV before split. */
a9989fb4 102 rtx base_var; /* The variable on which the values in the later
 103 iterations are based. */
104 rtx step; /* Step of the induction variable. */
b635a1a3 105 struct iv_to_split *next; /* Next entry in walking order. */
a9989fb4 106};
107
375bb675 108/* Information about accumulators to expand. */
109
110struct var_to_expand
111{
222dc1d3 112 rtx_insn *insn; /* The insn in which the variable expansion occurs. */
dac49aa5 113 rtx reg; /* The accumulator which is expanded. */
f1f41a6c 114 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
b635a1a3 115 struct var_to_expand *next; /* Next entry in walking order. */
48e1416a 116 enum rtx_code op; /* The type of the accumulation - addition, subtraction
375bb675 117 or multiplication. */
118 int expansion_count; /* Count the number of expansions generated so far. */
119 int reuse_expansion; /* The expansion we intend to reuse to expand
48e1416a 120 the accumulator. If REUSE_EXPANSION is 0 reuse
121 the original accumulator. Else use
375bb675 122 var_expansions[REUSE_EXPANSION - 1]. */
123};
124
d9dd21a8 125/* Hashtable helper for iv_to_split. */
126
127struct iv_split_hasher : typed_free_remove <iv_to_split>
128{
9969c043 129 typedef iv_to_split *value_type;
130 typedef iv_to_split *compare_type;
131 static inline hashval_t hash (const iv_to_split *);
132 static inline bool equal (const iv_to_split *, const iv_to_split *);
d9dd21a8 133};
134
135
136/* A hash function for information about insns to split. */
137
138inline hashval_t
9969c043 139iv_split_hasher::hash (const iv_to_split *ivts)
d9dd21a8 140{
141 return (hashval_t) INSN_UID (ivts->insn);
142}
143
 144/* An equality function for information about insns to split. */
145
146inline bool
9969c043 147iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
d9dd21a8 148{
149 return i1->insn == i2->insn;
150}
151
 152/* Hashtable helper for var_to_expand. */
153
154struct var_expand_hasher : typed_free_remove <var_to_expand>
155{
9969c043 156 typedef var_to_expand *value_type;
157 typedef var_to_expand *compare_type;
158 static inline hashval_t hash (const var_to_expand *);
159 static inline bool equal (const var_to_expand *, const var_to_expand *);
d9dd21a8 160};
161
162/* Return a hash for VES. */
163
164inline hashval_t
9969c043 165var_expand_hasher::hash (const var_to_expand *ves)
d9dd21a8 166{
167 return (hashval_t) INSN_UID (ves->insn);
168}
169
170/* Return true if I1 and I2 refer to the same instruction. */
171
172inline bool
9969c043 173var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
d9dd21a8 174{
175 return i1->insn == i2->insn;
176}
177
375bb675 178/* Information about optimization applied in
179 the unrolled loop. */
180
181struct opt_info
a9989fb4 182{
c1f445d2 183 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
d9dd21a8 184 split. */
b635a1a3 185 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
186 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
c1f445d2 187 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
d9dd21a8 188 insns with accumulators to expand. */
b635a1a3 189 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
190 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
375bb675 191 unsigned first_new_block; /* The first basic block that was
192 duplicated. */
193 basic_block loop_exit; /* The loop exit basic block. */
194 basic_block loop_preheader; /* The loop preheader basic block. */
a9989fb4 195};
196
0051c76a 197static void decide_unroll_stupid (struct loop *, int);
198static void decide_unroll_constant_iterations (struct loop *, int);
199static void decide_unroll_runtime_iterations (struct loop *, int);
7194de72 200static void unroll_loop_stupid (struct loop *);
c836de3f 201static void decide_unrolling (int);
7194de72 202static void unroll_loop_constant_iterations (struct loop *);
203static void unroll_loop_runtime_iterations (struct loop *);
375bb675 204static struct opt_info *analyze_insns_in_loop (struct loop *);
205static void opt_info_start_duplication (struct opt_info *);
206static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
207static void free_opt_info (struct opt_info *);
222dc1d3 208static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
3b3940d7 209static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
3eeb4f9a 210static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
222dc1d3 211static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
b635a1a3 212static void insert_var_expansion_initialization (struct var_to_expand *,
213 basic_block);
214static void combine_var_copies_in_loop_exit (struct var_to_expand *,
215 basic_block);
375bb675 216static rtx get_expansion (struct var_to_expand *);
ad5201d0 217
c836de3f 218/* Emit a message summarizing the unroll that will be
f55775aa 219 performed for LOOP, along with the loop's location LOCUS, if
220 appropriate given the dump or -fopt-info settings. */
221
222static void
c836de3f 223report_unroll (struct loop *loop, location_t locus)
f55775aa 224{
f55775aa 225 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
226
830b796a 227 if (loop->lpt_decision.decision == LPT_NONE)
228 return;
229
f55775aa 230 if (!dump_enabled_p ())
231 return;
232
c836de3f 233 dump_printf_loc (report_flags, locus,
234 "loop unrolled %d times",
235 loop->lpt_decision.times);
f55775aa 236 if (profile_info)
237 dump_printf (report_flags,
c836de3f 238 " (header execution count %d)",
f55775aa 239 (int)loop->header->count);
f55775aa 240
241 dump_printf (report_flags, "\n");
242}
243
c836de3f 244/* Decide whether to unroll loops and how much. */
ad5201d0 245static void
c836de3f 246decide_unrolling (int flags)
ad5201d0 247{
17519ba0 248 struct loop *loop;
ad5201d0 249
250 /* Scan the loops, inner ones first. */
f21d4d00 251 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
ad5201d0 252 {
ad5201d0 253 loop->lpt_decision.decision = LPT_NONE;
f55775aa 254 location_t locus = get_loop_location (loop);
ad5201d0 255
f55775aa 256 if (dump_enabled_p ())
257 dump_printf_loc (TDF_RTL, locus,
258 ";; *** Considering loop %d at BB %d for "
c836de3f 259 "unrolling ***\n",
f55775aa 260 loop->num, loop->header->index);
ad5201d0 261
 262 /* Do not unroll cold areas. */
0bfd8d5c 263 if (optimize_loop_for_size_p (loop))
ad5201d0 264 {
450d042a 265 if (dump_file)
266 fprintf (dump_file, ";; Not considering loop, cold area\n");
ad5201d0 267 continue;
268 }
269
270 /* Can the loop be manipulated? */
271 if (!can_duplicate_loop_p (loop))
272 {
450d042a 273 if (dump_file)
274 fprintf (dump_file,
ad5201d0 275 ";; Not considering loop, cannot duplicate\n");
ad5201d0 276 continue;
277 }
278
279 /* Skip non-innermost loops. */
280 if (loop->inner)
281 {
450d042a 282 if (dump_file)
283 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
ad5201d0 284 continue;
285 }
286
287 loop->ninsns = num_loop_insns (loop);
288 loop->av_ninsns = average_num_loop_insns (loop);
289
290 /* Try transformations one by one in decreasing order of
291 priority. */
292
0051c76a 293 decide_unroll_constant_iterations (loop, flags);
ad5201d0 294 if (loop->lpt_decision.decision == LPT_NONE)
0051c76a 295 decide_unroll_runtime_iterations (loop, flags);
ad5201d0 296 if (loop->lpt_decision.decision == LPT_NONE)
0051c76a 297 decide_unroll_stupid (loop, flags);
ad5201d0 298
c836de3f 299 report_unroll (loop, locus);
ad5201d0 300 }
ad5201d0 301}
302
c836de3f 303/* Unroll loops, as requested by FLAGS. */
304void
305unroll_loops (int flags)
ad5201d0 306{
c836de3f 307 struct loop *loop;
308 bool changed = false;
ad5201d0 309
c836de3f 310 /* First decide which loops to unroll and how much. */
311 decide_unrolling (flags);
ad5201d0 312
c836de3f 313 /* Scan the loops, inner ones first. */
314 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
ad5201d0 315 {
c836de3f 316 /* And perform the appropriate transformations. */
317 switch (loop->lpt_decision.decision)
3ad4992f 318 {
c836de3f 319 case LPT_UNROLL_CONSTANT:
320 unroll_loop_constant_iterations (loop);
321 changed = true;
322 break;
323 case LPT_UNROLL_RUNTIME:
324 unroll_loop_runtime_iterations (loop);
325 changed = true;
326 break;
327 case LPT_UNROLL_STUPID:
328 unroll_loop_stupid (loop);
329 changed = true;
330 break;
331 case LPT_NONE:
332 break;
333 default:
334 gcc_unreachable ();
ad5201d0 335 }
ad5201d0 336 }
337
c836de3f 338 if (changed)
339 {
340 calculate_dominance_info (CDI_DOMINATORS);
341 fix_loop_structure (NULL);
342 }
3ad4992f 343
c836de3f 344 iv_analysis_done ();
345}
ad5201d0 346
c836de3f 347/* Check whether the exit of LOOP is at the end of the loop body. */
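/* As an illustration: a loop compiled from

     do { body; } while (cond);

   typically tests the exit condition in the block that falls through to
   the latch, so its exit is at the end of the body, whereas

     while (cond) { body; }

   tests the condition before the body, so its exit is not at the end.  */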
3ad4992f 348
c836de3f 349static bool
350loop_exit_at_end_p (struct loop *loop)
ad5201d0 351{
f9cce2dc 352 struct niter_desc *desc = get_simple_loop_desc (loop);
c836de3f 353 rtx_insn *insn;
a5414ff5 354
c836de3f 355 /* We should never have a conditional jump in the latch block. */
356 gcc_assert (desc->in_edge->dest != loop->header);
48e1416a 357
c836de3f 358 if (desc->in_edge->dest != loop->latch)
359 return false;
a9989fb4 360
c836de3f 361 /* Check that the latch is empty. */
362 FOR_BB_INSNS (loop->latch, insn)
363 {
364 if (INSN_P (insn) && active_insn_p (insn))
365 return false;
a5414ff5 366 }
ad5201d0 367
c836de3f 368 return true;
ad5201d0 369}
370
450d042a 371/* Decide whether to unroll LOOP iterating a constant number of times
 372 and how much. */
f9cce2dc 373
ad5201d0 374static void
0051c76a 375decide_unroll_constant_iterations (struct loop *loop, int flags)
ad5201d0 376{
f9cce2dc 377 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
378 struct niter_desc *desc;
5de9d3ed 379 widest_int iterations;
ad5201d0 380
381 if (!(flags & UAP_UNROLL))
382 {
383 /* We were not asked to, just return back silently. */
384 return;
385 }
386
450d042a 387 if (dump_file)
388 fprintf (dump_file,
389 "\n;; Considering unrolling loop with constant "
390 "number of iterations\n");
ad5201d0 391
392 /* nunroll = total number of copies of the original loop body in
393 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
394 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
450d042a 395 nunroll_by_av
396 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
ad5201d0 397 if (nunroll > nunroll_by_av)
398 nunroll = nunroll_by_av;
399 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
400 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
401
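  /* For illustration, assuming the default values from params.def at the
     time of writing (max-unrolled-insns = 200, max-average-unrolled-insns
     = 80, max-unroll-times = 8): a loop with ninsns = 20 and av_ninsns = 16
     gives 200 / 20 = 10 and 80 / 16 = 5, so nunroll is limited to 5 and the
     max-unroll-times cap of 8 does not apply.  */
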
e7f33cf8 402 if (targetm.loop_unroll_adjust)
403 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
404
ad5201d0 405 /* Skip big loops. */
406 if (nunroll <= 1)
407 {
450d042a 408 if (dump_file)
409 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 410 return;
411 }
412
413 /* Check for simple loops. */
f9cce2dc 414 desc = get_simple_loop_desc (loop);
ad5201d0 415
416 /* Check number of iterations. */
f9cce2dc 417 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
ad5201d0 418 {
450d042a 419 if (dump_file)
420 fprintf (dump_file,
421 ";; Unable to prove that the loop iterates constant times\n");
ad5201d0 422 return;
423 }
424
c8fbcf58 425 /* Check whether the loop rolls enough to consider unrolling it.
 426 Consult also the loop bounds and profile; in case the loop has more
 427 than one exit it may well iterate fewer times than the determined
 428 maximal number of iterations. */
429 if (desc->niter < 2 * nunroll
f86b328b 430 || ((get_estimated_loop_iterations (loop, &iterations)
431 || get_max_loop_iterations (loop, &iterations))
796b6678 432 && wi::ltu_p (iterations, 2 * nunroll)))
ad5201d0 433 {
450d042a 434 if (dump_file)
435 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 436 return;
437 }
438
439 /* Success; now compute number of iterations to unroll. We alter
440 nunroll so that as few as possible copies of loop body are
d01481af 441 necessary, while still not decreasing the number of unrollings
ad5201d0 442 too much (at most by 1). */
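  /* For illustration, with niter = 100 and nunroll = 8 (and assuming the
     exit is not at the end of the body and there are no noloop
     assumptions), the search below tries i from 18 down to 7 and picks
     i = 9: 100 % 10 == 0 needs no peeled copies, so only 0 + 9 + 1 = 10
     copies of the body are required, versus e.g. 5 + 18 + 1 = 24 copies
     for i = 18.  */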
443 best_copies = 2 * nunroll + 10;
444
445 i = 2 * nunroll + 2;
f9cce2dc 446 if (i - 1 >= desc->niter)
447 i = desc->niter - 2;
ad5201d0 448
449 for (; i >= nunroll - 1; i--)
450 {
f9cce2dc 451 unsigned exit_mod = desc->niter % (i + 1);
ad5201d0 452
f9cce2dc 453 if (!loop_exit_at_end_p (loop))
ad5201d0 454 n_copies = exit_mod + i + 1;
f9cce2dc 455 else if (exit_mod != (unsigned) i
456 || desc->noloop_assumptions != NULL_RTX)
ad5201d0 457 n_copies = exit_mod + i + 2;
458 else
459 n_copies = i + 1;
460
461 if (n_copies < best_copies)
462 {
463 best_copies = n_copies;
464 best_unroll = i;
465 }
466 }
467
ad5201d0 468 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
469 loop->lpt_decision.times = best_unroll;
470}
471
1c18ed19 472/* Unroll LOOP, whose number of iterations is constant,
 473 LOOP->LPT_DECISION.TIMES times. The transformation does this:
3ad4992f 474
ad5201d0 475 for (i = 0; i < 102; i++)
476 body;
3ad4992f 477
1c18ed19 478 ==> (LOOP->LPT_DECISION.TIMES == 3)
3ad4992f 479
ad5201d0 480 i = 0;
481 body; i++;
482 body; i++;
483 while (i < 102)
484 {
485 body; i++;
486 body; i++;
487 body; i++;
488 body; i++;
489 }
490 */
491static void
7194de72 492unroll_loop_constant_iterations (struct loop *loop)
ad5201d0 493{
494 unsigned HOST_WIDE_INT niter;
495 unsigned exit_mod;
496 sbitmap wont_exit;
f3c40e6d 497 unsigned i;
f3c40e6d 498 edge e;
ad5201d0 499 unsigned max_unroll = loop->lpt_decision.times;
f9cce2dc 500 struct niter_desc *desc = get_simple_loop_desc (loop);
501 bool exit_at_end = loop_exit_at_end_p (loop);
375bb675 502 struct opt_info *opt_info = NULL;
a53ff4c1 503 bool ok;
48e1416a 504
ad5201d0 505 niter = desc->niter;
506
a9989fb4 507 /* Should not get here (such a loop should be peeled instead). */
508 gcc_assert (niter > max_unroll + 1);
ad5201d0 509
510 exit_mod = niter % (max_unroll + 1);
511
512 wont_exit = sbitmap_alloc (max_unroll + 1);
53c5d9d4 513 bitmap_ones (wont_exit);
ad5201d0 514
c2078b80 515 auto_vec<edge> remove_edges;
48e1416a 516 if (flag_split_ivs_in_unroller
375bb675 517 || flag_variable_expansion_in_unroller)
518 opt_info = analyze_insns_in_loop (loop);
48e1416a 519
f9cce2dc 520 if (!exit_at_end)
ad5201d0 521 {
f9cce2dc 522 /* The exit is not at the end of the loop; leave the exit test
ad5201d0 523 in the first copy, so that loops that start with a test
 524 of the exit condition have a contiguous body after unrolling. */
525
450d042a 526 if (dump_file)
1c18ed19 527 fprintf (dump_file, ";; Condition at beginning of loop.\n");
ad5201d0 528
529 /* Peel exit_mod iterations. */
08b7917c 530 bitmap_clear_bit (wont_exit, 0);
f9cce2dc 531 if (desc->noloop_assumptions)
08b7917c 532 bitmap_clear_bit (wont_exit, 1);
ad5201d0 533
f9cce2dc 534 if (exit_mod)
535 {
375bb675 536 opt_info_start_duplication (opt_info);
a53ff4c1 537 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 538 exit_mod,
f9cce2dc 539 wont_exit, desc->out_edge,
f3c40e6d 540 &remove_edges,
75cc36a4 541 DLTHE_FLAG_UPDATE_FREQ
542 | (opt_info && exit_mod > 1
543 ? DLTHE_RECORD_COPY_NUMBER
544 : 0));
a53ff4c1 545 gcc_assert (ok);
f9cce2dc 546
375bb675 547 if (opt_info && exit_mod > 1)
48e1416a 548 apply_opt_in_copies (opt_info, exit_mod, false, false);
549
f9cce2dc 550 desc->noloop_assumptions = NULL_RTX;
551 desc->niter -= exit_mod;
e913b5cd 552 loop->nb_iterations_upper_bound -= exit_mod;
a9ef9877 553 if (loop->any_estimate
796b6678 554 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
e913b5cd 555 loop->nb_iterations_estimate -= exit_mod;
a9ef9877 556 else
557 loop->any_estimate = false;
f9cce2dc 558 }
ad5201d0 559
08b7917c 560 bitmap_set_bit (wont_exit, 1);
ad5201d0 561 }
562 else
563 {
564 /* Leave exit test in last copy, for the same reason as above if
565 the loop tests the condition at the end of loop body. */
566
450d042a 567 if (dump_file)
1c18ed19 568 fprintf (dump_file, ";; Condition at end of loop.\n");
ad5201d0 569
570 /* We know that niter >= max_unroll + 2; so we do not need to care of
571 case when we would exit before reaching the loop. So just peel
f9cce2dc 572 exit_mod + 1 iterations. */
573 if (exit_mod != max_unroll
574 || desc->noloop_assumptions)
ad5201d0 575 {
08b7917c 576 bitmap_clear_bit (wont_exit, 0);
f9cce2dc 577 if (desc->noloop_assumptions)
08b7917c 578 bitmap_clear_bit (wont_exit, 1);
48e1416a 579
375bb675 580 opt_info_start_duplication (opt_info);
a53ff4c1 581 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 582 exit_mod + 1,
a53ff4c1 583 wont_exit, desc->out_edge,
f3c40e6d 584 &remove_edges,
75cc36a4 585 DLTHE_FLAG_UPDATE_FREQ
586 | (opt_info && exit_mod > 0
587 ? DLTHE_RECORD_COPY_NUMBER
588 : 0));
a53ff4c1 589 gcc_assert (ok);
48e1416a 590
375bb675 591 if (opt_info && exit_mod > 0)
592 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
a9989fb4 593
f9cce2dc 594 desc->niter -= exit_mod + 1;
e913b5cd 595 loop->nb_iterations_upper_bound -= exit_mod + 1;
a9ef9877 596 if (loop->any_estimate
796b6678 597 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
e913b5cd 598 loop->nb_iterations_estimate -= exit_mod + 1;
a9ef9877 599 else
600 loop->any_estimate = false;
f9cce2dc 601 desc->noloop_assumptions = NULL_RTX;
602
08b7917c 603 bitmap_set_bit (wont_exit, 0);
604 bitmap_set_bit (wont_exit, 1);
ad5201d0 605 }
606
08b7917c 607 bitmap_clear_bit (wont_exit, max_unroll);
ad5201d0 608 }
609
610 /* Now unroll the loop. */
48e1416a 611
375bb675 612 opt_info_start_duplication (opt_info);
a53ff4c1 613 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 614 max_unroll,
a53ff4c1 615 wont_exit, desc->out_edge,
f3c40e6d 616 &remove_edges,
75cc36a4 617 DLTHE_FLAG_UPDATE_FREQ
618 | (opt_info
619 ? DLTHE_RECORD_COPY_NUMBER
620 : 0));
a53ff4c1 621 gcc_assert (ok);
ad5201d0 622
375bb675 623 if (opt_info)
a9989fb4 624 {
375bb675 625 apply_opt_in_copies (opt_info, max_unroll, true, true);
626 free_opt_info (opt_info);
a9989fb4 627 }
628
ad5201d0 629 free (wont_exit);
630
f9cce2dc 631 if (exit_at_end)
632 {
01020a5f 633 basic_block exit_block = get_bb_copy (desc->in_edge->src);
f9cce2dc 634 /* Find a new in and out edge; they are in the last copy we have made. */
48e1416a 635
cd665a06 636 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
f9cce2dc 637 {
cd665a06 638 desc->out_edge = EDGE_SUCC (exit_block, 0);
639 desc->in_edge = EDGE_SUCC (exit_block, 1);
f9cce2dc 640 }
641 else
642 {
cd665a06 643 desc->out_edge = EDGE_SUCC (exit_block, 1);
644 desc->in_edge = EDGE_SUCC (exit_block, 0);
f9cce2dc 645 }
646 }
647
648 desc->niter /= max_unroll + 1;
a9ef9877 649 loop->nb_iterations_upper_bound
796b6678 650 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
a9ef9877 651 if (loop->any_estimate)
652 loop->nb_iterations_estimate
796b6678 653 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
f9cce2dc 654 desc->niter_expr = GEN_INT (desc->niter);
655
ad5201d0 656 /* Remove the edges. */
f1f41a6c 657 FOR_EACH_VEC_ELT (remove_edges, i, e)
f3c40e6d 658 remove_path (e);
ad5201d0 659
450d042a 660 if (dump_file)
661 fprintf (dump_file,
662 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
663 max_unroll, num_loop_insns (loop));
ad5201d0 664}
665
 666/* Decide whether to unroll LOOP iterating a runtime-computable number of
 667 times and how much. */
668static void
0051c76a 669decide_unroll_runtime_iterations (struct loop *loop, int flags)
ad5201d0 670{
671 unsigned nunroll, nunroll_by_av, i;
f9cce2dc 672 struct niter_desc *desc;
5de9d3ed 673 widest_int iterations;
ad5201d0 674
675 if (!(flags & UAP_UNROLL))
676 {
 677 /* We were not asked to; just return silently. */
678 return;
679 }
680
450d042a 681 if (dump_file)
682 fprintf (dump_file,
683 "\n;; Considering unrolling loop with runtime "
684 "computable number of iterations\n");
ad5201d0 685
686 /* nunroll = total number of copies of the original loop body in
687 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
688 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
689 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
690 if (nunroll > nunroll_by_av)
691 nunroll = nunroll_by_av;
692 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
693 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
694
9ccaa774 695 if (targetm.loop_unroll_adjust)
696 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
697
ad5201d0 698 /* Skip big loops. */
699 if (nunroll <= 1)
700 {
450d042a 701 if (dump_file)
702 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 703 return;
704 }
705
706 /* Check for simple loops. */
f9cce2dc 707 desc = get_simple_loop_desc (loop);
ad5201d0 708
709 /* Check simpleness. */
f9cce2dc 710 if (!desc->simple_p || desc->assumptions)
ad5201d0 711 {
450d042a 712 if (dump_file)
713 fprintf (dump_file,
714 ";; Unable to prove that the number of iterations "
715 "can be counted in runtime\n");
ad5201d0 716 return;
717 }
718
f9cce2dc 719 if (desc->const_iter)
ad5201d0 720 {
450d042a 721 if (dump_file)
722 fprintf (dump_file, ";; Loop iterates constant times\n");
ad5201d0 723 return;
724 }
725
204a2e45 726 /* Check whether the loop rolls. */
f86b328b 727 if ((get_estimated_loop_iterations (loop, &iterations)
728 || get_max_loop_iterations (loop, &iterations))
796b6678 729 && wi::ltu_p (iterations, 2 * nunroll))
ad5201d0 730 {
450d042a 731 if (dump_file)
732 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 733 return;
734 }
735
 736 /* Success; now force nunroll to be a power of 2, as we are unable to
 737 cope with overflows in the computation of the number of iterations. */
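  /* For instance, nunroll == 6 leaves the loop below with i == 4, so we
     record times == 3 and unroll by a factor of 4.  */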
f9cce2dc 738 for (i = 1; 2 * i <= nunroll; i *= 2)
739 continue;
ad5201d0 740
741 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
742 loop->lpt_decision.times = i - 1;
743}
744
d6a1bbdb 745/* Splits edge E and inserts the sequence of instructions INSNS on it, and
746 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
747 and NULL is returned instead. */
88e6f696 748
749basic_block
222dc1d3 750split_edge_and_insert (edge e, rtx_insn *insns)
88e6f696 751{
d6a1bbdb 752 basic_block bb;
753
754 if (!insns)
755 return NULL;
48e1416a 756 bb = split_edge (e);
88e6f696 757 emit_insn_after (insns, BB_END (bb));
db1c50be 758
759 /* ??? We used to assume that INSNS can contain control flow insns, and
760 that we had to try to find sub basic blocks in BB to maintain a valid
761 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
762 and call break_superblocks when going out of cfglayout mode. But it
763 turns out that this never happens; and that if it does ever happen,
8b88439e 764 the verify_flow_info at the end of the RTL loop passes would fail.
db1c50be 765
766 There are two reasons why we expected we could have control flow insns
767 in INSNS. The first is when a comparison has to be done in parts, and
768 the second is when the number of iterations is computed for loops with
769 the number of iterations known at runtime. In both cases, test cases
770 to get control flow in INSNS appear to be impossible to construct:
771
772 * If do_compare_rtx_and_jump needs several branches to do comparison
773 in a mode that needs comparison by parts, we cannot analyze the
774 number of iterations of the loop, and we never get to unrolling it.
775
776 * The code in expand_divmod that was suspected to cause creation of
777 branching code seems to be only accessed for signed division. The
778 divisions used by # of iterations analysis are always unsigned.
779 Problems might arise on architectures that emits branching code
780 for some operations that may appear in the unroller (especially
781 for division), but we have no such architectures.
782
783 Considering all this, it was decided that we should for now assume
784 that INSNS can in theory contain control flow insns, but in practice
785 it never does. So we don't handle the theoretical case, and should
786 a real failure ever show up, we have a pretty good clue for how to
787 fix it. */
788
88e6f696 789 return bb;
790}
791
b99ab275 792/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
793 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
794 in order to create a jump. */
795
222dc1d3 796static rtx_insn *
b99ab275 797compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
222dc1d3 798 rtx_insn *cinsn)
b99ab275 799{
222dc1d3 800 rtx_insn *seq, *jump;
801 rtx cond;
3754d046 802 machine_mode mode;
b99ab275 803
804 mode = GET_MODE (op0);
805 if (mode == VOIDmode)
806 mode = GET_MODE (op1);
807
808 start_sequence ();
809 if (GET_MODE_CLASS (mode) == MODE_CC)
810 {
 811 /* A hack -- there seems to be no easy generic way to make a
 812 conditional jump from a ccmode comparison. */
813 gcc_assert (cinsn);
814 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
815 gcc_assert (GET_CODE (cond) == comp);
816 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
817 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
818 emit_jump_insn (copy_insn (PATTERN (cinsn)));
819 jump = get_last_insn ();
820 gcc_assert (JUMP_P (jump));
821 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
822 LABEL_NUSES (JUMP_LABEL (jump))++;
823 redirect_jump (jump, label, 0);
824 }
825 else
826 {
827 gcc_assert (!cinsn);
828
829 op0 = force_operand (op0, NULL_RTX);
830 op1 = force_operand (op1, NULL_RTX);
831 do_compare_rtx_and_jump (op0, op1, comp, 0,
832 mode, NULL_RTX, NULL_RTX, label, -1);
833 jump = get_last_insn ();
834 gcc_assert (JUMP_P (jump));
835 JUMP_LABEL (jump) = label;
836 LABEL_NUSES (label)++;
837 }
838 add_int_reg_note (jump, REG_BR_PROB, prob);
839
840 seq = get_insns ();
841 end_sequence ();
842
843 return seq;
844}
845
1c18ed19 846/* Unroll LOOP, for which we are able to count the number of iterations
 847 at runtime, LOOP->LPT_DECISION.TIMES times. The transformation does this
ad5201d0 848 (with some extra care for the case n < 0):
3ad4992f 849
ad5201d0 850 for (i = 0; i < n; i++)
851 body;
3ad4992f 852
1c18ed19 853 ==> (LOOP->LPT_DECISION.TIMES == 3)
3ad4992f 854
ad5201d0 855 i = 0;
856 mod = n % 4;
3ad4992f 857
ad5201d0 858 switch (mod)
859 {
860 case 3:
861 body; i++;
862 case 2:
863 body; i++;
864 case 1:
865 body; i++;
866 case 0: ;
867 }
3ad4992f 868
ad5201d0 869 while (i < n)
870 {
871 body; i++;
872 body; i++;
873 body; i++;
874 body; i++;
875 }
876 */
877static void
7194de72 878unroll_loop_runtime_iterations (struct loop *loop)
ad5201d0 879{
222dc1d3 880 rtx old_niter, niter, tmp;
881 rtx_insn *init_code, *branch_code;
ad5201d0 882 unsigned i, j, p;
3f9439d7 883 basic_block preheader, *body, swtch, ezc_swtch;
ad5201d0 884 sbitmap wont_exit;
885 int may_exit_copy;
f3c40e6d 886 unsigned n_peel;
f3c40e6d 887 edge e;
ad5201d0 888 bool extra_zero_check, last_may_exit;
889 unsigned max_unroll = loop->lpt_decision.times;
f9cce2dc 890 struct niter_desc *desc = get_simple_loop_desc (loop);
891 bool exit_at_end = loop_exit_at_end_p (loop);
375bb675 892 struct opt_info *opt_info = NULL;
a53ff4c1 893 bool ok;
48e1416a 894
375bb675 895 if (flag_split_ivs_in_unroller
896 || flag_variable_expansion_in_unroller)
897 opt_info = analyze_insns_in_loop (loop);
48e1416a 898
ad5201d0 899 /* Remember blocks whose dominators will have to be updated. */
c2078b80 900 auto_vec<basic_block> dom_bbs;
ad5201d0 901
902 body = get_loop_body (loop);
903 for (i = 0; i < loop->num_nodes; i++)
904 {
f1f41a6c 905 vec<basic_block> ldom;
3f9439d7 906 basic_block bb;
ad5201d0 907
3f9439d7 908 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
f1f41a6c 909 FOR_EACH_VEC_ELT (ldom, j, bb)
3f9439d7 910 if (!flow_bb_inside_loop_p (loop, bb))
f1f41a6c 911 dom_bbs.safe_push (bb);
ad5201d0 912
f1f41a6c 913 ldom.release ();
ad5201d0 914 }
915 free (body);
916
f9cce2dc 917 if (!exit_at_end)
ad5201d0 918 {
919 /* Leave exit in first copy (for explanation why see comment in
920 unroll_loop_constant_iterations). */
921 may_exit_copy = 0;
922 n_peel = max_unroll - 1;
923 extra_zero_check = true;
924 last_may_exit = false;
925 }
926 else
927 {
928 /* Leave exit in last copy (for explanation why see comment in
929 unroll_loop_constant_iterations). */
930 may_exit_copy = max_unroll;
931 n_peel = max_unroll;
932 extra_zero_check = false;
933 last_may_exit = true;
934 }
935
936 /* Get expression for number of iterations. */
937 start_sequence ();
f9cce2dc 938 old_niter = niter = gen_reg_rtx (desc->mode);
939 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
940 if (tmp != niter)
941 emit_move_insn (niter, tmp);
ad5201d0 942
943 /* Count modulo by ANDing it with max_unroll; we use the fact that
944 the number of unrollings is a power of two, and thus this is correct
945 even if there is overflow in the computation. */
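  /* For example, with max_unroll == 3 (an unroll factor of 4), niter & 3
     equals niter % 4 for any unsigned niter, even if the expression that
     computed niter wrapped around.  */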
f9cce2dc 946 niter = expand_simple_binop (desc->mode, AND,
0359f9f5 947 niter, gen_int_mode (max_unroll, desc->mode),
ad5201d0 948 NULL_RTX, 0, OPTAB_LIB_WIDEN);
949
950 init_code = get_insns ();
951 end_sequence ();
ffbd194b 952 unshare_all_rtl_in_chain (init_code);
ad5201d0 953
954 /* Precondition the loop. */
88e6f696 955 split_edge_and_insert (loop_preheader_edge (loop), init_code);
ad5201d0 956
c2078b80 957 auto_vec<edge> remove_edges;
ad5201d0 958
959 wont_exit = sbitmap_alloc (max_unroll + 2);
960
961 /* Peel the first copy of loop body (almost always we must leave exit test
962 here; the only exception is when we have extra zero check and the number
f9cce2dc 963 of iterations is reliable. Also record the place of (possible) extra
964 zero check. */
53c5d9d4 965 bitmap_clear (wont_exit);
f9cce2dc 966 if (extra_zero_check
967 && !desc->noloop_assumptions)
08b7917c 968 bitmap_set_bit (wont_exit, 1);
ad5201d0 969 ezc_swtch = loop_preheader_edge (loop)->src;
a53ff4c1 970 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 971 1, wont_exit, desc->out_edge,
f3c40e6d 972 &remove_edges,
a53ff4c1 973 DLTHE_FLAG_UPDATE_FREQ);
974 gcc_assert (ok);
ad5201d0 975
976 /* Record the place where switch will be built for preconditioning. */
88e6f696 977 swtch = split_edge (loop_preheader_edge (loop));
ad5201d0 978
979 for (i = 0; i < n_peel; i++)
980 {
981 /* Peel the copy. */
53c5d9d4 982 bitmap_clear (wont_exit);
ad5201d0 983 if (i != n_peel - 1 || !last_may_exit)
08b7917c 984 bitmap_set_bit (wont_exit, 1);
a53ff4c1 985 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 986 1, wont_exit, desc->out_edge,
f3c40e6d 987 &remove_edges,
a53ff4c1 988 DLTHE_FLAG_UPDATE_FREQ);
989 gcc_assert (ok);
ad5201d0 990
c87a3eff 991 /* Create item for switch. */
992 j = n_peel - i - (extra_zero_check ? 0 : 1);
993 p = REG_BR_PROB_BASE / (i + 2);
994
88e6f696 995 preheader = split_edge (loop_preheader_edge (loop));
f9cce2dc 996 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
a53ff4c1 997 block_label (preheader), p,
222dc1d3 998 NULL);
c87a3eff 999
d6a1bbdb 1000 /* We rely on the fact that the compare and jump cannot be optimized out,
1001 and hence the cfg we create is correct. */
1002 gcc_assert (branch_code != NULL_RTX);
1003
88e6f696 1004 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
0051c76a 1005 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
ea091dfd 1006 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
c87a3eff 1007 e = make_edge (swtch, preheader,
ea091dfd 1008 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
a9ef9877 1009 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
c87a3eff 1010 e->probability = p;
ad5201d0 1011 }
1012
1013 if (extra_zero_check)
1014 {
1015 /* Add branch for zero iterations. */
1016 p = REG_BR_PROB_BASE / (max_unroll + 1);
1017 swtch = ezc_swtch;
88e6f696 1018 preheader = split_edge (loop_preheader_edge (loop));
f9cce2dc 1019 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
a53ff4c1 1020 block_label (preheader), p,
222dc1d3 1021 NULL);
d6a1bbdb 1022 gcc_assert (branch_code != NULL_RTX);
ad5201d0 1023
88e6f696 1024 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
0051c76a 1025 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
ea091dfd 1026 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
a964683a 1027 e = make_edge (swtch, preheader,
ea091dfd 1028 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
a9ef9877 1029 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
ad5201d0 1030 e->probability = p;
1031 }
1032
1033 /* Recount dominators for outer blocks. */
3f9439d7 1034 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
ad5201d0 1035
1036 /* And unroll loop. */
1037
53c5d9d4 1038 bitmap_ones (wont_exit);
08b7917c 1039 bitmap_clear_bit (wont_exit, may_exit_copy);
375bb675 1040 opt_info_start_duplication (opt_info);
48e1416a 1041
a53ff4c1 1042 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 1043 max_unroll,
a53ff4c1 1044 wont_exit, desc->out_edge,
f3c40e6d 1045 &remove_edges,
75cc36a4 1046 DLTHE_FLAG_UPDATE_FREQ
1047 | (opt_info
1048 ? DLTHE_RECORD_COPY_NUMBER
1049 : 0));
a53ff4c1 1050 gcc_assert (ok);
48e1416a 1051
375bb675 1052 if (opt_info)
a9989fb4 1053 {
375bb675 1054 apply_opt_in_copies (opt_info, max_unroll, true, true);
1055 free_opt_info (opt_info);
a9989fb4 1056 }
1057
ad5201d0 1058 free (wont_exit);
1059
f9cce2dc 1060 if (exit_at_end)
1061 {
01020a5f 1062 basic_block exit_block = get_bb_copy (desc->in_edge->src);
a53ff4c1 1063 /* Find a new in and out edge; they are in the last copy we have
1064 made. */
48e1416a 1065
cd665a06 1066 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
f9cce2dc 1067 {
cd665a06 1068 desc->out_edge = EDGE_SUCC (exit_block, 0);
1069 desc->in_edge = EDGE_SUCC (exit_block, 1);
f9cce2dc 1070 }
1071 else
1072 {
cd665a06 1073 desc->out_edge = EDGE_SUCC (exit_block, 1);
1074 desc->in_edge = EDGE_SUCC (exit_block, 0);
f9cce2dc 1075 }
1076 }
1077
ad5201d0 1078 /* Remove the edges. */
f1f41a6c 1079 FOR_EACH_VEC_ELT (remove_edges, i, e)
f3c40e6d 1080 remove_path (e);
ad5201d0 1081
f9cce2dc 1082 /* We must be careful when updating the number of iterations due to
 1083 preconditioning and the fact that the value must be valid at the entry
 1084 of the loop. After passing through the above code, we see that
 1085 the correct new number of iterations is this: */
a9989fb4 1086 gcc_assert (!desc->const_iter);
f9cce2dc 1087 desc->niter_expr =
a53ff4c1 1088 simplify_gen_binary (UDIV, desc->mode, old_niter,
5d5ee71f 1089 gen_int_mode (max_unroll + 1, desc->mode));
a9ef9877 1090 loop->nb_iterations_upper_bound
796b6678 1091 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
a9ef9877 1092 if (loop->any_estimate)
1093 loop->nb_iterations_estimate
796b6678 1094 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
f9cce2dc 1095 if (exit_at_end)
1096 {
1097 desc->niter_expr =
1098 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1099 desc->noloop_assumptions = NULL_RTX;
a9ef9877 1100 --loop->nb_iterations_upper_bound;
1101 if (loop->any_estimate
e913b5cd 1102 && loop->nb_iterations_estimate != 0)
a9ef9877 1103 --loop->nb_iterations_estimate;
1104 else
1105 loop->any_estimate = false;
f9cce2dc 1106 }
1107
450d042a 1108 if (dump_file)
1109 fprintf (dump_file,
1110 ";; Unrolled loop %d times, counting # of iterations "
1111 "in runtime, %i insns\n",
ad5201d0 1112 max_unroll, num_loop_insns (loop));
1113}
3ad4992f 1114
ad5201d0 1115/* Decide whether to unroll LOOP stupidly and how much. */
1116static void
0051c76a 1117decide_unroll_stupid (struct loop *loop, int flags)
ad5201d0 1118{
1119 unsigned nunroll, nunroll_by_av, i;
f9cce2dc 1120 struct niter_desc *desc;
5de9d3ed 1121 widest_int iterations;
ad5201d0 1122
1123 if (!(flags & UAP_UNROLL_ALL))
1124 {
 1125 /* We were not asked to; just return silently. */
1126 return;
1127 }
1128
450d042a 1129 if (dump_file)
1130 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
ad5201d0 1131
1132 /* nunroll = total number of copies of the original loop body in
1133 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1134 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
450d042a 1135 nunroll_by_av
1136 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
ad5201d0 1137 if (nunroll > nunroll_by_av)
1138 nunroll = nunroll_by_av;
1139 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1140 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1141
9ccaa774 1142 if (targetm.loop_unroll_adjust)
1143 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1144
ad5201d0 1145 /* Skip big loops. */
1146 if (nunroll <= 1)
1147 {
450d042a 1148 if (dump_file)
1149 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 1150 return;
1151 }
1152
1153 /* Check for simple loops. */
f9cce2dc 1154 desc = get_simple_loop_desc (loop);
ad5201d0 1155
1156 /* Check simpleness. */
f9cce2dc 1157 if (desc->simple_p && !desc->assumptions)
ad5201d0 1158 {
450d042a 1159 if (dump_file)
1160 fprintf (dump_file, ";; The loop is simple\n");
ad5201d0 1161 return;
1162 }
1163
 1164 /* Do not unroll loops with branches inside -- it increases the number
d9459f6b 1165 of mispredicts.
 1166 TODO: this heuristic needs tuning; a call inside the loop body
 1167 is also a relatively good reason not to unroll. */
f9cce2dc 1168 if (num_loop_branches (loop) > 1)
ad5201d0 1169 {
450d042a 1170 if (dump_file)
1171 fprintf (dump_file, ";; Not unrolling, contains branches\n");
ad5201d0 1172 return;
1173 }
1174
204a2e45 1175 /* Check whether the loop rolls. */
f86b328b 1176 if ((get_estimated_loop_iterations (loop, &iterations)
1177 || get_max_loop_iterations (loop, &iterations))
796b6678 1178 && wi::ltu_p (iterations, 2 * nunroll))
ad5201d0 1179 {
450d042a 1180 if (dump_file)
1181 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 1182 return;
1183 }
1184
1185 /* Success. Now force nunroll to be power of 2, as it seems that this
d01481af 1186 improves results (partially because of better alignments, partially
ad5201d0 1187 because of some dark magic). */
f9cce2dc 1188 for (i = 1; 2 * i <= nunroll; i *= 2)
1189 continue;
ad5201d0 1190
1191 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1192 loop->lpt_decision.times = i - 1;
1193}
1194
1c18ed19 1195/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
1196
ad5201d0 1197 while (cond)
1198 body;
1199
1c18ed19 1200 ==> (LOOP->LPT_DECISION.TIMES == 3)
ad5201d0 1201
1202 while (cond)
1203 {
1204 body;
1205 if (!cond) break;
1206 body;
1207 if (!cond) break;
1208 body;
1209 if (!cond) break;
1210 body;
1211 }
1212 */
1213static void
7194de72 1214unroll_loop_stupid (struct loop *loop)
ad5201d0 1215{
1216 sbitmap wont_exit;
1217 unsigned nunroll = loop->lpt_decision.times;
f9cce2dc 1218 struct niter_desc *desc = get_simple_loop_desc (loop);
375bb675 1219 struct opt_info *opt_info = NULL;
a53ff4c1 1220 bool ok;
48e1416a 1221
375bb675 1222 if (flag_split_ivs_in_unroller
1223 || flag_variable_expansion_in_unroller)
1224 opt_info = analyze_insns_in_loop (loop);
48e1416a 1225
1226
ad5201d0 1227 wont_exit = sbitmap_alloc (nunroll + 1);
53c5d9d4 1228 bitmap_clear (wont_exit);
375bb675 1229 opt_info_start_duplication (opt_info);
48e1416a 1230
a53ff4c1 1231 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 1232 nunroll, wont_exit,
f3c40e6d 1233 NULL, NULL,
75cc36a4 1234 DLTHE_FLAG_UPDATE_FREQ
1235 | (opt_info
1236 ? DLTHE_RECORD_COPY_NUMBER
1237 : 0));
a53ff4c1 1238 gcc_assert (ok);
48e1416a 1239
375bb675 1240 if (opt_info)
a9989fb4 1241 {
375bb675 1242 apply_opt_in_copies (opt_info, nunroll, true, true);
1243 free_opt_info (opt_info);
a9989fb4 1244 }
1245
ad5201d0 1246 free (wont_exit);
3ad4992f 1247
f9cce2dc 1248 if (desc->simple_p)
1249 {
1250 /* We indeed may get here provided that there are nontrivial assumptions
1251 for a loop to be really simple. We could update the counts, but the
1252 problem is that we are unable to decide which exit will be taken
1253 (not really true in case the number of iterations is constant,
75de4aa2 1254 but no one will do anything with this information, so we do not
f9cce2dc 1255 worry about it). */
1256 desc->simple_p = false;
1257 }
1258
450d042a 1259 if (dump_file)
1260 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
ad5201d0 1261 nunroll, num_loop_insns (loop));
1262}
a9989fb4 1263
3b3940d7 1264/* Returns true if REG is referenced in one nondebug insn in LOOP.
1265 Set *DEBUG_USES to the number of debug insns that reference the
1266 variable. */
375bb675 1267
f7b51129 1268static bool
3b3940d7 1269referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1270 int *debug_uses)
375bb675 1271{
1272 basic_block *body, bb;
1273 unsigned i;
1274 int count_ref = 0;
222dc1d3 1275 rtx_insn *insn;
48e1416a 1276
1277 body = get_loop_body (loop);
375bb675 1278 for (i = 0; i < loop->num_nodes; i++)
1279 {
1280 bb = body[i];
48e1416a 1281
375bb675 1282 FOR_BB_INSNS (bb, insn)
3b3940d7 1283 if (!rtx_referenced_p (reg, insn))
1284 continue;
1285 else if (DEBUG_INSN_P (insn))
1286 ++*debug_uses;
1287 else if (++count_ref > 1)
1288 break;
375bb675 1289 }
3b3940d7 1290 free (body);
375bb675 1291 return (count_ref == 1);
1292}
1293
3b3940d7 1294/* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1295
1296static void
1297reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1298{
1299 basic_block *body, bb;
1300 unsigned i;
222dc1d3 1301 rtx_insn *insn;
3b3940d7 1302
1303 body = get_loop_body (loop);
1304 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1305 {
1306 bb = body[i];
1307
1308 FOR_BB_INSNS (bb, insn)
1309 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1310 continue;
1311 else
1312 {
1313 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1314 gen_rtx_UNKNOWN_VAR_LOC (), 0);
1315 if (!--debug_uses)
1316 break;
1317 }
1318 }
1319 free (body);
1320}
1321
375bb675 1322/* Determine whether INSN contains an accumulator
48e1416a 1323 which can be expanded into separate copies,
375bb675 1324 one for each copy of the LOOP body.
48e1416a 1325
375bb675 1326 for (i = 0 ; i < n; i++)
1327 sum += a[i];
48e1416a 1328
375bb675 1329 ==>
48e1416a 1330
375bb675 1331 sum += a[i]
1332 ....
1333 i = i+1;
1334 sum1 += a[i]
1335 ....
1336 i = i+1
1337 sum2 += a[i];
1338 ....
1339
48e1416a 1340 Return NULL if INSN contains no opportunity for expansion of accumulator.
1341 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
375bb675 1342 information and return a pointer to it.
1343*/
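/* As an informal sketch of the rest of the transformation: the new
   accumulators (sum1, sum2, ... above) are initialized in the loop
   preheader to the identity of the operation (0 for PLUS/MINUS, 1 for
   MULT) and the partial results are combined at the loop exit, roughly

     sum = sum + sum1 + sum2;

   see insert_var_expansion_initialization and
   combine_var_copies_in_loop_exit.  */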
1344
1345static struct var_to_expand *
222dc1d3 1346analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
375bb675 1347{
7497b98c 1348 rtx set, dest, src;
375bb675 1349 struct var_to_expand *ves;
0def560a 1350 unsigned accum_pos;
7497b98c 1351 enum rtx_code code;
3b3940d7 1352 int debug_uses = 0;
0def560a 1353
375bb675 1354 set = single_set (insn);
1355 if (!set)
1356 return NULL;
48e1416a 1357
375bb675 1358 dest = SET_DEST (set);
1359 src = SET_SRC (set);
7497b98c 1360 code = GET_CODE (src);
48e1416a 1361
7497b98c 1362 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
375bb675 1363 return NULL;
1c6304f6 1364
7497b98c 1365 if (FLOAT_MODE_P (GET_MODE (dest)))
1366 {
1367 if (!flag_associative_math)
1368 return NULL;
1369 /* In the case of FMA, we're also changing the rounding. */
1370 if (code == FMA && !flag_unsafe_math_optimizations)
1371 return NULL;
1372 }
1373
1c6304f6 1374 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
 1375 in MD. But if there is no optab to generate the insn, we cannot
1376 perform the variable expansion. This can happen if an MD provides
1377 an insn but not a named pattern to generate it, for example to avoid
1378 producing code that needs additional mode switches like for x87/mmx.
1379
1380 So we check have_insn_for which looks for an optab for the operation
1381 in SRC. If it doesn't exist, we can't perform the expansion even
1382 though INSN is valid. */
7497b98c 1383 if (!have_insn_for (code, GET_MODE (src)))
1c6304f6 1384 return NULL;
1385
375bb675 1386 if (!REG_P (dest)
1387 && !(GET_CODE (dest) == SUBREG
1388 && REG_P (SUBREG_REG (dest))))
1389 return NULL;
48e1416a 1390
7497b98c 1391 /* Find the accumulator use within the operation. */
1392 if (code == FMA)
1393 {
1394 /* We only support accumulation via FMA in the ADD position. */
1395 if (!rtx_equal_p (dest, XEXP (src, 2)))
1396 return NULL;
1397 accum_pos = 2;
1398 }
1399 else if (rtx_equal_p (dest, XEXP (src, 0)))
0def560a 1400 accum_pos = 0;
7497b98c 1401 else if (rtx_equal_p (dest, XEXP (src, 1)))
1402 {
1403 /* The method of expansion that we are using; which includes the
1404 initialization of the expansions with zero and the summation of
1405 the expansions at the end of the computation will yield wrong
1406 results for (x = something - x) thus avoid using it in that case. */
1407 if (code == MINUS)
1408 return NULL;
1409 accum_pos = 1;
1410 }
0def560a 1411 else
1412 return NULL;
1413
7497b98c 1414 /* It must not otherwise be used. */
1415 if (code == FMA)
1416 {
1417 if (rtx_referenced_p (dest, XEXP (src, 0))
1418 || rtx_referenced_p (dest, XEXP (src, 1)))
1419 return NULL;
1420 }
1421 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
375bb675 1422 return NULL;
48e1416a 1423
7497b98c 1424 /* It must be used in exactly one insn. */
3b3940d7 1425 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
375bb675 1426 return NULL;
48e1416a 1427
e24a805c 1428 if (dump_file)
7497b98c 1429 {
1430 fprintf (dump_file, "\n;; Expanding Accumulator ");
1431 print_rtl (dump_file, dest);
1432 fprintf (dump_file, "\n");
1433 }
e24a805c 1434
3b3940d7 1435 if (debug_uses)
1436 /* Instead of resetting the debug insns, we could replace each
1437 debug use in the loop with the sum or product of all expanded
 1438 accumulators. Since we'll only know of all expansions at the
1439 end, we'd have to keep track of which vars_to_expand a debug
1440 insn in the loop references, take note of each copy of the
1441 debug insn during unrolling, and when it's all done, compute
1442 the sum or product of each variable and adjust the original
1443 debug insn and each copy thereof. What a pain! */
1444 reset_debug_uses_in_loop (loop, dest, debug_uses);
1445
375bb675 1446 /* Record the accumulator to expand. */
4c36ffe6 1447 ves = XNEW (struct var_to_expand);
375bb675 1448 ves->insn = insn;
375bb675 1449 ves->reg = copy_rtx (dest);
f1f41a6c 1450 ves->var_expansions.create (1);
b635a1a3 1451 ves->next = NULL;
375bb675 1452 ves->op = GET_CODE (src);
1453 ves->expansion_count = 0;
1454 ves->reuse_expansion = 0;
48e1416a 1455 return ves;
375bb675 1456}
1457
a9989fb4 1458/* Determine whether there is an induction variable in INSN that
48e1416a 1459 we would like to split during unrolling.
375bb675 1460
1461 I.e. replace
1462
1463 i = i + 1;
1464 ...
1465 i = i + 1;
1466 ...
1467 i = i + 1;
1468 ...
1469
 1470 chains of this type by
1471
1472 i0 = i + 1
1473 ...
1474 i = i0 + 1
1475 ...
1476 i = i0 + 2
1477 ...
1478
48e1416a 1479 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
375bb675 1480 an IV_TO_SPLIT structure, fill it with the relevant information and return a
a9989fb4 1481 pointer to it. */
1482
1483static struct iv_to_split *
3eeb4f9a 1484analyze_iv_to_split_insn (rtx_insn *insn)
a9989fb4 1485{
1486 rtx set, dest;
1487 struct rtx_iv iv;
1488 struct iv_to_split *ivts;
a53ff4c1 1489 bool ok;
a9989fb4 1490
1491 /* For now we just split the basic induction variables. Later this may be
1492 extended for example by selecting also addresses of memory references. */
1493 set = single_set (insn);
1494 if (!set)
1495 return NULL;
1496
1497 dest = SET_DEST (set);
1498 if (!REG_P (dest))
1499 return NULL;
1500
1501 if (!biv_p (insn, dest))
1502 return NULL;
1503
3f37d465 1504 ok = iv_analyze_result (insn, dest, &iv);
e8820970 1505
1506 /* This used to be an assert under the assumption that if biv_p returns
1507 true that iv_analyze_result must also return true. However, that
1508 assumption is not strictly correct as evidenced by pr25569.
1509
1510 Returning NULL when iv_analyze_result returns false is safe and
1511 avoids the problems in pr25569 until the iv_analyze_* routines
1512 can be fixed, which is apparently hard and time consuming
1513 according to their author. */
1514 if (! ok)
1515 return NULL;
a9989fb4 1516
1517 if (iv.step == const0_rtx
1518 || iv.mode != iv.extend_mode)
1519 return NULL;
1520
1521 /* Record the insn to split. */
4c36ffe6 1522 ivts = XNEW (struct iv_to_split);
a9989fb4 1523 ivts->insn = insn;
ef770610 1524 ivts->orig_var = dest;
a9989fb4 1525 ivts->base_var = NULL_RTX;
1526 ivts->step = iv.step;
b635a1a3 1527 ivts->next = NULL;
48e1416a 1528
a9989fb4 1529 return ivts;
1530}
1531
375bb675 1532/* Determine which insns in LOOP can be optimized.
 1533 Return an OPT_INFO struct with the relevant hash tables filled
 1534 with all insns to be optimized. The FIRST_NEW_BLOCK field
a9989fb4 1535 is undefined for the return value. */
1536
375bb675 1537static struct opt_info *
1538analyze_insns_in_loop (struct loop *loop)
a9989fb4 1539{
1540 basic_block *body, bb;
749ea85f 1541 unsigned i;
4c36ffe6 1542 struct opt_info *opt_info = XCNEW (struct opt_info);
3eeb4f9a 1543 rtx_insn *insn;
375bb675 1544 struct iv_to_split *ivts = NULL;
1545 struct var_to_expand *ves = NULL;
d9dd21a8 1546 iv_to_split **slot1;
1547 var_to_expand **slot2;
f1f41a6c 1548 vec<edge> edges = get_loop_exit_edges (loop);
749ea85f 1549 edge exit;
375bb675 1550 bool can_apply = false;
48e1416a 1551
a9989fb4 1552 iv_analysis_loop_init (loop);
1553
1554 body = get_loop_body (loop);
375bb675 1555
1556 if (flag_split_ivs_in_unroller)
b635a1a3 1557 {
c1f445d2 1558 opt_info->insns_to_split
1559 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
b635a1a3 1560 opt_info->iv_to_split_head = NULL;
1561 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1562 }
48e1416a 1563
375bb675 1564 /* Record the loop exit bb and loop preheader before the unrolling. */
88e6f696 1565 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
48e1416a 1566
f1f41a6c 1567 if (edges.length () == 1)
375bb675 1568 {
f1f41a6c 1569 exit = edges[0];
749ea85f 1570 if (!(exit->flags & EDGE_COMPLEX))
1571 {
1572 opt_info->loop_exit = split_edge (exit);
1573 can_apply = true;
1574 }
375bb675 1575 }
48e1416a 1576
375bb675 1577 if (flag_variable_expansion_in_unroller
1578 && can_apply)
b635a1a3 1579 {
c1f445d2 1580 opt_info->insns_with_var_to_expand
1581 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
b635a1a3 1582 opt_info->var_to_expand_head = NULL;
1583 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1584 }
48e1416a 1585
a9989fb4 1586 for (i = 0; i < loop->num_nodes; i++)
1587 {
1588 bb = body[i];
1589 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1590 continue;
1591
1592 FOR_BB_INSNS (bb, insn)
375bb675 1593 {
1594 if (!INSN_P (insn))
1595 continue;
48e1416a 1596
c1f445d2 1597 if (opt_info->insns_to_split)
375bb675 1598 ivts = analyze_iv_to_split_insn (insn);
48e1416a 1599
375bb675 1600 if (ivts)
1601 {
c1f445d2 1602 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
b635a1a3 1603 gcc_assert (*slot1 == NULL);
375bb675 1604 *slot1 = ivts;
b635a1a3 1605 *opt_info->iv_to_split_tail = ivts;
1606 opt_info->iv_to_split_tail = &ivts->next;
375bb675 1607 continue;
1608 }
48e1416a 1609
c1f445d2 1610 if (opt_info->insns_with_var_to_expand)
375bb675 1611 ves = analyze_insn_to_expand_var (loop, insn);
48e1416a 1612
375bb675 1613 if (ves)
1614 {
c1f445d2 1615 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
b635a1a3 1616 gcc_assert (*slot2 == NULL);
375bb675 1617 *slot2 = ves;
b635a1a3 1618 *opt_info->var_to_expand_tail = ves;
1619 opt_info->var_to_expand_tail = &ves->next;
375bb675 1620 }
1621 }
a9989fb4 1622 }
48e1416a 1623
f1f41a6c 1624 edges.release ();
a9989fb4 1625 free (body);
375bb675 1626 return opt_info;
a9989fb4 1627}
1628
 1629/* Called just before loop duplication. Record the start of the duplicated
375bb675 1630 area in OPT_INFO. */
a9989fb4 1631
48e1416a 1632static void
375bb675 1633opt_info_start_duplication (struct opt_info *opt_info)
a9989fb4 1634{
375bb675 1635 if (opt_info)
fe672ac0 1636 opt_info->first_new_block = last_basic_block_for_fn (cfun);
a9989fb4 1637}
1638
1639/* Determine the number of iterations between initialization of the base
1640 variable and the current copy (N_COPY). N_COPIES is the total number
1641 of newly created copies. UNROLLING is true if we are unrolling
1642 (not peeling) the loop. */
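/* For example: when unrolling with N_COPIES == 3, the initialization is
   emitted in the original body (copy 0) and the delta for copy N_COPY is
   simply N_COPY; when peeling, copy 1 holds the initialization, so copies
   2 and 3 get deltas 1 and 2, and the original body (number 0), which
   comes last, gets delta N_COPIES.  */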
1643
1644static unsigned
1645determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1646{
1647 if (unrolling)
1648 {
1649 /* If we are unrolling, initialization is done in the original loop
1650 body (number 0). */
1651 return n_copy;
1652 }
1653 else
1654 {
	1655      /* If we are peeling, the copy in which the initialization occurs has
	1656	 number 1.  The original loop (number 0) is the last.  */
1657 if (n_copy)
1658 return n_copy - 1;
1659 else
1660 return n_copies;
1661 }
1662}
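
/* For illustration: with N_COPIES == 3, the copies created by unrolling
   are numbered 1, 2 and 3 and the original body is number 0.  When
   UNROLLING, initialization is done in the original body, so the deltas
   are simply 0, 1, 2 and 3.  When peeling, initialization happens in
   copy number 1, so copy 1 gets delta 0, copy 2 gets 1, copy 3 gets 2,
   and the original body, which runs last, gets delta N_COPIES == 3.  */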
1663
b635a1a3	1664/* Allocate the basic variable for the induction variable chain.  */
a9989fb4 1665
b635a1a3 1666static void
1667allocate_basic_variable (struct iv_to_split *ivts)
a9989fb4 1668{
686fd44c 1669 rtx expr = SET_SRC (single_set (ivts->insn));
a9989fb4 1670
1671 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
a9989fb4 1672}
1673
1674/* Insert initialization of basic variable of IVTS before INSN, taking
1675 the initial value from INSN. */
1676
1677static void
222dc1d3 1678insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
a9989fb4 1679{
686fd44c 1680 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
222dc1d3 1681 rtx_insn *seq;
a9989fb4 1682
1683 start_sequence ();
1684 expr = force_operand (expr, ivts->base_var);
1685 if (expr != ivts->base_var)
1686 emit_move_insn (ivts->base_var, expr);
1687 seq = get_insns ();
1688 end_sequence ();
1689
1690 emit_insn_before (seq, insn);
1691}
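
/* For illustration: if INSN is the increment "i = i + 4" in the copy
   where DELTA == 0, the code above emits the equivalent of

       base_var = i + 4;

   immediately before INSN; force_operand may expand this into several
   insns when the copied source is not a valid operand.  */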
1692
1693/* Replace the use of induction variable described in IVTS in INSN
1694 by base variable + DELTA * step. */
1695
1696static void
222dc1d3 1697split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
a9989fb4 1698{
222dc1d3 1699 rtx expr, *loc, incr, var;
1700 rtx_insn *seq;
3754d046 1701 machine_mode mode = GET_MODE (ivts->base_var);
a9989fb4 1702 rtx src, dest, set;
1703
1704 /* Construct base + DELTA * step. */
1705 if (!delta)
1706 expr = ivts->base_var;
1707 else
1708 {
1709 incr = simplify_gen_binary (MULT, mode,
1710 ivts->step, gen_int_mode (delta, mode));
1711 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1712 ivts->base_var, incr);
1713 }
1714
1715 /* Figure out where to do the replacement. */
686fd44c 1716 loc = &SET_SRC (single_set (insn));
a9989fb4 1717
1718 /* If we can make the replacement right away, we're done. */
1719 if (validate_change (insn, loc, expr, 0))
1720 return;
1721
1722 /* Otherwise, force EXPR into a register and try again. */
1723 start_sequence ();
1724 var = gen_reg_rtx (mode);
1725 expr = force_operand (expr, var);
1726 if (expr != var)
1727 emit_move_insn (var, expr);
1728 seq = get_insns ();
1729 end_sequence ();
1730 emit_insn_before (seq, insn);
48e1416a 1731
a9989fb4 1732 if (validate_change (insn, loc, var, 0))
1733 return;
1734
1735 /* The last chance. Try recreating the assignment in insn
1736 completely from scratch. */
1737 set = single_set (insn);
1738 gcc_assert (set);
1739
1740 start_sequence ();
1741 *loc = var;
1742 src = copy_rtx (SET_SRC (set));
1743 dest = copy_rtx (SET_DEST (set));
1744 src = force_operand (src, dest);
1745 if (src != dest)
1746 emit_move_insn (dest, src);
1747 seq = get_insns ();
1748 end_sequence ();
48e1416a 1749
a9989fb4 1750 emit_insn_before (seq, insn);
1751 delete_insn (insn);
1752}
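
/* For illustration: for an IV insn "i = i + step" in the copy whose
   DELTA is 2, the source of the set is rewritten to the equivalent of
   "base_var + 2 * step".  If that expression is not accepted by the
   insn's constraints, it is first forced into a fresh pseudo, and as a
   last resort the whole assignment is re-emitted from scratch.  */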
1753
a9989fb4 1754
6dce5fff 1755/* Return one expansion of the accumulator recorded in struct VE. */
a9989fb4 1756
375bb675 1757static rtx
1758get_expansion (struct var_to_expand *ve)
1759{
1760 rtx reg;
48e1416a 1761
375bb675 1762 if (ve->reuse_expansion == 0)
1763 reg = ve->reg;
1764 else
f1f41a6c 1765 reg = ve->var_expansions[ve->reuse_expansion - 1];
48e1416a 1766
f1f41a6c 1767 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
375bb675 1768 ve->reuse_expansion = 0;
48e1416a 1769 else
375bb675 1770 ve->reuse_expansion++;
48e1416a 1771
375bb675 1772 return reg;
1773}
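
/* For illustration, assuming the reuse counter starts at zero: with two
   recorded expansions r1 and r2, successive calls return VE->reg, r1,
   r2, VE->reg, r1, ... i.e. the original accumulator and its expansions
   are handed out round-robin once no new expansions may be created.  */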
a9989fb4 1774
a9989fb4 1775
48e1416a 1776/* Given INSN replace the uses of the accumulator recorded in VE
375bb675 1777 with a new register. */
1778
1779static void
222dc1d3 1780expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
375bb675 1781{
1782 rtx new_reg, set;
1783 bool really_new_expansion = false;
48e1416a 1784
375bb675 1785 set = single_set (insn);
972e95af 1786 gcc_assert (set);
48e1416a 1787
375bb675 1788 /* Generate a new register only if the expansion limit has not been
1789 reached. Else reuse an already existing expansion. */
1790 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1791 {
1792 really_new_expansion = true;
1793 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1794 }
1795 else
1796 new_reg = get_expansion (ve);
1797
d022e236 1798 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
375bb675 1799 if (apply_change_group ())
1800 if (really_new_expansion)
1801 {
f1f41a6c 1802 ve->var_expansions.safe_push (new_reg);
375bb675 1803 ve->expansion_count++;
1804 }
1805}
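
/* For illustration: for an accumulator insn "sum = sum + x", every
   occurrence of the destination register is replaced, so the unrolled
   copies become "sum1 = sum1 + x", "sum2 = sum2 + x", ... while the
   number of expansions stays below PARAM_MAX_VARIABLE_EXPANSIONS;
   after that, previously created expansions are reused.  */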
1806
b635a1a3	1807/* Initialize the variable expansions in the loop preheader.  PLACE is the
	1808   loop-preheader basic block where the initialization of the
	1809   expansions should take place.  The expansions are initialized with
	1810   (-0) when the operation is plus or minus, to honor signed zero.  This
	1811   way we can prevent cases where the sign of the final result is
	1812   affected by the sign of the expansion.  Here is an example to
1813 demonstrate this:
48e1416a 1814
428c97ad 1815 for (i = 0 ; i < n; i++)
1816 sum += something;
1817
1818 ==>
1819
1820 sum += something
1821 ....
1822 i = i+1;
1823 sum1 += something
1824 ....
1825 i = i+1
1826 sum2 += something;
1827 ....
48e1416a 1828
428c97ad	1829   When SUM is initialized with -zero and SOMETHING is also -zero, the
	1830   final result of sum should be -zero, thus the expansions sum1 and sum2
1831 should be initialized with -zero as well (otherwise we will get +zero
1832 as the final result). */
375bb675 1833
b635a1a3 1834static void
1835insert_var_expansion_initialization (struct var_to_expand *ve,
1836 basic_block place)
375bb675 1837{
222dc1d3 1838 rtx_insn *seq;
1839 rtx var, zero_init;
375bb675 1840 unsigned i;
3754d046 1841 machine_mode mode = GET_MODE (ve->reg);
428c97ad 1842 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1843
f1f41a6c 1844 if (ve->var_expansions.length () == 0)
b635a1a3 1845 return;
48e1416a 1846
375bb675 1847 start_sequence ();
7497b98c 1848 switch (ve->op)
1849 {
1850 case FMA:
1851 /* Note that we only accumulate FMA via the ADD operand. */
1852 case PLUS:
1853 case MINUS:
f1f41a6c 1854 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1855 {
1856 if (honor_signed_zero_p)
1857 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1858 else
1859 zero_init = CONST0_RTX (mode);
1860 emit_move_insn (var, zero_init);
1861 }
1862 break;
1863
1864 case MULT:
f1f41a6c 1865 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1866 {
1867 zero_init = CONST1_RTX (GET_MODE (var));
1868 emit_move_insn (var, zero_init);
1869 }
1870 break;
1871
1872 default:
1873 gcc_unreachable ();
1874 }
48e1416a 1875
375bb675 1876 seq = get_insns ();
1877 end_sequence ();
48e1416a 1878
d022e236 1879 emit_insn_after (seq, BB_END (place));
375bb675 1880}
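
/* For illustration: with two expansions sum1 and sum2 of a
   floating-point PLUS accumulator, the preheader receives the
   equivalent of

       sum1 = -0.0;
       sum2 = -0.0;

   (plain zero when signed zeros need not be honored), while for a MULT
   accumulator the expansions are initialized to 1.  */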
1881
b635a1a3 1882/* Combine the variable expansions at the loop exit. PLACE is the
1883 loop exit basic block where the summation of the expansions should
1884 take place. */
375bb675 1885
b635a1a3 1886static void
1887combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
375bb675 1888{
375bb675 1889 rtx sum = ve->reg;
222dc1d3 1890 rtx expr, var;
1891 rtx_insn *seq, *insn;
375bb675 1892 unsigned i;
1893
f1f41a6c 1894 if (ve->var_expansions.length () == 0)
b635a1a3 1895 return;
48e1416a 1896
375bb675 1897 start_sequence ();
7497b98c 1898 switch (ve->op)
1899 {
1900 case FMA:
1901 /* Note that we only accumulate FMA via the ADD operand. */
1902 case PLUS:
1903 case MINUS:
f1f41a6c 1904 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1905 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1906 break;
1907
1908 case MULT:
f1f41a6c 1909 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1910 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1911 break;
1912
1913 default:
1914 gcc_unreachable ();
1915 }
48e1416a 1916
375bb675 1917 expr = force_operand (sum, ve->reg);
1918 if (expr != ve->reg)
1919 emit_move_insn (ve->reg, expr);
1920 seq = get_insns ();
1921 end_sequence ();
48e1416a 1922
375bb675 1923 insn = BB_HEAD (place);
1924 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1925 insn = NEXT_INSN (insn);
1926
1927 emit_insn_after (seq, insn);
375bb675 1928}
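
/* For illustration: continuing the example above, the loop exit block
   receives, right after its NOTE_INSN_BASIC_BLOCK note, the equivalent
   of

       sum = sum2 + (sum1 + sum);

   (a product for a MULT accumulator), folding the partial results of
   all expansions back into the original accumulator VE->reg.  */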
1929
ef770610 1930/* Strip away REG_EQUAL notes for IVs we're splitting.
1931
1932 Updating REG_EQUAL notes for IVs we split is tricky: We
1933 cannot tell until after unrolling, DF-rescanning, and liveness
1934 updating, whether an EQ_USE is reached by the split IV while
1935 the IV reg is still live. See PR55006.
1936
1937 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1938 because RTL loop-iv requires us to defer rescanning insns and
1939 any notes attached to them. So resort to old techniques... */
1940
1941static void
222dc1d3 1942maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
ef770610 1943{
1944 struct iv_to_split *ivts;
1945 rtx note = find_reg_equal_equiv_note (insn);
1946 if (! note)
1947 return;
1948 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1949 if (reg_mentioned_p (ivts->orig_var, note))
1950 {
1951 remove_note (insn, note);
1952 return;
1953 }
1954}
1955
48e1416a 1956/* Apply loop optimizations in loop copies using the
	1957   data gathered during the unrolling.  Structure
375bb675	1958   OPT_INFO records that data.
48e1416a 1959
a9989fb4 1960 UNROLLING is true if we unrolled (not peeled) the loop.
	1961   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
1962 the loop (as it should happen in complete unrolling, but not in ordinary
1963 peeling of the loop). */
1964
1965static void
48e1416a 1966apply_opt_in_copies (struct opt_info *opt_info,
1967 unsigned n_copies, bool unrolling,
375bb675 1968 bool rewrite_original_loop)
a9989fb4 1969{
1970 unsigned i, delta;
1971 basic_block bb, orig_bb;
222dc1d3 1972 rtx_insn *insn, *orig_insn, *next;
a9989fb4 1973 struct iv_to_split ivts_templ, *ivts;
375bb675 1974 struct var_to_expand ve_templ, *ves;
48e1416a 1975
a9989fb4 1976 /* Sanity check -- we need to put initialization in the original loop
1977 body. */
1978 gcc_assert (!unrolling || rewrite_original_loop);
48e1416a 1979
a9989fb4 1980 /* Allocate the basic variables (i0). */
c1f445d2 1981 if (opt_info->insns_to_split)
b635a1a3 1982 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1983 allocate_basic_variable (ivts);
48e1416a 1984
fe672ac0 1985 for (i = opt_info->first_new_block;
1986 i < (unsigned) last_basic_block_for_fn (cfun);
1987 i++)
a9989fb4 1988 {
f5a6b05f 1989 bb = BASIC_BLOCK_FOR_FN (cfun, i);
01020a5f 1990 orig_bb = get_bb_original (bb);
48e1416a 1991
01020a5f	1992      /* bb->aux holds the position in the copy sequence initialized by
1993 duplicate_loop_to_header_edge. */
1994 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
a9989fb4 1995 unrolling);
75cc36a4 1996 bb->aux = 0;
a9989fb4 1997 orig_insn = BB_HEAD (orig_bb);
ef770610 1998 FOR_BB_INSNS_SAFE (bb, insn, next)
375bb675 1999 {
0e6893c5 2000 if (!INSN_P (insn)
2001 || (DEBUG_INSN_P (insn)
2002 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
375bb675 2003 continue;
48e1416a 2004
0e6893c5 2005 while (!INSN_P (orig_insn)
2006 || (DEBUG_INSN_P (orig_insn)
2007 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2008 == LABEL_DECL)))
375bb675 2009 orig_insn = NEXT_INSN (orig_insn);
48e1416a 2010
375bb675 2011 ivts_templ.insn = orig_insn;
2012 ve_templ.insn = orig_insn;
48e1416a 2013
375bb675 2014 /* Apply splitting iv optimization. */
c1f445d2 2015 if (opt_info->insns_to_split)
375bb675 2016 {
ef770610 2017 maybe_strip_eq_note_for_split_iv (opt_info, insn);
2018
c1f445d2 2019 ivts = opt_info->insns_to_split->find (&ivts_templ);
48e1416a 2020
375bb675 2021 if (ivts)
2022 {
19ef3705 2023 gcc_assert (GET_CODE (PATTERN (insn))
2024 == GET_CODE (PATTERN (orig_insn)));
48e1416a 2025
375bb675 2026 if (!delta)
2027 insert_base_initialization (ivts, insn);
2028 split_iv (ivts, insn, delta);
2029 }
2030 }
2031 /* Apply variable expansion optimization. */
c1f445d2 2032 if (unrolling && opt_info->insns_with_var_to_expand)
375bb675 2033 {
4077bf7a 2034 ves = (struct var_to_expand *)
c1f445d2 2035 opt_info->insns_with_var_to_expand->find (&ve_templ);
375bb675 2036 if (ves)
48e1416a 2037 {
19ef3705 2038 gcc_assert (GET_CODE (PATTERN (insn))
2039 == GET_CODE (PATTERN (orig_insn)));
375bb675 2040 expand_var_during_unrolling (ves, insn);
2041 }
2042 }
2043 orig_insn = NEXT_INSN (orig_insn);
2044 }
a9989fb4 2045 }
2046
2047 if (!rewrite_original_loop)
2048 return;
48e1416a 2049
375bb675 2050 /* Initialize the variable expansions in the loop preheader
48e1416a 2051 and take care of combining them at the loop exit. */
c1f445d2 2052 if (opt_info->insns_with_var_to_expand)
375bb675 2053 {
b635a1a3 2054 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2055 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2056 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2057 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
375bb675 2058 }
48e1416a 2059
a9989fb4	2060  /* Rewrite also the original loop body.  Its blocks are found as the originals
	2061     of the blocks in the last copied iteration, i.e. those that have
01020a5f 2062 get_bb_copy (get_bb_original (bb)) == bb. */
fe672ac0 2063 for (i = opt_info->first_new_block;
2064 i < (unsigned) last_basic_block_for_fn (cfun);
2065 i++)
a9989fb4 2066 {
f5a6b05f 2067 bb = BASIC_BLOCK_FOR_FN (cfun, i);
01020a5f 2068 orig_bb = get_bb_original (bb);
2069 if (get_bb_copy (orig_bb) != bb)
a9989fb4 2070 continue;
48e1416a 2071
a9989fb4 2072 delta = determine_split_iv_delta (0, n_copies, unrolling);
2073 for (orig_insn = BB_HEAD (orig_bb);
375bb675 2074 orig_insn != NEXT_INSN (BB_END (bb));
2075 orig_insn = next)
2076 {
2077 next = NEXT_INSN (orig_insn);
48e1416a 2078
375bb675 2079 if (!INSN_P (orig_insn))
2080 continue;
48e1416a 2081
375bb675 2082 ivts_templ.insn = orig_insn;
c1f445d2 2083 if (opt_info->insns_to_split)
375bb675 2084 {
ef770610 2085 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2086
4077bf7a 2087 ivts = (struct iv_to_split *)
c1f445d2 2088 opt_info->insns_to_split->find (&ivts_templ);
375bb675 2089 if (ivts)
2090 {
2091 if (!delta)
2092 insert_base_initialization (ivts, orig_insn);
2093 split_iv (ivts, orig_insn, delta);
2094 continue;
2095 }
2096 }
48e1416a 2097
375bb675 2098 }
2099 }
2100}
a9989fb4 2101
375bb675 2102/* Release OPT_INFO. */
a9989fb4 2103
2104static void
375bb675 2105free_opt_info (struct opt_info *opt_info)
a9989fb4 2106{
c1f445d2 2107 delete opt_info->insns_to_split;
2108 opt_info->insns_to_split = NULL;
2109 if (opt_info->insns_with_var_to_expand)
375bb675 2110 {
b635a1a3 2111 struct var_to_expand *ves;
2112
2113 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
f1f41a6c 2114 ves->var_expansions.release ();
c1f445d2 2115 delete opt_info->insns_with_var_to_expand;
2116 opt_info->insns_with_var_to_expand = NULL;
375bb675 2117 }
2118 free (opt_info);
a9989fb4 2119}