]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/loop-unroll.c
* asan.c (create_cond_insert_point): Do not update edge count.
[thirdparty/gcc.git] / gcc / loop-unroll.c
CommitLineData
c836de3f 1/* Loop unrolling.
aad93da1 2 Copyright (C) 2002-2017 Free Software Foundation, Inc.
ad5201d0 3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8c4c00c1 8Software Foundation; either version 3, or (at your option) any later
ad5201d0 9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
ad5201d0 19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
9ef16211 23#include "backend.h"
7c29e30e 24#include "target.h"
ad5201d0 25#include "rtl.h"
41a8aa41 26#include "tree.h"
7c29e30e 27#include "cfghooks.h"
ad7b10a2 28#include "memmodel.h"
7c29e30e 29#include "optabs.h"
30#include "emit-rtl.h"
31#include "recog.h"
886c1262 32#include "profile.h"
94ea8568 33#include "cfgrtl.h"
ad5201d0 34#include "cfgloop.h"
ad5201d0 35#include "params.h"
d53441c8 36#include "dojump.h"
ad5201d0 37#include "expr.h"
b9ed1410 38#include "dumpfile.h"
ad5201d0 39
c836de3f 40/* This pass performs loop unrolling. We only perform this
41 optimization on innermost loops (with single exception) because
ad5201d0 42 the impact on performance is greatest here, and we want to avoid
43 unnecessary code size growth. The gain is caused by greater sequentiality
917bbcab 44 of code, better code to optimize for further passes and in some cases
ad5201d0 45 by fewer testings of exit conditions. The main problem is code growth,
46 that impacts performance negatively due to effect of caches.
47
48 What we do:
49
ad5201d0 50 -- unrolling of loops that roll constant times; this is almost always
51 win, as we get rid of exit condition tests.
52 -- unrolling of loops that roll number of times that we can compute
53 in runtime; we also get rid of exit condition tests here, but there
54 is the extra expense for calculating the number of iterations
55 -- simple unrolling of remaining loops; this is performed only if we
56 are asked to, as the gain is questionable in this case and often
57 it may even slow down the code
58 For more detailed descriptions of each of those, see comments at
59 appropriate function below.
60
61 There is a lot of parameters (defined and described in params.def) that
c836de3f 62 control how much we unroll.
ad5201d0 63
64 ??? A great problem is that we don't have a good way how to determine
65 how many times we should unroll the loop; the experiments I have made
66 showed that this choice may affect performance in order of several %.
67 */
68
a9989fb4 69/* Information about induction variables to split. */
70
71struct iv_to_split
72{
222dc1d3 73 rtx_insn *insn; /* The insn in that the induction variable occurs. */
ef770610 74 rtx orig_var; /* The variable (register) for the IV before split. */
a9989fb4 75 rtx base_var; /* The variable on that the values in the further
76 iterations are based. */
77 rtx step; /* Step of the induction variable. */
b635a1a3 78 struct iv_to_split *next; /* Next entry in walking order. */
a9989fb4 79};
80
375bb675 81/* Information about accumulators to expand. */
82
83struct var_to_expand
84{
222dc1d3 85 rtx_insn *insn; /* The insn in that the variable expansion occurs. */
dac49aa5 86 rtx reg; /* The accumulator which is expanded. */
f1f41a6c 87 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
b635a1a3 88 struct var_to_expand *next; /* Next entry in walking order. */
48e1416a 89 enum rtx_code op; /* The type of the accumulation - addition, subtraction
375bb675 90 or multiplication. */
91 int expansion_count; /* Count the number of expansions generated so far. */
92 int reuse_expansion; /* The expansion we intend to reuse to expand
48e1416a 93 the accumulator. If REUSE_EXPANSION is 0 reuse
94 the original accumulator. Else use
375bb675 95 var_expansions[REUSE_EXPANSION - 1]. */
96};
97
d9dd21a8 98/* Hashtable helper for iv_to_split. */
99
298e7f9a 100struct iv_split_hasher : free_ptr_hash <iv_to_split>
d9dd21a8 101{
9969c043 102 static inline hashval_t hash (const iv_to_split *);
103 static inline bool equal (const iv_to_split *, const iv_to_split *);
d9dd21a8 104};
105
106
107/* A hash function for information about insns to split. */
108
109inline hashval_t
9969c043 110iv_split_hasher::hash (const iv_to_split *ivts)
d9dd21a8 111{
112 return (hashval_t) INSN_UID (ivts->insn);
113}
114
115/* An equality functions for information about insns to split. */
116
117inline bool
9969c043 118iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
d9dd21a8 119{
120 return i1->insn == i2->insn;
121}
122
123/* Hashtable helper for iv_to_split. */
124
298e7f9a 125struct var_expand_hasher : free_ptr_hash <var_to_expand>
d9dd21a8 126{
9969c043 127 static inline hashval_t hash (const var_to_expand *);
128 static inline bool equal (const var_to_expand *, const var_to_expand *);
d9dd21a8 129};
130
131/* Return a hash for VES. */
132
133inline hashval_t
9969c043 134var_expand_hasher::hash (const var_to_expand *ves)
d9dd21a8 135{
136 return (hashval_t) INSN_UID (ves->insn);
137}
138
139/* Return true if I1 and I2 refer to the same instruction. */
140
141inline bool
9969c043 142var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
d9dd21a8 143{
144 return i1->insn == i2->insn;
145}
146
375bb675 147/* Information about optimization applied in
148 the unrolled loop. */
149
150struct opt_info
a9989fb4 151{
c1f445d2 152 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
d9dd21a8 153 split. */
b635a1a3 154 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
155 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
c1f445d2 156 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
d9dd21a8 157 insns with accumulators to expand. */
b635a1a3 158 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
159 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
375bb675 160 unsigned first_new_block; /* The first basic block that was
161 duplicated. */
162 basic_block loop_exit; /* The loop exit basic block. */
163 basic_block loop_preheader; /* The loop preheader basic block. */
a9989fb4 164};
165
0051c76a 166static void decide_unroll_stupid (struct loop *, int);
167static void decide_unroll_constant_iterations (struct loop *, int);
168static void decide_unroll_runtime_iterations (struct loop *, int);
7194de72 169static void unroll_loop_stupid (struct loop *);
c836de3f 170static void decide_unrolling (int);
7194de72 171static void unroll_loop_constant_iterations (struct loop *);
172static void unroll_loop_runtime_iterations (struct loop *);
375bb675 173static struct opt_info *analyze_insns_in_loop (struct loop *);
174static void opt_info_start_duplication (struct opt_info *);
175static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
176static void free_opt_info (struct opt_info *);
222dc1d3 177static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
3b3940d7 178static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
3eeb4f9a 179static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
222dc1d3 180static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
b635a1a3 181static void insert_var_expansion_initialization (struct var_to_expand *,
182 basic_block);
183static void combine_var_copies_in_loop_exit (struct var_to_expand *,
184 basic_block);
375bb675 185static rtx get_expansion (struct var_to_expand *);
ad5201d0 186
c836de3f 187/* Emit a message summarizing the unroll that will be
f55775aa 188 performed for LOOP, along with the loop's location LOCUS, if
189 appropriate given the dump or -fopt-info settings. */
190
191static void
c836de3f 192report_unroll (struct loop *loop, location_t locus)
f55775aa 193{
ffdaf8f1 194 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
f55775aa 195
830b796a 196 if (loop->lpt_decision.decision == LPT_NONE)
197 return;
198
f55775aa 199 if (!dump_enabled_p ())
200 return;
201
c836de3f 202 dump_printf_loc (report_flags, locus,
203 "loop unrolled %d times",
204 loop->lpt_decision.times);
db9cef39 205 if (profile_info && loop->header->count.initialized_p ())
f55775aa 206 dump_printf (report_flags,
c836de3f 207 " (header execution count %d)",
db9cef39 208 (int)loop->header->count.to_gcov_type ());
f55775aa 209
210 dump_printf (report_flags, "\n");
211}
212
c836de3f 213/* Decide whether unroll loops and how much. */
ad5201d0 214static void
c836de3f 215decide_unrolling (int flags)
ad5201d0 216{
17519ba0 217 struct loop *loop;
ad5201d0 218
219 /* Scan the loops, inner ones first. */
f21d4d00 220 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
ad5201d0 221 {
ad5201d0 222 loop->lpt_decision.decision = LPT_NONE;
f55775aa 223 location_t locus = get_loop_location (loop);
ad5201d0 224
f55775aa 225 if (dump_enabled_p ())
ffdaf8f1 226 dump_printf_loc (MSG_NOTE, locus,
f55775aa 227 ";; *** Considering loop %d at BB %d for "
c836de3f 228 "unrolling ***\n",
f55775aa 229 loop->num, loop->header->index);
ad5201d0 230
231 /* Do not peel cold areas. */
0bfd8d5c 232 if (optimize_loop_for_size_p (loop))
ad5201d0 233 {
450d042a 234 if (dump_file)
235 fprintf (dump_file, ";; Not considering loop, cold area\n");
ad5201d0 236 continue;
237 }
238
239 /* Can the loop be manipulated? */
240 if (!can_duplicate_loop_p (loop))
241 {
450d042a 242 if (dump_file)
243 fprintf (dump_file,
ad5201d0 244 ";; Not considering loop, cannot duplicate\n");
ad5201d0 245 continue;
246 }
247
248 /* Skip non-innermost loops. */
249 if (loop->inner)
250 {
450d042a 251 if (dump_file)
252 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
ad5201d0 253 continue;
254 }
255
256 loop->ninsns = num_loop_insns (loop);
257 loop->av_ninsns = average_num_loop_insns (loop);
258
259 /* Try transformations one by one in decreasing order of
260 priority. */
261
0051c76a 262 decide_unroll_constant_iterations (loop, flags);
ad5201d0 263 if (loop->lpt_decision.decision == LPT_NONE)
0051c76a 264 decide_unroll_runtime_iterations (loop, flags);
ad5201d0 265 if (loop->lpt_decision.decision == LPT_NONE)
0051c76a 266 decide_unroll_stupid (loop, flags);
ad5201d0 267
c836de3f 268 report_unroll (loop, locus);
ad5201d0 269 }
ad5201d0 270}
271
c836de3f 272/* Unroll LOOPS. */
273void
274unroll_loops (int flags)
ad5201d0 275{
c836de3f 276 struct loop *loop;
277 bool changed = false;
ad5201d0 278
c836de3f 279 /* Now decide rest of unrolling. */
280 decide_unrolling (flags);
ad5201d0 281
c836de3f 282 /* Scan the loops, inner ones first. */
283 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
ad5201d0 284 {
c836de3f 285 /* And perform the appropriate transformations. */
286 switch (loop->lpt_decision.decision)
3ad4992f 287 {
c836de3f 288 case LPT_UNROLL_CONSTANT:
289 unroll_loop_constant_iterations (loop);
290 changed = true;
291 break;
292 case LPT_UNROLL_RUNTIME:
293 unroll_loop_runtime_iterations (loop);
294 changed = true;
295 break;
296 case LPT_UNROLL_STUPID:
297 unroll_loop_stupid (loop);
298 changed = true;
299 break;
300 case LPT_NONE:
301 break;
302 default:
303 gcc_unreachable ();
ad5201d0 304 }
ad5201d0 305 }
306
c836de3f 307 if (changed)
308 {
309 calculate_dominance_info (CDI_DOMINATORS);
310 fix_loop_structure (NULL);
311 }
3ad4992f 312
c836de3f 313 iv_analysis_done ();
314}
ad5201d0 315
c836de3f 316/* Check whether exit of the LOOP is at the end of loop body. */
3ad4992f 317
c836de3f 318static bool
319loop_exit_at_end_p (struct loop *loop)
ad5201d0 320{
f9cce2dc 321 struct niter_desc *desc = get_simple_loop_desc (loop);
c836de3f 322 rtx_insn *insn;
a5414ff5 323
c836de3f 324 /* We should never have conditional in latch block. */
325 gcc_assert (desc->in_edge->dest != loop->header);
48e1416a 326
c836de3f 327 if (desc->in_edge->dest != loop->latch)
328 return false;
a9989fb4 329
c836de3f 330 /* Check that the latch is empty. */
331 FOR_BB_INSNS (loop->latch, insn)
332 {
333 if (INSN_P (insn) && active_insn_p (insn))
334 return false;
a5414ff5 335 }
ad5201d0 336
c836de3f 337 return true;
ad5201d0 338}
339
450d042a 340/* Decide whether to unroll LOOP iterating constant number of times
341 and how much. */
f9cce2dc 342
ad5201d0 343static void
0051c76a 344decide_unroll_constant_iterations (struct loop *loop, int flags)
ad5201d0 345{
f9cce2dc 346 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
347 struct niter_desc *desc;
5de9d3ed 348 widest_int iterations;
ad5201d0 349
350 if (!(flags & UAP_UNROLL))
351 {
352 /* We were not asked to, just return back silently. */
353 return;
354 }
355
450d042a 356 if (dump_file)
357 fprintf (dump_file,
358 "\n;; Considering unrolling loop with constant "
359 "number of iterations\n");
ad5201d0 360
361 /* nunroll = total number of copies of the original loop body in
362 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
363 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
450d042a 364 nunroll_by_av
365 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
ad5201d0 366 if (nunroll > nunroll_by_av)
367 nunroll = nunroll_by_av;
368 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
369 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
370
e7f33cf8 371 if (targetm.loop_unroll_adjust)
372 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
373
ad5201d0 374 /* Skip big loops. */
375 if (nunroll <= 1)
376 {
450d042a 377 if (dump_file)
378 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 379 return;
380 }
381
382 /* Check for simple loops. */
f9cce2dc 383 desc = get_simple_loop_desc (loop);
ad5201d0 384
385 /* Check number of iterations. */
f9cce2dc 386 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
ad5201d0 387 {
450d042a 388 if (dump_file)
389 fprintf (dump_file,
390 ";; Unable to prove that the loop iterates constant times\n");
ad5201d0 391 return;
392 }
393
c8fbcf58 394 /* Check whether the loop rolls enough to consider.
395 Consult also loop bounds and profile; in the case the loop has more
396 than one exit it may well loop less than determined maximal number
397 of iterations. */
398 if (desc->niter < 2 * nunroll
f86b328b 399 || ((get_estimated_loop_iterations (loop, &iterations)
53b01069 400 || get_likely_max_loop_iterations (loop, &iterations))
796b6678 401 && wi::ltu_p (iterations, 2 * nunroll)))
ad5201d0 402 {
450d042a 403 if (dump_file)
404 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 405 return;
406 }
407
408 /* Success; now compute number of iterations to unroll. We alter
409 nunroll so that as few as possible copies of loop body are
d01481af 410 necessary, while still not decreasing the number of unrollings
ad5201d0 411 too much (at most by 1). */
412 best_copies = 2 * nunroll + 10;
413
414 i = 2 * nunroll + 2;
f9cce2dc 415 if (i - 1 >= desc->niter)
416 i = desc->niter - 2;
ad5201d0 417
418 for (; i >= nunroll - 1; i--)
419 {
f9cce2dc 420 unsigned exit_mod = desc->niter % (i + 1);
ad5201d0 421
f9cce2dc 422 if (!loop_exit_at_end_p (loop))
ad5201d0 423 n_copies = exit_mod + i + 1;
f9cce2dc 424 else if (exit_mod != (unsigned) i
425 || desc->noloop_assumptions != NULL_RTX)
ad5201d0 426 n_copies = exit_mod + i + 2;
427 else
428 n_copies = i + 1;
429
430 if (n_copies < best_copies)
431 {
432 best_copies = n_copies;
433 best_unroll = i;
434 }
435 }
436
ad5201d0 437 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
438 loop->lpt_decision.times = best_unroll;
439}
440
1c18ed19 441/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
442 The transformation does this:
3ad4992f 443
ad5201d0 444 for (i = 0; i < 102; i++)
445 body;
3ad4992f 446
1c18ed19 447 ==> (LOOP->LPT_DECISION.TIMES == 3)
3ad4992f 448
ad5201d0 449 i = 0;
450 body; i++;
451 body; i++;
452 while (i < 102)
453 {
454 body; i++;
455 body; i++;
456 body; i++;
457 body; i++;
458 }
459 */
460static void
7194de72 461unroll_loop_constant_iterations (struct loop *loop)
ad5201d0 462{
463 unsigned HOST_WIDE_INT niter;
464 unsigned exit_mod;
f3c40e6d 465 unsigned i;
f3c40e6d 466 edge e;
ad5201d0 467 unsigned max_unroll = loop->lpt_decision.times;
f9cce2dc 468 struct niter_desc *desc = get_simple_loop_desc (loop);
469 bool exit_at_end = loop_exit_at_end_p (loop);
375bb675 470 struct opt_info *opt_info = NULL;
a53ff4c1 471 bool ok;
48e1416a 472
ad5201d0 473 niter = desc->niter;
474
a9989fb4 475 /* Should not get here (such loop should be peeled instead). */
476 gcc_assert (niter > max_unroll + 1);
ad5201d0 477
478 exit_mod = niter % (max_unroll + 1);
479
3c6549f8 480 auto_sbitmap wont_exit (max_unroll + 1);
53c5d9d4 481 bitmap_ones (wont_exit);
ad5201d0 482
c2078b80 483 auto_vec<edge> remove_edges;
48e1416a 484 if (flag_split_ivs_in_unroller
375bb675 485 || flag_variable_expansion_in_unroller)
486 opt_info = analyze_insns_in_loop (loop);
48e1416a 487
f9cce2dc 488 if (!exit_at_end)
ad5201d0 489 {
f9cce2dc 490 /* The exit is not at the end of the loop; leave exit test
ad5201d0 491 in the first copy, so that the loops that start with test
492 of exit condition have continuous body after unrolling. */
493
450d042a 494 if (dump_file)
1c18ed19 495 fprintf (dump_file, ";; Condition at beginning of loop.\n");
ad5201d0 496
497 /* Peel exit_mod iterations. */
08b7917c 498 bitmap_clear_bit (wont_exit, 0);
f9cce2dc 499 if (desc->noloop_assumptions)
08b7917c 500 bitmap_clear_bit (wont_exit, 1);
ad5201d0 501
f9cce2dc 502 if (exit_mod)
503 {
375bb675 504 opt_info_start_duplication (opt_info);
a53ff4c1 505 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 506 exit_mod,
f9cce2dc 507 wont_exit, desc->out_edge,
f3c40e6d 508 &remove_edges,
75cc36a4 509 DLTHE_FLAG_UPDATE_FREQ
510 | (opt_info && exit_mod > 1
511 ? DLTHE_RECORD_COPY_NUMBER
512 : 0));
a53ff4c1 513 gcc_assert (ok);
f9cce2dc 514
375bb675 515 if (opt_info && exit_mod > 1)
48e1416a 516 apply_opt_in_copies (opt_info, exit_mod, false, false);
517
f9cce2dc 518 desc->noloop_assumptions = NULL_RTX;
519 desc->niter -= exit_mod;
e913b5cd 520 loop->nb_iterations_upper_bound -= exit_mod;
a9ef9877 521 if (loop->any_estimate
796b6678 522 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
e913b5cd 523 loop->nb_iterations_estimate -= exit_mod;
a9ef9877 524 else
525 loop->any_estimate = false;
8e3ffe30 526 if (loop->any_likely_upper_bound
527 && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
528 loop->nb_iterations_likely_upper_bound -= exit_mod;
529 else
530 loop->any_likely_upper_bound = false;
f9cce2dc 531 }
ad5201d0 532
08b7917c 533 bitmap_set_bit (wont_exit, 1);
ad5201d0 534 }
535 else
536 {
537 /* Leave exit test in last copy, for the same reason as above if
538 the loop tests the condition at the end of loop body. */
539
450d042a 540 if (dump_file)
1c18ed19 541 fprintf (dump_file, ";; Condition at end of loop.\n");
ad5201d0 542
543 /* We know that niter >= max_unroll + 2; so we do not need to care of
544 case when we would exit before reaching the loop. So just peel
f9cce2dc 545 exit_mod + 1 iterations. */
546 if (exit_mod != max_unroll
547 || desc->noloop_assumptions)
ad5201d0 548 {
08b7917c 549 bitmap_clear_bit (wont_exit, 0);
f9cce2dc 550 if (desc->noloop_assumptions)
08b7917c 551 bitmap_clear_bit (wont_exit, 1);
48e1416a 552
375bb675 553 opt_info_start_duplication (opt_info);
a53ff4c1 554 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 555 exit_mod + 1,
a53ff4c1 556 wont_exit, desc->out_edge,
f3c40e6d 557 &remove_edges,
75cc36a4 558 DLTHE_FLAG_UPDATE_FREQ
559 | (opt_info && exit_mod > 0
560 ? DLTHE_RECORD_COPY_NUMBER
561 : 0));
a53ff4c1 562 gcc_assert (ok);
48e1416a 563
375bb675 564 if (opt_info && exit_mod > 0)
565 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
a9989fb4 566
f9cce2dc 567 desc->niter -= exit_mod + 1;
e913b5cd 568 loop->nb_iterations_upper_bound -= exit_mod + 1;
a9ef9877 569 if (loop->any_estimate
796b6678 570 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
e913b5cd 571 loop->nb_iterations_estimate -= exit_mod + 1;
a9ef9877 572 else
573 loop->any_estimate = false;
8e3ffe30 574 if (loop->any_likely_upper_bound
575 && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
576 loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
577 else
578 loop->any_likely_upper_bound = false;
f9cce2dc 579 desc->noloop_assumptions = NULL_RTX;
580
08b7917c 581 bitmap_set_bit (wont_exit, 0);
582 bitmap_set_bit (wont_exit, 1);
ad5201d0 583 }
584
08b7917c 585 bitmap_clear_bit (wont_exit, max_unroll);
ad5201d0 586 }
587
588 /* Now unroll the loop. */
48e1416a 589
375bb675 590 opt_info_start_duplication (opt_info);
a53ff4c1 591 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 592 max_unroll,
a53ff4c1 593 wont_exit, desc->out_edge,
f3c40e6d 594 &remove_edges,
75cc36a4 595 DLTHE_FLAG_UPDATE_FREQ
596 | (opt_info
597 ? DLTHE_RECORD_COPY_NUMBER
598 : 0));
a53ff4c1 599 gcc_assert (ok);
ad5201d0 600
375bb675 601 if (opt_info)
a9989fb4 602 {
375bb675 603 apply_opt_in_copies (opt_info, max_unroll, true, true);
604 free_opt_info (opt_info);
a9989fb4 605 }
606
f9cce2dc 607 if (exit_at_end)
608 {
01020a5f 609 basic_block exit_block = get_bb_copy (desc->in_edge->src);
f9cce2dc 610 /* Find a new in and out edge; they are in the last copy we have made. */
48e1416a 611
cd665a06 612 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
f9cce2dc 613 {
cd665a06 614 desc->out_edge = EDGE_SUCC (exit_block, 0);
615 desc->in_edge = EDGE_SUCC (exit_block, 1);
f9cce2dc 616 }
617 else
618 {
cd665a06 619 desc->out_edge = EDGE_SUCC (exit_block, 1);
620 desc->in_edge = EDGE_SUCC (exit_block, 0);
f9cce2dc 621 }
622 }
623
624 desc->niter /= max_unroll + 1;
a9ef9877 625 loop->nb_iterations_upper_bound
796b6678 626 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
a9ef9877 627 if (loop->any_estimate)
628 loop->nb_iterations_estimate
796b6678 629 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
8e3ffe30 630 if (loop->any_likely_upper_bound)
631 loop->nb_iterations_likely_upper_bound
632 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
f9cce2dc 633 desc->niter_expr = GEN_INT (desc->niter);
634
ad5201d0 635 /* Remove the edges. */
f1f41a6c 636 FOR_EACH_VEC_ELT (remove_edges, i, e)
f3c40e6d 637 remove_path (e);
ad5201d0 638
450d042a 639 if (dump_file)
640 fprintf (dump_file,
641 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
642 max_unroll, num_loop_insns (loop));
ad5201d0 643}
644
645/* Decide whether to unroll LOOP iterating runtime computable number of times
646 and how much. */
647static void
0051c76a 648decide_unroll_runtime_iterations (struct loop *loop, int flags)
ad5201d0 649{
650 unsigned nunroll, nunroll_by_av, i;
f9cce2dc 651 struct niter_desc *desc;
5de9d3ed 652 widest_int iterations;
ad5201d0 653
654 if (!(flags & UAP_UNROLL))
655 {
656 /* We were not asked to, just return back silently. */
657 return;
658 }
659
450d042a 660 if (dump_file)
661 fprintf (dump_file,
662 "\n;; Considering unrolling loop with runtime "
663 "computable number of iterations\n");
ad5201d0 664
665 /* nunroll = total number of copies of the original loop body in
666 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
667 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
668 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
669 if (nunroll > nunroll_by_av)
670 nunroll = nunroll_by_av;
671 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
672 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
673
9ccaa774 674 if (targetm.loop_unroll_adjust)
675 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
676
ad5201d0 677 /* Skip big loops. */
678 if (nunroll <= 1)
679 {
450d042a 680 if (dump_file)
681 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 682 return;
683 }
684
685 /* Check for simple loops. */
f9cce2dc 686 desc = get_simple_loop_desc (loop);
ad5201d0 687
688 /* Check simpleness. */
f9cce2dc 689 if (!desc->simple_p || desc->assumptions)
ad5201d0 690 {
450d042a 691 if (dump_file)
692 fprintf (dump_file,
693 ";; Unable to prove that the number of iterations "
694 "can be counted in runtime\n");
ad5201d0 695 return;
696 }
697
f9cce2dc 698 if (desc->const_iter)
ad5201d0 699 {
450d042a 700 if (dump_file)
701 fprintf (dump_file, ";; Loop iterates constant times\n");
ad5201d0 702 return;
703 }
704
204a2e45 705 /* Check whether the loop rolls. */
f86b328b 706 if ((get_estimated_loop_iterations (loop, &iterations)
53b01069 707 || get_likely_max_loop_iterations (loop, &iterations))
796b6678 708 && wi::ltu_p (iterations, 2 * nunroll))
ad5201d0 709 {
450d042a 710 if (dump_file)
711 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 712 return;
713 }
714
715 /* Success; now force nunroll to be power of 2, as we are unable to
716 cope with overflows in computation of number of iterations. */
f9cce2dc 717 for (i = 1; 2 * i <= nunroll; i *= 2)
718 continue;
ad5201d0 719
720 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
721 loop->lpt_decision.times = i - 1;
722}
723
d6a1bbdb 724/* Splits edge E and inserts the sequence of instructions INSNS on it, and
725 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
726 and NULL is returned instead. */
88e6f696 727
728basic_block
222dc1d3 729split_edge_and_insert (edge e, rtx_insn *insns)
88e6f696 730{
d6a1bbdb 731 basic_block bb;
732
733 if (!insns)
734 return NULL;
48e1416a 735 bb = split_edge (e);
88e6f696 736 emit_insn_after (insns, BB_END (bb));
db1c50be 737
738 /* ??? We used to assume that INSNS can contain control flow insns, and
739 that we had to try to find sub basic blocks in BB to maintain a valid
740 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
741 and call break_superblocks when going out of cfglayout mode. But it
742 turns out that this never happens; and that if it does ever happen,
8b88439e 743 the verify_flow_info at the end of the RTL loop passes would fail.
db1c50be 744
745 There are two reasons why we expected we could have control flow insns
746 in INSNS. The first is when a comparison has to be done in parts, and
747 the second is when the number of iterations is computed for loops with
748 the number of iterations known at runtime. In both cases, test cases
749 to get control flow in INSNS appear to be impossible to construct:
750
751 * If do_compare_rtx_and_jump needs several branches to do comparison
752 in a mode that needs comparison by parts, we cannot analyze the
753 number of iterations of the loop, and we never get to unrolling it.
754
755 * The code in expand_divmod that was suspected to cause creation of
756 branching code seems to be only accessed for signed division. The
757 divisions used by # of iterations analysis are always unsigned.
758 Problems might arise on architectures that emits branching code
759 for some operations that may appear in the unroller (especially
760 for division), but we have no such architectures.
761
762 Considering all this, it was decided that we should for now assume
763 that INSNS can in theory contain control flow insns, but in practice
764 it never does. So we don't handle the theoretical case, and should
765 a real failure ever show up, we have a pretty good clue for how to
766 fix it. */
767
88e6f696 768 return bb;
769}
770
b99ab275 771/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
772 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
773 in order to create a jump. */
774
222dc1d3 775static rtx_insn *
f9a00e9e 776compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
720cfc43 777 rtx_code_label *label, profile_probability prob,
778 rtx_insn *cinsn)
b99ab275 779{
f9a00e9e 780 rtx_insn *seq;
781 rtx_jump_insn *jump;
222dc1d3 782 rtx cond;
3754d046 783 machine_mode mode;
b99ab275 784
785 mode = GET_MODE (op0);
786 if (mode == VOIDmode)
787 mode = GET_MODE (op1);
788
789 start_sequence ();
790 if (GET_MODE_CLASS (mode) == MODE_CC)
791 {
792 /* A hack -- there seems to be no easy generic way how to make a
793 conditional jump from a ccmode comparison. */
794 gcc_assert (cinsn);
795 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
796 gcc_assert (GET_CODE (cond) == comp);
797 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
798 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
799 emit_jump_insn (copy_insn (PATTERN (cinsn)));
f9a00e9e 800 jump = as_a <rtx_jump_insn *> (get_last_insn ());
b99ab275 801 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
802 LABEL_NUSES (JUMP_LABEL (jump))++;
803 redirect_jump (jump, label, 0);
804 }
805 else
806 {
807 gcc_assert (!cinsn);
808
809 op0 = force_operand (op0, NULL_RTX);
810 op1 = force_operand (op1, NULL_RTX);
811 do_compare_rtx_and_jump (op0, op1, comp, 0,
720cfc43 812 mode, NULL_RTX, NULL, label,
813 profile_probability::uninitialized ());
f9a00e9e 814 jump = as_a <rtx_jump_insn *> (get_last_insn ());
815 jump->set_jump_target (label);
b99ab275 816 LABEL_NUSES (label)++;
817 }
720cfc43 818 if (prob.initialized_p ())
61cb1816 819 add_reg_br_prob_note (jump, prob);
b99ab275 820
821 seq = get_insns ();
822 end_sequence ();
823
824 return seq;
825}
826
1c18ed19 827/* Unroll LOOP for which we are able to count number of iterations in runtime
828 LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
ad5201d0 829 extra care for case n < 0):
3ad4992f 830
ad5201d0 831 for (i = 0; i < n; i++)
832 body;
3ad4992f 833
1c18ed19 834 ==> (LOOP->LPT_DECISION.TIMES == 3)
3ad4992f 835
ad5201d0 836 i = 0;
837 mod = n % 4;
3ad4992f 838
ad5201d0 839 switch (mod)
840 {
841 case 3:
842 body; i++;
843 case 2:
844 body; i++;
845 case 1:
846 body; i++;
847 case 0: ;
848 }
3ad4992f 849
ad5201d0 850 while (i < n)
851 {
852 body; i++;
853 body; i++;
854 body; i++;
855 body; i++;
856 }
857 */
858static void
7194de72 859unroll_loop_runtime_iterations (struct loop *loop)
ad5201d0 860{
222dc1d3 861 rtx old_niter, niter, tmp;
862 rtx_insn *init_code, *branch_code;
720cfc43 863 unsigned i, j;
864 profile_probability p;
bf866763 865 basic_block preheader, *body, swtch, ezc_swtch = NULL;
305db1a8 866 int may_exit_copy, iter_freq, new_freq;
db9cef39 867 profile_count iter_count, new_count;
f3c40e6d 868 unsigned n_peel;
f3c40e6d 869 edge e;
ad5201d0 870 bool extra_zero_check, last_may_exit;
871 unsigned max_unroll = loop->lpt_decision.times;
f9cce2dc 872 struct niter_desc *desc = get_simple_loop_desc (loop);
873 bool exit_at_end = loop_exit_at_end_p (loop);
375bb675 874 struct opt_info *opt_info = NULL;
a53ff4c1 875 bool ok;
48e1416a 876
375bb675 877 if (flag_split_ivs_in_unroller
878 || flag_variable_expansion_in_unroller)
879 opt_info = analyze_insns_in_loop (loop);
48e1416a 880
ad5201d0 881 /* Remember blocks whose dominators will have to be updated. */
c2078b80 882 auto_vec<basic_block> dom_bbs;
ad5201d0 883
884 body = get_loop_body (loop);
885 for (i = 0; i < loop->num_nodes; i++)
886 {
f1f41a6c 887 vec<basic_block> ldom;
3f9439d7 888 basic_block bb;
ad5201d0 889
3f9439d7 890 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
f1f41a6c 891 FOR_EACH_VEC_ELT (ldom, j, bb)
3f9439d7 892 if (!flow_bb_inside_loop_p (loop, bb))
f1f41a6c 893 dom_bbs.safe_push (bb);
ad5201d0 894
f1f41a6c 895 ldom.release ();
ad5201d0 896 }
897 free (body);
898
f9cce2dc 899 if (!exit_at_end)
ad5201d0 900 {
901 /* Leave exit in first copy (for explanation why see comment in
902 unroll_loop_constant_iterations). */
903 may_exit_copy = 0;
904 n_peel = max_unroll - 1;
905 extra_zero_check = true;
906 last_may_exit = false;
907 }
908 else
909 {
910 /* Leave exit in last copy (for explanation why see comment in
911 unroll_loop_constant_iterations). */
912 may_exit_copy = max_unroll;
913 n_peel = max_unroll;
914 extra_zero_check = false;
915 last_may_exit = true;
916 }
917
918 /* Get expression for number of iterations. */
919 start_sequence ();
f9cce2dc 920 old_niter = niter = gen_reg_rtx (desc->mode);
921 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
922 if (tmp != niter)
923 emit_move_insn (niter, tmp);
ad5201d0 924
b851b8f0 925 /* For loops that exit at end and whose number of iterations is reliable,
926 add one to niter to account for first pass through loop body before
927 reaching exit test. */
928 if (exit_at_end && !desc->noloop_assumptions)
bf866763 929 {
930 niter = expand_simple_binop (desc->mode, PLUS,
931 niter, const1_rtx,
932 NULL_RTX, 0, OPTAB_LIB_WIDEN);
933 old_niter = niter;
934 }
935
ad5201d0 936 /* Count modulo by ANDing it with max_unroll; we use the fact that
937 the number of unrollings is a power of two, and thus this is correct
938 even if there is overflow in the computation. */
f9cce2dc 939 niter = expand_simple_binop (desc->mode, AND,
0359f9f5 940 niter, gen_int_mode (max_unroll, desc->mode),
ad5201d0 941 NULL_RTX, 0, OPTAB_LIB_WIDEN);
942
943 init_code = get_insns ();
944 end_sequence ();
ffbd194b 945 unshare_all_rtl_in_chain (init_code);
ad5201d0 946
947 /* Precondition the loop. */
88e6f696 948 split_edge_and_insert (loop_preheader_edge (loop), init_code);
ad5201d0 949
c2078b80 950 auto_vec<edge> remove_edges;
ad5201d0 951
3c6549f8 952 auto_sbitmap wont_exit (max_unroll + 2);
ad5201d0 953
b851b8f0 954 if (extra_zero_check || desc->noloop_assumptions)
bf866763 955 {
956 /* Peel the first copy of loop body. Leave the exit test if the number
957 of iterations is not reliable. Also record the place of the extra zero
958 check. */
959 bitmap_clear (wont_exit);
960 if (!desc->noloop_assumptions)
961 bitmap_set_bit (wont_exit, 1);
962 ezc_swtch = loop_preheader_edge (loop)->src;
963 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
964 1, wont_exit, desc->out_edge,
965 &remove_edges,
966 DLTHE_FLAG_UPDATE_FREQ);
967 gcc_assert (ok);
968 }
ad5201d0 969
970 /* Record the place where switch will be built for preconditioning. */
88e6f696 971 swtch = split_edge (loop_preheader_edge (loop));
ad5201d0 972
305db1a8 973 /* Compute frequency/count increments for each switch block and initialize
974 innermost switch block. Switch blocks and peeled loop copies are built
975 from innermost outward. */
976 iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
db9cef39 977 iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
305db1a8 978 swtch->frequency = new_freq;
979 swtch->count = new_count;
305db1a8 980
ad5201d0 981 for (i = 0; i < n_peel; i++)
982 {
983 /* Peel the copy. */
53c5d9d4 984 bitmap_clear (wont_exit);
ad5201d0 985 if (i != n_peel - 1 || !last_may_exit)
08b7917c 986 bitmap_set_bit (wont_exit, 1);
a53ff4c1 987 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
7194de72 988 1, wont_exit, desc->out_edge,
f3c40e6d 989 &remove_edges,
a53ff4c1 990 DLTHE_FLAG_UPDATE_FREQ);
991 gcc_assert (ok);
ad5201d0 992
c87a3eff 993 /* Create item for switch. */
994 j = n_peel - i - (extra_zero_check ? 0 : 1);
720cfc43 995 p = profile_probability::always ().apply_scale (1, i + 2);
c87a3eff 996
88e6f696 997 preheader = split_edge (loop_preheader_edge (loop));
305db1a8 998 /* Add in frequency/count of edge from switch block. */
999 preheader->frequency += iter_freq;
1000 preheader->count += iter_count;
f9cce2dc 1001 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
a53ff4c1 1002 block_label (preheader), p,
222dc1d3 1003 NULL);
c87a3eff 1004
d6a1bbdb 1005 /* We rely on the fact that the compare and jump cannot be optimized out,
1006 and hence the cfg we create is correct. */
1007 gcc_assert (branch_code != NULL_RTX);
1008
88e6f696 1009 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
0051c76a 1010 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
720cfc43 1011 single_succ_edge (swtch)->probability = p.invert ();
305db1a8 1012 new_freq += iter_freq;
1013 new_count += iter_count;
1014 swtch->frequency = new_freq;
1015 swtch->count = new_count;
c87a3eff 1016 e = make_edge (swtch, preheader,
ea091dfd 1017 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
c87a3eff 1018 e->probability = p;
ad5201d0 1019 }
1020
1021 if (extra_zero_check)
1022 {
1023 /* Add branch for zero iterations. */
720cfc43 1024 p = profile_probability::always ().apply_scale (1, max_unroll + 1);
ad5201d0 1025 swtch = ezc_swtch;
88e6f696 1026 preheader = split_edge (loop_preheader_edge (loop));
305db1a8 1027 /* Recompute frequency/count adjustments since initial peel copy may
1028 have exited and reduced those values that were computed above. */
1029 iter_freq = swtch->frequency / (max_unroll + 1);
db9cef39 1030 iter_count = swtch->count.apply_scale (1, max_unroll + 1);
305db1a8 1031 /* Add in frequency/count of edge from switch block. */
1032 preheader->frequency += iter_freq;
1033 preheader->count += iter_count;
f9cce2dc 1034 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
a53ff4c1 1035 block_label (preheader), p,
222dc1d3 1036 NULL);
d6a1bbdb 1037 gcc_assert (branch_code != NULL_RTX);
ad5201d0 1038
88e6f696 1039 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
0051c76a 1040 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
720cfc43 1041 single_succ_edge (swtch)->probability = p.invert ();
a964683a 1042 e = make_edge (swtch, preheader,
ea091dfd 1043 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
ad5201d0 1044 e->probability = p;
1045 }
1046
1047 /* Recount dominators for outer blocks. */
3f9439d7 1048 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
ad5201d0 1049
1050 /* And unroll loop. */
1051
53c5d9d4 1052 bitmap_ones (wont_exit);
08b7917c 1053 bitmap_clear_bit (wont_exit, may_exit_copy);
375bb675 1054 opt_info_start_duplication (opt_info);
48e1416a 1055
a53ff4c1 1056 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 1057 max_unroll,
a53ff4c1 1058 wont_exit, desc->out_edge,
f3c40e6d 1059 &remove_edges,
75cc36a4 1060 DLTHE_FLAG_UPDATE_FREQ
1061 | (opt_info
1062 ? DLTHE_RECORD_COPY_NUMBER
1063 : 0));
a53ff4c1 1064 gcc_assert (ok);
48e1416a 1065
375bb675 1066 if (opt_info)
a9989fb4 1067 {
375bb675 1068 apply_opt_in_copies (opt_info, max_unroll, true, true);
1069 free_opt_info (opt_info);
a9989fb4 1070 }
1071
f9cce2dc 1072 if (exit_at_end)
1073 {
01020a5f 1074 basic_block exit_block = get_bb_copy (desc->in_edge->src);
a53ff4c1 1075 /* Find a new in and out edge; they are in the last copy we have
1076 made. */
48e1416a 1077
cd665a06 1078 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
f9cce2dc 1079 {
cd665a06 1080 desc->out_edge = EDGE_SUCC (exit_block, 0);
1081 desc->in_edge = EDGE_SUCC (exit_block, 1);
f9cce2dc 1082 }
1083 else
1084 {
cd665a06 1085 desc->out_edge = EDGE_SUCC (exit_block, 1);
1086 desc->in_edge = EDGE_SUCC (exit_block, 0);
f9cce2dc 1087 }
1088 }
1089
ad5201d0 1090 /* Remove the edges. */
f1f41a6c 1091 FOR_EACH_VEC_ELT (remove_edges, i, e)
f3c40e6d 1092 remove_path (e);
ad5201d0 1093
f9cce2dc 1094 /* We must be careful when updating the number of iterations due to
1095 preconditioning and the fact that the value must be valid at entry
1096 of the loop. After passing through the above code, we see that
1097 the correct new number of iterations is this: */
a9989fb4 1098 gcc_assert (!desc->const_iter);
f9cce2dc 1099 desc->niter_expr =
a53ff4c1 1100 simplify_gen_binary (UDIV, desc->mode, old_niter,
5d5ee71f 1101 gen_int_mode (max_unroll + 1, desc->mode));
a9ef9877 1102 loop->nb_iterations_upper_bound
796b6678 1103 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
a9ef9877 1104 if (loop->any_estimate)
1105 loop->nb_iterations_estimate
796b6678 1106 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
8e3ffe30 1107 if (loop->any_likely_upper_bound)
1108 loop->nb_iterations_likely_upper_bound
1109 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
f9cce2dc 1110 if (exit_at_end)
1111 {
1112 desc->niter_expr =
1113 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1114 desc->noloop_assumptions = NULL_RTX;
a9ef9877 1115 --loop->nb_iterations_upper_bound;
1116 if (loop->any_estimate
e913b5cd 1117 && loop->nb_iterations_estimate != 0)
a9ef9877 1118 --loop->nb_iterations_estimate;
1119 else
1120 loop->any_estimate = false;
8e3ffe30 1121 if (loop->any_likely_upper_bound
1122 && loop->nb_iterations_likely_upper_bound != 0)
1123 --loop->nb_iterations_likely_upper_bound;
1124 else
1125 loop->any_likely_upper_bound = false;
f9cce2dc 1126 }
1127
450d042a 1128 if (dump_file)
1129 fprintf (dump_file,
1130 ";; Unrolled loop %d times, counting # of iterations "
1131 "in runtime, %i insns\n",
ad5201d0 1132 max_unroll, num_loop_insns (loop));
1133}
3ad4992f 1134
ad5201d0 1135/* Decide whether to unroll LOOP stupidly and how much. */
1136static void
0051c76a 1137decide_unroll_stupid (struct loop *loop, int flags)
ad5201d0 1138{
1139 unsigned nunroll, nunroll_by_av, i;
f9cce2dc 1140 struct niter_desc *desc;
5de9d3ed 1141 widest_int iterations;
ad5201d0 1142
1143 if (!(flags & UAP_UNROLL_ALL))
1144 {
1145 /* We were not asked to, just return back silently. */
1146 return;
1147 }
1148
450d042a 1149 if (dump_file)
1150 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
ad5201d0 1151
1152 /* nunroll = total number of copies of the original loop body in
1153 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
1154 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
450d042a 1155 nunroll_by_av
1156 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
ad5201d0 1157 if (nunroll > nunroll_by_av)
1158 nunroll = nunroll_by_av;
1159 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1160 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1161
9ccaa774 1162 if (targetm.loop_unroll_adjust)
1163 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1164
ad5201d0 1165 /* Skip big loops. */
1166 if (nunroll <= 1)
1167 {
450d042a 1168 if (dump_file)
1169 fprintf (dump_file, ";; Not considering loop, is too big\n");
ad5201d0 1170 return;
1171 }
1172
1173 /* Check for simple loops. */
f9cce2dc 1174 desc = get_simple_loop_desc (loop);
ad5201d0 1175
1176 /* Check simpleness. */
f9cce2dc 1177 if (desc->simple_p && !desc->assumptions)
ad5201d0 1178 {
450d042a 1179 if (dump_file)
1180 fprintf (dump_file, ";; The loop is simple\n");
ad5201d0 1181 return;
1182 }
1183
1184 /* Do not unroll loops with branches inside -- it increases number
d9459f6b 1185 of mispredicts.
1186 TODO: this heuristic needs tunning; call inside the loop body
1187 is also relatively good reason to not unroll. */
f9cce2dc 1188 if (num_loop_branches (loop) > 1)
ad5201d0 1189 {
450d042a 1190 if (dump_file)
1191 fprintf (dump_file, ";; Not unrolling, contains branches\n");
ad5201d0 1192 return;
1193 }
1194
204a2e45 1195 /* Check whether the loop rolls. */
f86b328b 1196 if ((get_estimated_loop_iterations (loop, &iterations)
53b01069 1197 || get_likely_max_loop_iterations (loop, &iterations))
796b6678 1198 && wi::ltu_p (iterations, 2 * nunroll))
ad5201d0 1199 {
450d042a 1200 if (dump_file)
1201 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
ad5201d0 1202 return;
1203 }
1204
1205 /* Success. Now force nunroll to be power of 2, as it seems that this
d01481af 1206 improves results (partially because of better alignments, partially
ad5201d0 1207 because of some dark magic). */
f9cce2dc 1208 for (i = 1; 2 * i <= nunroll; i *= 2)
1209 continue;
ad5201d0 1210
1211 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1212 loop->lpt_decision.times = i - 1;
1213}
1214
1c18ed19 1215/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
1216
ad5201d0 1217 while (cond)
1218 body;
1219
1c18ed19 1220 ==> (LOOP->LPT_DECISION.TIMES == 3)
ad5201d0 1221
1222 while (cond)
1223 {
1224 body;
1225 if (!cond) break;
1226 body;
1227 if (!cond) break;
1228 body;
1229 if (!cond) break;
1230 body;
1231 }
1232 */
1233static void
7194de72 1234unroll_loop_stupid (struct loop *loop)
ad5201d0 1235{
ad5201d0 1236 unsigned nunroll = loop->lpt_decision.times;
f9cce2dc 1237 struct niter_desc *desc = get_simple_loop_desc (loop);
375bb675 1238 struct opt_info *opt_info = NULL;
a53ff4c1 1239 bool ok;
48e1416a 1240
375bb675 1241 if (flag_split_ivs_in_unroller
1242 || flag_variable_expansion_in_unroller)
1243 opt_info = analyze_insns_in_loop (loop);
48e1416a 1244
3c6549f8 1245 auto_sbitmap wont_exit (nunroll + 1);
53c5d9d4 1246 bitmap_clear (wont_exit);
375bb675 1247 opt_info_start_duplication (opt_info);
48e1416a 1248
a53ff4c1 1249 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
7194de72 1250 nunroll, wont_exit,
f3c40e6d 1251 NULL, NULL,
75cc36a4 1252 DLTHE_FLAG_UPDATE_FREQ
1253 | (opt_info
1254 ? DLTHE_RECORD_COPY_NUMBER
1255 : 0));
a53ff4c1 1256 gcc_assert (ok);
48e1416a 1257
375bb675 1258 if (opt_info)
a9989fb4 1259 {
375bb675 1260 apply_opt_in_copies (opt_info, nunroll, true, true);
1261 free_opt_info (opt_info);
a9989fb4 1262 }
1263
f9cce2dc 1264 if (desc->simple_p)
1265 {
1266 /* We indeed may get here provided that there are nontrivial assumptions
1267 for a loop to be really simple. We could update the counts, but the
1268 problem is that we are unable to decide which exit will be taken
1269 (not really true in case the number of iterations is constant,
75de4aa2 1270 but no one will do anything with this information, so we do not
f9cce2dc 1271 worry about it). */
1272 desc->simple_p = false;
1273 }
1274
450d042a 1275 if (dump_file)
1276 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
ad5201d0 1277 nunroll, num_loop_insns (loop));
1278}
a9989fb4 1279
3b3940d7 1280/* Returns true if REG is referenced in one nondebug insn in LOOP.
1281 Set *DEBUG_USES to the number of debug insns that reference the
1282 variable. */
375bb675 1283
f7b51129 1284static bool
3b3940d7 1285referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1286 int *debug_uses)
375bb675 1287{
1288 basic_block *body, bb;
1289 unsigned i;
1290 int count_ref = 0;
222dc1d3 1291 rtx_insn *insn;
48e1416a 1292
1293 body = get_loop_body (loop);
375bb675 1294 for (i = 0; i < loop->num_nodes; i++)
1295 {
1296 bb = body[i];
48e1416a 1297
375bb675 1298 FOR_BB_INSNS (bb, insn)
3b3940d7 1299 if (!rtx_referenced_p (reg, insn))
1300 continue;
1301 else if (DEBUG_INSN_P (insn))
1302 ++*debug_uses;
1303 else if (++count_ref > 1)
1304 break;
375bb675 1305 }
3b3940d7 1306 free (body);
375bb675 1307 return (count_ref == 1);
1308}
1309
3b3940d7 1310/* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1311
1312static void
1313reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1314{
1315 basic_block *body, bb;
1316 unsigned i;
222dc1d3 1317 rtx_insn *insn;
3b3940d7 1318
1319 body = get_loop_body (loop);
1320 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1321 {
1322 bb = body[i];
1323
1324 FOR_BB_INSNS (bb, insn)
1325 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1326 continue;
1327 else
1328 {
1329 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1330 gen_rtx_UNKNOWN_VAR_LOC (), 0);
1331 if (!--debug_uses)
1332 break;
1333 }
1334 }
1335 free (body);
1336}
1337
375bb675 1338/* Determine whether INSN contains an accumulator
48e1416a 1339 which can be expanded into separate copies,
375bb675 1340 one for each copy of the LOOP body.
48e1416a 1341
375bb675 1342 for (i = 0 ; i < n; i++)
1343 sum += a[i];
48e1416a 1344
375bb675 1345 ==>
48e1416a 1346
375bb675 1347 sum += a[i]
1348 ....
1349 i = i+1;
1350 sum1 += a[i]
1351 ....
1352 i = i+1
1353 sum2 += a[i];
1354 ....
1355
48e1416a 1356 Return NULL if INSN contains no opportunity for expansion of accumulator.
1357 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
375bb675 1358 information and return a pointer to it.
1359*/
1360
1361static struct var_to_expand *
222dc1d3 1362analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
375bb675 1363{
7497b98c 1364 rtx set, dest, src;
375bb675 1365 struct var_to_expand *ves;
0def560a 1366 unsigned accum_pos;
7497b98c 1367 enum rtx_code code;
3b3940d7 1368 int debug_uses = 0;
0def560a 1369
375bb675 1370 set = single_set (insn);
1371 if (!set)
1372 return NULL;
48e1416a 1373
375bb675 1374 dest = SET_DEST (set);
1375 src = SET_SRC (set);
7497b98c 1376 code = GET_CODE (src);
48e1416a 1377
7497b98c 1378 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
375bb675 1379 return NULL;
1c6304f6 1380
7497b98c 1381 if (FLOAT_MODE_P (GET_MODE (dest)))
1382 {
1383 if (!flag_associative_math)
1384 return NULL;
1385 /* In the case of FMA, we're also changing the rounding. */
1386 if (code == FMA && !flag_unsafe_math_optimizations)
1387 return NULL;
1388 }
1389
1c6304f6 1390 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
1391 in MD. But if there is no optab to generate the insn, we can not
1392 perform the variable expansion. This can happen if an MD provides
1393 an insn but not a named pattern to generate it, for example to avoid
1394 producing code that needs additional mode switches like for x87/mmx.
1395
1396 So we check have_insn_for which looks for an optab for the operation
1397 in SRC. If it doesn't exist, we can't perform the expansion even
1398 though INSN is valid. */
7497b98c 1399 if (!have_insn_for (code, GET_MODE (src)))
1c6304f6 1400 return NULL;
1401
375bb675 1402 if (!REG_P (dest)
1403 && !(GET_CODE (dest) == SUBREG
1404 && REG_P (SUBREG_REG (dest))))
1405 return NULL;
48e1416a 1406
7497b98c 1407 /* Find the accumulator use within the operation. */
1408 if (code == FMA)
1409 {
1410 /* We only support accumulation via FMA in the ADD position. */
1411 if (!rtx_equal_p (dest, XEXP (src, 2)))
1412 return NULL;
1413 accum_pos = 2;
1414 }
1415 else if (rtx_equal_p (dest, XEXP (src, 0)))
0def560a 1416 accum_pos = 0;
7497b98c 1417 else if (rtx_equal_p (dest, XEXP (src, 1)))
1418 {
1419 /* The method of expansion that we are using; which includes the
1420 initialization of the expansions with zero and the summation of
1421 the expansions at the end of the computation will yield wrong
1422 results for (x = something - x) thus avoid using it in that case. */
1423 if (code == MINUS)
1424 return NULL;
1425 accum_pos = 1;
1426 }
0def560a 1427 else
1428 return NULL;
1429
7497b98c 1430 /* It must not otherwise be used. */
1431 if (code == FMA)
1432 {
1433 if (rtx_referenced_p (dest, XEXP (src, 0))
1434 || rtx_referenced_p (dest, XEXP (src, 1)))
1435 return NULL;
1436 }
1437 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
375bb675 1438 return NULL;
48e1416a 1439
7497b98c 1440 /* It must be used in exactly one insn. */
3b3940d7 1441 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
375bb675 1442 return NULL;
48e1416a 1443
e24a805c 1444 if (dump_file)
7497b98c 1445 {
1446 fprintf (dump_file, "\n;; Expanding Accumulator ");
1447 print_rtl (dump_file, dest);
1448 fprintf (dump_file, "\n");
1449 }
e24a805c 1450
3b3940d7 1451 if (debug_uses)
1452 /* Instead of resetting the debug insns, we could replace each
1453 debug use in the loop with the sum or product of all expanded
2fbe7a32 1454 accumulators. Since we'll only know of all expansions at the
3b3940d7 1455 end, we'd have to keep track of which vars_to_expand a debug
1456 insn in the loop references, take note of each copy of the
1457 debug insn during unrolling, and when it's all done, compute
1458 the sum or product of each variable and adjust the original
1459 debug insn and each copy thereof. What a pain! */
1460 reset_debug_uses_in_loop (loop, dest, debug_uses);
1461
375bb675 1462 /* Record the accumulator to expand. */
4c36ffe6 1463 ves = XNEW (struct var_to_expand);
375bb675 1464 ves->insn = insn;
375bb675 1465 ves->reg = copy_rtx (dest);
f1f41a6c 1466 ves->var_expansions.create (1);
b635a1a3 1467 ves->next = NULL;
375bb675 1468 ves->op = GET_CODE (src);
1469 ves->expansion_count = 0;
1470 ves->reuse_expansion = 0;
48e1416a 1471 return ves;
375bb675 1472}
1473
a9989fb4 1474/* Determine whether there is an induction variable in INSN that
48e1416a 1475 we would like to split during unrolling.
375bb675 1476
1477 I.e. replace
1478
1479 i = i + 1;
1480 ...
1481 i = i + 1;
1482 ...
1483 i = i + 1;
1484 ...
1485
1486 type chains by
1487
1488 i0 = i + 1
1489 ...
1490 i = i0 + 1
1491 ...
1492 i = i0 + 2
1493 ...
1494
48e1416a 1495 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
375bb675 1496 an IV_TO_SPLIT structure, fill it with the relevant information and return a
a9989fb4 1497 pointer to it. */
1498
1499static struct iv_to_split *
3eeb4f9a 1500analyze_iv_to_split_insn (rtx_insn *insn)
a9989fb4 1501{
1502 rtx set, dest;
1503 struct rtx_iv iv;
1504 struct iv_to_split *ivts;
601e68ad 1505 scalar_int_mode mode;
a53ff4c1 1506 bool ok;
a9989fb4 1507
1508 /* For now we just split the basic induction variables. Later this may be
1509 extended for example by selecting also addresses of memory references. */
1510 set = single_set (insn);
1511 if (!set)
1512 return NULL;
1513
1514 dest = SET_DEST (set);
601e68ad 1515 if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
a9989fb4 1516 return NULL;
1517
601e68ad 1518 if (!biv_p (insn, mode, dest))
a9989fb4 1519 return NULL;
1520
3f37d465 1521 ok = iv_analyze_result (insn, dest, &iv);
e8820970 1522
1523 /* This used to be an assert under the assumption that if biv_p returns
1524 true that iv_analyze_result must also return true. However, that
1525 assumption is not strictly correct as evidenced by pr25569.
1526
1527 Returning NULL when iv_analyze_result returns false is safe and
1528 avoids the problems in pr25569 until the iv_analyze_* routines
1529 can be fixed, which is apparently hard and time consuming
1530 according to their author. */
1531 if (! ok)
1532 return NULL;
a9989fb4 1533
1534 if (iv.step == const0_rtx
1535 || iv.mode != iv.extend_mode)
1536 return NULL;
1537
1538 /* Record the insn to split. */
4c36ffe6 1539 ivts = XNEW (struct iv_to_split);
a9989fb4 1540 ivts->insn = insn;
ef770610 1541 ivts->orig_var = dest;
a9989fb4 1542 ivts->base_var = NULL_RTX;
1543 ivts->step = iv.step;
b635a1a3 1544 ivts->next = NULL;
48e1416a 1545
a9989fb4 1546 return ivts;
1547}
1548
375bb675 1549/* Determines which of insns in LOOP can be optimized.
1550 Return a OPT_INFO struct with the relevant hash tables filled
1551 with all insns to be optimized. The FIRST_NEW_BLOCK field
a9989fb4 1552 is undefined for the return value. */
1553
375bb675 1554static struct opt_info *
1555analyze_insns_in_loop (struct loop *loop)
a9989fb4 1556{
1557 basic_block *body, bb;
749ea85f 1558 unsigned i;
4c36ffe6 1559 struct opt_info *opt_info = XCNEW (struct opt_info);
3eeb4f9a 1560 rtx_insn *insn;
375bb675 1561 struct iv_to_split *ivts = NULL;
1562 struct var_to_expand *ves = NULL;
d9dd21a8 1563 iv_to_split **slot1;
1564 var_to_expand **slot2;
f1f41a6c 1565 vec<edge> edges = get_loop_exit_edges (loop);
749ea85f 1566 edge exit;
375bb675 1567 bool can_apply = false;
48e1416a 1568
a9989fb4 1569 iv_analysis_loop_init (loop);
1570
1571 body = get_loop_body (loop);
375bb675 1572
1573 if (flag_split_ivs_in_unroller)
b635a1a3 1574 {
c1f445d2 1575 opt_info->insns_to_split
1576 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
b635a1a3 1577 opt_info->iv_to_split_head = NULL;
1578 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1579 }
48e1416a 1580
375bb675 1581 /* Record the loop exit bb and loop preheader before the unrolling. */
88e6f696 1582 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
48e1416a 1583
f1f41a6c 1584 if (edges.length () == 1)
375bb675 1585 {
f1f41a6c 1586 exit = edges[0];
749ea85f 1587 if (!(exit->flags & EDGE_COMPLEX))
1588 {
1589 opt_info->loop_exit = split_edge (exit);
1590 can_apply = true;
1591 }
375bb675 1592 }
48e1416a 1593
375bb675 1594 if (flag_variable_expansion_in_unroller
1595 && can_apply)
b635a1a3 1596 {
c1f445d2 1597 opt_info->insns_with_var_to_expand
1598 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
b635a1a3 1599 opt_info->var_to_expand_head = NULL;
1600 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1601 }
48e1416a 1602
a9989fb4 1603 for (i = 0; i < loop->num_nodes; i++)
1604 {
1605 bb = body[i];
1606 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1607 continue;
1608
1609 FOR_BB_INSNS (bb, insn)
375bb675 1610 {
1611 if (!INSN_P (insn))
1612 continue;
48e1416a 1613
c1f445d2 1614 if (opt_info->insns_to_split)
375bb675 1615 ivts = analyze_iv_to_split_insn (insn);
48e1416a 1616
375bb675 1617 if (ivts)
1618 {
c1f445d2 1619 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
b635a1a3 1620 gcc_assert (*slot1 == NULL);
375bb675 1621 *slot1 = ivts;
b635a1a3 1622 *opt_info->iv_to_split_tail = ivts;
1623 opt_info->iv_to_split_tail = &ivts->next;
375bb675 1624 continue;
1625 }
48e1416a 1626
c1f445d2 1627 if (opt_info->insns_with_var_to_expand)
375bb675 1628 ves = analyze_insn_to_expand_var (loop, insn);
48e1416a 1629
375bb675 1630 if (ves)
1631 {
c1f445d2 1632 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
b635a1a3 1633 gcc_assert (*slot2 == NULL);
375bb675 1634 *slot2 = ves;
b635a1a3 1635 *opt_info->var_to_expand_tail = ves;
1636 opt_info->var_to_expand_tail = &ves->next;
375bb675 1637 }
1638 }
a9989fb4 1639 }
48e1416a 1640
f1f41a6c 1641 edges.release ();
a9989fb4 1642 free (body);
375bb675 1643 return opt_info;
a9989fb4 1644}
1645
1646/* Called just before loop duplication. Records start of duplicated area
375bb675 1647 to OPT_INFO. */
a9989fb4 1648
48e1416a 1649static void
375bb675 1650opt_info_start_duplication (struct opt_info *opt_info)
a9989fb4 1651{
375bb675 1652 if (opt_info)
fe672ac0 1653 opt_info->first_new_block = last_basic_block_for_fn (cfun);
a9989fb4 1654}
1655
1656/* Determine the number of iterations between initialization of the base
1657 variable and the current copy (N_COPY). N_COPIES is the total number
1658 of newly created copies. UNROLLING is true if we are unrolling
1659 (not peeling) the loop. */
1660
1661static unsigned
1662determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1663{
1664 if (unrolling)
1665 {
1666 /* If we are unrolling, initialization is done in the original loop
1667 body (number 0). */
1668 return n_copy;
1669 }
1670 else
1671 {
1672 /* If we are peeling, the copy in that the initialization occurs has
1673 number 1. The original loop (number 0) is the last. */
1674 if (n_copy)
1675 return n_copy - 1;
1676 else
1677 return n_copies;
1678 }
1679}
1680
b635a1a3 1681/* Allocate basic variable for the induction variable chain. */
a9989fb4 1682
b635a1a3 1683static void
1684allocate_basic_variable (struct iv_to_split *ivts)
a9989fb4 1685{
686fd44c 1686 rtx expr = SET_SRC (single_set (ivts->insn));
a9989fb4 1687
1688 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
a9989fb4 1689}
1690
1691/* Insert initialization of basic variable of IVTS before INSN, taking
1692 the initial value from INSN. */
1693
1694static void
222dc1d3 1695insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
a9989fb4 1696{
686fd44c 1697 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
222dc1d3 1698 rtx_insn *seq;
a9989fb4 1699
1700 start_sequence ();
1701 expr = force_operand (expr, ivts->base_var);
1702 if (expr != ivts->base_var)
1703 emit_move_insn (ivts->base_var, expr);
1704 seq = get_insns ();
1705 end_sequence ();
1706
1707 emit_insn_before (seq, insn);
1708}
1709
1710/* Replace the use of induction variable described in IVTS in INSN
1711 by base variable + DELTA * step. */
1712
1713static void
222dc1d3 1714split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
a9989fb4 1715{
222dc1d3 1716 rtx expr, *loc, incr, var;
1717 rtx_insn *seq;
3754d046 1718 machine_mode mode = GET_MODE (ivts->base_var);
a9989fb4 1719 rtx src, dest, set;
1720
1721 /* Construct base + DELTA * step. */
1722 if (!delta)
1723 expr = ivts->base_var;
1724 else
1725 {
1726 incr = simplify_gen_binary (MULT, mode,
60ca8f8e 1727 copy_rtx (ivts->step),
1728 gen_int_mode (delta, mode));
a9989fb4 1729 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1730 ivts->base_var, incr);
1731 }
1732
1733 /* Figure out where to do the replacement. */
686fd44c 1734 loc = &SET_SRC (single_set (insn));
a9989fb4 1735
1736 /* If we can make the replacement right away, we're done. */
1737 if (validate_change (insn, loc, expr, 0))
1738 return;
1739
1740 /* Otherwise, force EXPR into a register and try again. */
1741 start_sequence ();
1742 var = gen_reg_rtx (mode);
1743 expr = force_operand (expr, var);
1744 if (expr != var)
1745 emit_move_insn (var, expr);
1746 seq = get_insns ();
1747 end_sequence ();
1748 emit_insn_before (seq, insn);
48e1416a 1749
a9989fb4 1750 if (validate_change (insn, loc, var, 0))
1751 return;
1752
1753 /* The last chance. Try recreating the assignment in insn
1754 completely from scratch. */
1755 set = single_set (insn);
1756 gcc_assert (set);
1757
1758 start_sequence ();
1759 *loc = var;
1760 src = copy_rtx (SET_SRC (set));
1761 dest = copy_rtx (SET_DEST (set));
1762 src = force_operand (src, dest);
1763 if (src != dest)
1764 emit_move_insn (dest, src);
1765 seq = get_insns ();
1766 end_sequence ();
48e1416a 1767
a9989fb4 1768 emit_insn_before (seq, insn);
1769 delete_insn (insn);
1770}
1771
a9989fb4 1772
6dce5fff 1773/* Return one expansion of the accumulator recorded in struct VE. */
a9989fb4 1774
375bb675 1775static rtx
1776get_expansion (struct var_to_expand *ve)
1777{
1778 rtx reg;
48e1416a 1779
375bb675 1780 if (ve->reuse_expansion == 0)
1781 reg = ve->reg;
1782 else
f1f41a6c 1783 reg = ve->var_expansions[ve->reuse_expansion - 1];
48e1416a 1784
f1f41a6c 1785 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
375bb675 1786 ve->reuse_expansion = 0;
48e1416a 1787 else
375bb675 1788 ve->reuse_expansion++;
48e1416a 1789
375bb675 1790 return reg;
1791}
a9989fb4 1792
a9989fb4 1793
48e1416a 1794/* Given INSN replace the uses of the accumulator recorded in VE
375bb675 1795 with a new register. */
1796
1797static void
222dc1d3 1798expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
375bb675 1799{
1800 rtx new_reg, set;
1801 bool really_new_expansion = false;
48e1416a 1802
375bb675 1803 set = single_set (insn);
972e95af 1804 gcc_assert (set);
48e1416a 1805
375bb675 1806 /* Generate a new register only if the expansion limit has not been
1807 reached. Else reuse an already existing expansion. */
1808 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1809 {
1810 really_new_expansion = true;
1811 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1812 }
1813 else
1814 new_reg = get_expansion (ve);
1815
d022e236 1816 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
375bb675 1817 if (apply_change_group ())
1818 if (really_new_expansion)
1819 {
f1f41a6c 1820 ve->var_expansions.safe_push (new_reg);
375bb675 1821 ve->expansion_count++;
1822 }
1823}
1824
b635a1a3 1825/* Initialize the variable expansions in loop preheader. PLACE is the
1826 loop-preheader basic block where the initialization of the
1827 expansions should take place. The expansions are initialized with
1828 (-0) when the operation is plus or minus to honor sign zero. This
1829 way we can prevent cases where the sign of the final result is
1830 effected by the sign of the expansion. Here is an example to
1831 demonstrate this:
48e1416a 1832
428c97ad 1833 for (i = 0 ; i < n; i++)
1834 sum += something;
1835
1836 ==>
1837
1838 sum += something
1839 ....
1840 i = i+1;
1841 sum1 += something
1842 ....
1843 i = i+1
1844 sum2 += something;
1845 ....
48e1416a 1846
428c97ad 1847 When SUM is initialized with -zero and SOMETHING is also -zero; the
1848 final result of sum should be -zero thus the expansions sum1 and sum2
1849 should be initialized with -zero as well (otherwise we will get +zero
1850 as the final result). */
375bb675 1851
b635a1a3 1852static void
1853insert_var_expansion_initialization (struct var_to_expand *ve,
1854 basic_block place)
375bb675 1855{
222dc1d3 1856 rtx_insn *seq;
1857 rtx var, zero_init;
375bb675 1858 unsigned i;
3754d046 1859 machine_mode mode = GET_MODE (ve->reg);
428c97ad 1860 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1861
f1f41a6c 1862 if (ve->var_expansions.length () == 0)
b635a1a3 1863 return;
48e1416a 1864
375bb675 1865 start_sequence ();
7497b98c 1866 switch (ve->op)
1867 {
1868 case FMA:
1869 /* Note that we only accumulate FMA via the ADD operand. */
1870 case PLUS:
1871 case MINUS:
f1f41a6c 1872 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1873 {
1874 if (honor_signed_zero_p)
1875 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1876 else
1877 zero_init = CONST0_RTX (mode);
1878 emit_move_insn (var, zero_init);
1879 }
1880 break;
1881
1882 case MULT:
f1f41a6c 1883 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1884 {
1885 zero_init = CONST1_RTX (GET_MODE (var));
1886 emit_move_insn (var, zero_init);
1887 }
1888 break;
1889
1890 default:
1891 gcc_unreachable ();
1892 }
48e1416a 1893
375bb675 1894 seq = get_insns ();
1895 end_sequence ();
48e1416a 1896
d022e236 1897 emit_insn_after (seq, BB_END (place));
375bb675 1898}
1899
b635a1a3 1900/* Combine the variable expansions at the loop exit. PLACE is the
1901 loop exit basic block where the summation of the expansions should
1902 take place. */
375bb675 1903
b635a1a3 1904static void
1905combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
375bb675 1906{
375bb675 1907 rtx sum = ve->reg;
222dc1d3 1908 rtx expr, var;
1909 rtx_insn *seq, *insn;
375bb675 1910 unsigned i;
1911
f1f41a6c 1912 if (ve->var_expansions.length () == 0)
b635a1a3 1913 return;
48e1416a 1914
cbebc3ae 1915 /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
1916 it both here and as the destination of the assignment. */
1917 sum = copy_rtx (sum);
375bb675 1918 start_sequence ();
7497b98c 1919 switch (ve->op)
1920 {
1921 case FMA:
1922 /* Note that we only accumulate FMA via the ADD operand. */
1923 case PLUS:
1924 case MINUS:
f1f41a6c 1925 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1926 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1927 break;
1928
1929 case MULT:
f1f41a6c 1930 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
7497b98c 1931 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1932 break;
1933
1934 default:
1935 gcc_unreachable ();
1936 }
48e1416a 1937
375bb675 1938 expr = force_operand (sum, ve->reg);
1939 if (expr != ve->reg)
1940 emit_move_insn (ve->reg, expr);
1941 seq = get_insns ();
1942 end_sequence ();
48e1416a 1943
375bb675 1944 insn = BB_HEAD (place);
1945 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1946 insn = NEXT_INSN (insn);
1947
1948 emit_insn_after (seq, insn);
375bb675 1949}
1950
ef770610 1951/* Strip away REG_EQUAL notes for IVs we're splitting.
1952
1953 Updating REG_EQUAL notes for IVs we split is tricky: We
1954 cannot tell until after unrolling, DF-rescanning, and liveness
1955 updating, whether an EQ_USE is reached by the split IV while
1956 the IV reg is still live. See PR55006.
1957
1958 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1959 because RTL loop-iv requires us to defer rescanning insns and
1960 any notes attached to them. So resort to old techniques... */
1961
1962static void
222dc1d3 1963maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
ef770610 1964{
1965 struct iv_to_split *ivts;
1966 rtx note = find_reg_equal_equiv_note (insn);
1967 if (! note)
1968 return;
1969 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1970 if (reg_mentioned_p (ivts->orig_var, note))
1971 {
1972 remove_note (insn, note);
1973 return;
1974 }
1975}
1976
48e1416a 1977/* Apply loop optimizations in loop copies using the
1978 data which gathered during the unrolling. Structure
375bb675 1979 OPT_INFO record that data.
48e1416a 1980
a9989fb4 1981 UNROLLING is true if we unrolled (not peeled) the loop.
1982 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1983 the loop (as it should happen in complete unrolling, but not in ordinary
1984 peeling of the loop). */
1985
1986static void
48e1416a 1987apply_opt_in_copies (struct opt_info *opt_info,
1988 unsigned n_copies, bool unrolling,
375bb675 1989 bool rewrite_original_loop)
a9989fb4 1990{
1991 unsigned i, delta;
1992 basic_block bb, orig_bb;
222dc1d3 1993 rtx_insn *insn, *orig_insn, *next;
a9989fb4 1994 struct iv_to_split ivts_templ, *ivts;
375bb675 1995 struct var_to_expand ve_templ, *ves;
48e1416a 1996
a9989fb4 1997 /* Sanity check -- we need to put initialization in the original loop
1998 body. */
1999 gcc_assert (!unrolling || rewrite_original_loop);
48e1416a 2000
a9989fb4 2001 /* Allocate the basic variables (i0). */
c1f445d2 2002 if (opt_info->insns_to_split)
b635a1a3 2003 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2004 allocate_basic_variable (ivts);
48e1416a 2005
fe672ac0 2006 for (i = opt_info->first_new_block;
2007 i < (unsigned) last_basic_block_for_fn (cfun);
2008 i++)
a9989fb4 2009 {
f5a6b05f 2010 bb = BASIC_BLOCK_FOR_FN (cfun, i);
01020a5f 2011 orig_bb = get_bb_original (bb);
48e1416a 2012
01020a5f 2013 /* bb->aux holds position in copy sequence initialized by
2014 duplicate_loop_to_header_edge. */
2015 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
a9989fb4 2016 unrolling);
75cc36a4 2017 bb->aux = 0;
a9989fb4 2018 orig_insn = BB_HEAD (orig_bb);
ef770610 2019 FOR_BB_INSNS_SAFE (bb, insn, next)
375bb675 2020 {
0e6893c5 2021 if (!INSN_P (insn)
2022 || (DEBUG_INSN_P (insn)
2023 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
375bb675 2024 continue;
48e1416a 2025
0e6893c5 2026 while (!INSN_P (orig_insn)
2027 || (DEBUG_INSN_P (orig_insn)
2028 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2029 == LABEL_DECL)))
375bb675 2030 orig_insn = NEXT_INSN (orig_insn);
48e1416a 2031
375bb675 2032 ivts_templ.insn = orig_insn;
2033 ve_templ.insn = orig_insn;
48e1416a 2034
375bb675 2035 /* Apply splitting iv optimization. */
c1f445d2 2036 if (opt_info->insns_to_split)
375bb675 2037 {
ef770610 2038 maybe_strip_eq_note_for_split_iv (opt_info, insn);
2039
c1f445d2 2040 ivts = opt_info->insns_to_split->find (&ivts_templ);
48e1416a 2041
375bb675 2042 if (ivts)
2043 {
19ef3705 2044 gcc_assert (GET_CODE (PATTERN (insn))
2045 == GET_CODE (PATTERN (orig_insn)));
48e1416a 2046
375bb675 2047 if (!delta)
2048 insert_base_initialization (ivts, insn);
2049 split_iv (ivts, insn, delta);
2050 }
2051 }
2052 /* Apply variable expansion optimization. */
c1f445d2 2053 if (unrolling && opt_info->insns_with_var_to_expand)
375bb675 2054 {
4077bf7a 2055 ves = (struct var_to_expand *)
c1f445d2 2056 opt_info->insns_with_var_to_expand->find (&ve_templ);
375bb675 2057 if (ves)
48e1416a 2058 {
19ef3705 2059 gcc_assert (GET_CODE (PATTERN (insn))
2060 == GET_CODE (PATTERN (orig_insn)));
375bb675 2061 expand_var_during_unrolling (ves, insn);
2062 }
2063 }
2064 orig_insn = NEXT_INSN (orig_insn);
2065 }
a9989fb4 2066 }
2067
2068 if (!rewrite_original_loop)
2069 return;
48e1416a 2070
375bb675 2071 /* Initialize the variable expansions in the loop preheader
48e1416a 2072 and take care of combining them at the loop exit. */
c1f445d2 2073 if (opt_info->insns_with_var_to_expand)
375bb675 2074 {
b635a1a3 2075 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2076 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2077 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2078 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
375bb675 2079 }
48e1416a 2080
a9989fb4 2081 /* Rewrite also the original loop body. Find them as originals of the blocks
2082 in the last copied iteration, i.e. those that have
01020a5f 2083 get_bb_copy (get_bb_original (bb)) == bb. */
fe672ac0 2084 for (i = opt_info->first_new_block;
2085 i < (unsigned) last_basic_block_for_fn (cfun);
2086 i++)
a9989fb4 2087 {
f5a6b05f 2088 bb = BASIC_BLOCK_FOR_FN (cfun, i);
01020a5f 2089 orig_bb = get_bb_original (bb);
2090 if (get_bb_copy (orig_bb) != bb)
a9989fb4 2091 continue;
48e1416a 2092
a9989fb4 2093 delta = determine_split_iv_delta (0, n_copies, unrolling);
2094 for (orig_insn = BB_HEAD (orig_bb);
375bb675 2095 orig_insn != NEXT_INSN (BB_END (bb));
2096 orig_insn = next)
2097 {
2098 next = NEXT_INSN (orig_insn);
48e1416a 2099
375bb675 2100 if (!INSN_P (orig_insn))
2101 continue;
48e1416a 2102
375bb675 2103 ivts_templ.insn = orig_insn;
c1f445d2 2104 if (opt_info->insns_to_split)
375bb675 2105 {
ef770610 2106 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2107
4077bf7a 2108 ivts = (struct iv_to_split *)
c1f445d2 2109 opt_info->insns_to_split->find (&ivts_templ);
375bb675 2110 if (ivts)
2111 {
2112 if (!delta)
2113 insert_base_initialization (ivts, orig_insn);
2114 split_iv (ivts, orig_insn, delta);
2115 continue;
2116 }
2117 }
48e1416a 2118
375bb675 2119 }
2120 }
2121}
a9989fb4 2122
375bb675 2123/* Release OPT_INFO. */
a9989fb4 2124
2125static void
375bb675 2126free_opt_info (struct opt_info *opt_info)
a9989fb4 2127{
c1f445d2 2128 delete opt_info->insns_to_split;
2129 opt_info->insns_to_split = NULL;
2130 if (opt_info->insns_with_var_to_expand)
375bb675 2131 {
b635a1a3 2132 struct var_to_expand *ves;
2133
2134 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
f1f41a6c 2135 ves->var_expansions.release ();
c1f445d2 2136 delete opt_info->insns_with_var_to_expand;
2137 opt_info->insns_with_var_to_expand = NULL;
375bb675 2138 }
2139 free (opt_info);
a9989fb4 2140}