]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/loop-unroll.c
Correct a function pre/postcondition [PR102403].
[thirdparty/gcc.git] / gcc / loop-unroll.c
CommitLineData
f8934be7 1/* Loop unrolling.
99dee823 2 Copyright (C) 2002-2021 Free Software Foundation, Inc.
a29c7ea6
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
9dcd6f09 8Software Foundation; either version 3, or (at your option) any later
a29c7ea6
ZD
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
a29c7ea6
ZD
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
c7131fb2 23#include "backend.h"
957060b5 24#include "target.h"
a29c7ea6 25#include "rtl.h"
4d648807 26#include "tree.h"
957060b5 27#include "cfghooks.h"
4d0cdd0c 28#include "memmodel.h"
957060b5
AM
29#include "optabs.h"
30#include "emit-rtl.h"
31#include "recog.h"
59f2e9d8 32#include "profile.h"
60393bbc 33#include "cfgrtl.h"
a29c7ea6 34#include "cfgloop.h"
36566b39 35#include "dojump.h"
a29c7ea6 36#include "expr.h"
7ee2468b 37#include "dumpfile.h"
a29c7ea6 38
f8934be7
JH
39/* This pass performs loop unrolling. We only perform this
40 optimization on innermost loops (with single exception) because
a29c7ea6
ZD
41 the impact on performance is greatest here, and we want to avoid
42 unnecessary code size growth. The gain is caused by greater sequentiality
4d6922ee 43 of code, better code to optimize for further passes and in some cases
a29c7ea6
ZD
44 by fewer testings of exit conditions. The main problem is code growth,
45 that impacts performance negatively due to effect of caches.
46
47 What we do:
48
a29c7ea6
ZD
49 -- unrolling of loops that roll constant times; this is almost always
50 win, as we get rid of exit condition tests.
51 -- unrolling of loops that roll number of times that we can compute
52 in runtime; we also get rid of exit condition tests here, but there
53 is the extra expense for calculating the number of iterations
54 -- simple unrolling of remaining loops; this is performed only if we
55 are asked to, as the gain is questionable in this case and often
56 it may even slow down the code
57 For more detailed descriptions of each of those, see comments at
58 appropriate function below.
59
60 There is a lot of parameters (defined and described in params.def) that
f8934be7 61 control how much we unroll.
a29c7ea6
ZD
62
63 ??? A great problem is that we don't have a good way how to determine
64 how many times we should unroll the loop; the experiments I have made
65 showed that this choice may affect performance in order of several %.
66 */
67
113d659a
ZD
68/* Information about induction variables to split. */
69
70struct iv_to_split
71{
95ecfb26 72 rtx_insn *insn; /* The insn in that the induction variable occurs. */
4a0c3fde 73 rtx orig_var; /* The variable (register) for the IV before split. */
113d659a
ZD
74 rtx base_var; /* The variable on that the values in the further
75 iterations are based. */
76 rtx step; /* Step of the induction variable. */
a9f6ecee 77 struct iv_to_split *next; /* Next entry in walking order. */
113d659a
ZD
78};
79
f37a4f14
RE
80/* Information about accumulators to expand. */
81
82struct var_to_expand
83{
95ecfb26 84 rtx_insn *insn; /* The insn in that the variable expansion occurs. */
471854f8 85 rtx reg; /* The accumulator which is expanded. */
9771b263 86 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
a9f6ecee 87 struct var_to_expand *next; /* Next entry in walking order. */
b8698a0f 88 enum rtx_code op; /* The type of the accumulation - addition, subtraction
f37a4f14
RE
89 or multiplication. */
90 int expansion_count; /* Count the number of expansions generated so far. */
91 int reuse_expansion; /* The expansion we intend to reuse to expand
b8698a0f
L
92 the accumulator. If REUSE_EXPANSION is 0 reuse
93 the original accumulator. Else use
f37a4f14
RE
94 var_expansions[REUSE_EXPANSION - 1]. */
95};
96
4a8fb1a1
LC
97/* Hashtable helper for iv_to_split. */
98
95fbe13e 99struct iv_split_hasher : free_ptr_hash <iv_to_split>
4a8fb1a1 100{
67f58944
TS
101 static inline hashval_t hash (const iv_to_split *);
102 static inline bool equal (const iv_to_split *, const iv_to_split *);
4a8fb1a1
LC
103};
104
105
106/* A hash function for information about insns to split. */
107
108inline hashval_t
67f58944 109iv_split_hasher::hash (const iv_to_split *ivts)
4a8fb1a1
LC
110{
111 return (hashval_t) INSN_UID (ivts->insn);
112}
113
114/* An equality functions for information about insns to split. */
115
116inline bool
67f58944 117iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
4a8fb1a1
LC
118{
119 return i1->insn == i2->insn;
120}
121
122/* Hashtable helper for iv_to_split. */
123
95fbe13e 124struct var_expand_hasher : free_ptr_hash <var_to_expand>
4a8fb1a1 125{
67f58944
TS
126 static inline hashval_t hash (const var_to_expand *);
127 static inline bool equal (const var_to_expand *, const var_to_expand *);
4a8fb1a1
LC
128};
129
130/* Return a hash for VES. */
131
132inline hashval_t
67f58944 133var_expand_hasher::hash (const var_to_expand *ves)
4a8fb1a1
LC
134{
135 return (hashval_t) INSN_UID (ves->insn);
136}
137
138/* Return true if I1 and I2 refer to the same instruction. */
139
140inline bool
67f58944 141var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
4a8fb1a1
LC
142{
143 return i1->insn == i2->insn;
144}
145
f37a4f14
RE
146/* Information about optimization applied in
147 the unrolled loop. */
148
149struct opt_info
113d659a 150{
c203e8a7 151 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
4a8fb1a1 152 split. */
a9f6ecee
AO
153 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
154 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
c203e8a7 155 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
4a8fb1a1 156 insns with accumulators to expand. */
a9f6ecee
AO
157 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
158 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
f37a4f14
RE
159 unsigned first_new_block; /* The first basic block that was
160 duplicated. */
161 basic_block loop_exit; /* The loop exit basic block. */
162 basic_block loop_preheader; /* The loop preheader basic block. */
113d659a
ZD
163};
164
99b1c316
MS
165static void decide_unroll_stupid (class loop *, int);
166static void decide_unroll_constant_iterations (class loop *, int);
167static void decide_unroll_runtime_iterations (class loop *, int);
168static void unroll_loop_stupid (class loop *);
f8934be7 169static void decide_unrolling (int);
99b1c316
MS
170static void unroll_loop_constant_iterations (class loop *);
171static void unroll_loop_runtime_iterations (class loop *);
172static struct opt_info *analyze_insns_in_loop (class loop *);
f37a4f14
RE
173static void opt_info_start_duplication (struct opt_info *);
174static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
175static void free_opt_info (struct opt_info *);
99b1c316
MS
176static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
177static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
1b20d55a 178static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
95ecfb26 179static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
a9f6ecee
AO
180static void insert_var_expansion_initialization (struct var_to_expand *,
181 basic_block);
182static void combine_var_copies_in_loop_exit (struct var_to_expand *,
183 basic_block);
f37a4f14 184static rtx get_expansion (struct var_to_expand *);
a29c7ea6 185
f8934be7 186/* Emit a message summarizing the unroll that will be
e25a6711
TJ
187 performed for LOOP, along with the loop's location LOCUS, if
188 appropriate given the dump or -fopt-info settings. */
189
190static void
99b1c316 191report_unroll (class loop *loop, dump_location_t locus)
e25a6711 192{
8264c84d 193 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
e25a6711 194
ad4db775
TJ
195 if (loop->lpt_decision.decision == LPT_NONE)
196 return;
197
e25a6711
TJ
198 if (!dump_enabled_p ())
199 return;
200
3da39f52
DM
201 dump_metadata_t metadata (report_flags, locus.get_impl_location ());
202 dump_printf_loc (metadata, locus.get_user_location (),
f8934be7
JH
203 "loop unrolled %d times",
204 loop->lpt_decision.times);
3995f3a2 205 if (profile_info && loop->header->count.initialized_p ())
3da39f52 206 dump_printf (metadata,
f8934be7 207 " (header execution count %d)",
3995f3a2 208 (int)loop->header->count.to_gcov_type ());
e25a6711 209
3da39f52 210 dump_printf (metadata, "\n");
e25a6711
TJ
211}
212
f8934be7 213/* Decide whether unroll loops and how much. */
a29c7ea6 214static void
f8934be7 215decide_unrolling (int flags)
a29c7ea6 216{
a29c7ea6 217 /* Scan the loops, inner ones first. */
e41ba804 218 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
a29c7ea6 219 {
a29c7ea6 220 loop->lpt_decision.decision = LPT_NONE;
4f5b9c80 221 dump_user_location_t locus = get_loop_location (loop);
a29c7ea6 222
e25a6711 223 if (dump_enabled_p ())
8264c84d 224 dump_printf_loc (MSG_NOTE, locus,
ac9effed
EB
225 "considering unrolling loop %d at BB %d\n",
226 loop->num, loop->header->index);
227
228 if (loop->unroll == 1)
229 {
230 if (dump_file)
231 fprintf (dump_file,
232 ";; Not unrolling loop, user didn't want it unrolled\n");
233 continue;
234 }
a29c7ea6
ZD
235
236 /* Do not peel cold areas. */
efd8f750 237 if (optimize_loop_for_size_p (loop))
a29c7ea6 238 {
c263766c
RH
239 if (dump_file)
240 fprintf (dump_file, ";; Not considering loop, cold area\n");
a29c7ea6
ZD
241 continue;
242 }
243
244 /* Can the loop be manipulated? */
245 if (!can_duplicate_loop_p (loop))
246 {
c263766c
RH
247 if (dump_file)
248 fprintf (dump_file,
a29c7ea6 249 ";; Not considering loop, cannot duplicate\n");
a29c7ea6
ZD
250 continue;
251 }
252
253 /* Skip non-innermost loops. */
254 if (loop->inner)
255 {
c263766c
RH
256 if (dump_file)
257 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
a29c7ea6
ZD
258 continue;
259 }
260
261 loop->ninsns = num_loop_insns (loop);
262 loop->av_ninsns = average_num_loop_insns (loop);
263
ac9effed 264 /* Try transformations one by one in decreasing order of priority. */
d47cc544 265 decide_unroll_constant_iterations (loop, flags);
a29c7ea6 266 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 267 decide_unroll_runtime_iterations (loop, flags);
a29c7ea6 268 if (loop->lpt_decision.decision == LPT_NONE)
d47cc544 269 decide_unroll_stupid (loop, flags);
a29c7ea6 270
f8934be7 271 report_unroll (loop, locus);
a29c7ea6 272 }
a29c7ea6
ZD
273}
274
f8934be7
JH
275/* Unroll LOOPS. */
276void
277unroll_loops (int flags)
a29c7ea6 278{
f8934be7 279 bool changed = false;
a29c7ea6 280
f8934be7
JH
281 /* Now decide rest of unrolling. */
282 decide_unrolling (flags);
a29c7ea6 283
f8934be7 284 /* Scan the loops, inner ones first. */
e41ba804 285 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
a29c7ea6 286 {
f8934be7
JH
287 /* And perform the appropriate transformations. */
288 switch (loop->lpt_decision.decision)
0c20a65f 289 {
f8934be7
JH
290 case LPT_UNROLL_CONSTANT:
291 unroll_loop_constant_iterations (loop);
292 changed = true;
293 break;
294 case LPT_UNROLL_RUNTIME:
295 unroll_loop_runtime_iterations (loop);
296 changed = true;
297 break;
298 case LPT_UNROLL_STUPID:
299 unroll_loop_stupid (loop);
300 changed = true;
301 break;
302 case LPT_NONE:
303 break;
304 default:
305 gcc_unreachable ();
a29c7ea6 306 }
a29c7ea6
ZD
307 }
308
f8934be7
JH
309 if (changed)
310 {
311 calculate_dominance_info (CDI_DOMINATORS);
312 fix_loop_structure (NULL);
313 }
0c20a65f 314
f8934be7
JH
315 iv_analysis_done ();
316}
a29c7ea6 317
f8934be7 318/* Check whether exit of the LOOP is at the end of loop body. */
0c20a65f 319
f8934be7 320static bool
99b1c316 321loop_exit_at_end_p (class loop *loop)
a29c7ea6 322{
99b1c316 323 class niter_desc *desc = get_simple_loop_desc (loop);
f8934be7 324 rtx_insn *insn;
35b07080 325
f8934be7
JH
326 /* We should never have conditional in latch block. */
327 gcc_assert (desc->in_edge->dest != loop->header);
b8698a0f 328
f8934be7
JH
329 if (desc->in_edge->dest != loop->latch)
330 return false;
113d659a 331
f8934be7
JH
332 /* Check that the latch is empty. */
333 FOR_BB_INSNS (loop->latch, insn)
334 {
335 if (INSN_P (insn) && active_insn_p (insn))
336 return false;
35b07080 337 }
a29c7ea6 338
f8934be7 339 return true;
a29c7ea6
ZD
340}
341
c263766c
RH
342/* Decide whether to unroll LOOP iterating constant number of times
343 and how much. */
50654f6c 344
a29c7ea6 345static void
99b1c316 346decide_unroll_constant_iterations (class loop *loop, int flags)
a29c7ea6 347{
50654f6c 348 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
99b1c316 349 class niter_desc *desc;
807e902e 350 widest_int iterations;
a29c7ea6 351
ac9effed
EB
352 /* If we were not asked to unroll this loop, just return back silently. */
353 if (!(flags & UAP_UNROLL) && !loop->unroll)
354 return;
a29c7ea6 355
ac9effed
EB
356 if (dump_enabled_p ())
357 dump_printf (MSG_NOTE,
358 "considering unrolling loop with constant "
359 "number of iterations\n");
a29c7ea6
ZD
360
361 /* nunroll = total number of copies of the original loop body in
ac9effed 362 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
028d4092 363 nunroll = param_max_unrolled_insns / loop->ninsns;
c263766c 364 nunroll_by_av
028d4092 365 = param_max_average_unrolled_insns / loop->av_ninsns;
a29c7ea6
ZD
366 if (nunroll > nunroll_by_av)
367 nunroll = nunroll_by_av;
028d4092
ML
368 if (nunroll > (unsigned) param_max_unroll_times)
369 nunroll = param_max_unroll_times;
a29c7ea6 370
df0f3392
GG
371 if (targetm.loop_unroll_adjust)
372 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
373
a29c7ea6
ZD
374 /* Skip big loops. */
375 if (nunroll <= 1)
376 {
c263766c
RH
377 if (dump_file)
378 fprintf (dump_file, ";; Not considering loop, is too big\n");
a29c7ea6
ZD
379 return;
380 }
381
382 /* Check for simple loops. */
50654f6c 383 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
384
385 /* Check number of iterations. */
50654f6c 386 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
a29c7ea6 387 {
c263766c
RH
388 if (dump_file)
389 fprintf (dump_file,
390 ";; Unable to prove that the loop iterates constant times\n");
a29c7ea6
ZD
391 return;
392 }
393
ac9effed 394 /* Check for an explicit unrolling factor. */
ff27462e 395 if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
ac9effed
EB
396 {
397 /* However we cannot unroll completely at the RTL level a loop with
398 constant number of iterations; it should have been peeled instead. */
4f683506 399 if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
ac9effed
EB
400 {
401 if (dump_file)
402 fprintf (dump_file, ";; Loop should have been peeled\n");
403 }
404 else
405 {
406 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
407 loop->lpt_decision.times = loop->unroll - 1;
408 }
409 return;
410 }
411
e598c332
JH
412 /* Check whether the loop rolls enough to consider.
413 Consult also loop bounds and profile; in the case the loop has more
414 than one exit it may well loop less than determined maximal number
415 of iterations. */
416 if (desc->niter < 2 * nunroll
71343877 417 || ((get_estimated_loop_iterations (loop, &iterations)
8c383600 418 || get_likely_max_loop_iterations (loop, &iterations))
807e902e 419 && wi::ltu_p (iterations, 2 * nunroll)))
a29c7ea6 420 {
c263766c
RH
421 if (dump_file)
422 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
a29c7ea6
ZD
423 return;
424 }
425
426 /* Success; now compute number of iterations to unroll. We alter
427 nunroll so that as few as possible copies of loop body are
e0bb17a8 428 necessary, while still not decreasing the number of unrollings
a29c7ea6
ZD
429 too much (at most by 1). */
430 best_copies = 2 * nunroll + 10;
431
432 i = 2 * nunroll + 2;
ac9effed 433 if (i > desc->niter - 2)
50654f6c 434 i = desc->niter - 2;
a29c7ea6
ZD
435
436 for (; i >= nunroll - 1; i--)
437 {
50654f6c 438 unsigned exit_mod = desc->niter % (i + 1);
a29c7ea6 439
50654f6c 440 if (!loop_exit_at_end_p (loop))
a29c7ea6 441 n_copies = exit_mod + i + 1;
50654f6c
ZD
442 else if (exit_mod != (unsigned) i
443 || desc->noloop_assumptions != NULL_RTX)
a29c7ea6
ZD
444 n_copies = exit_mod + i + 2;
445 else
446 n_copies = i + 1;
447
448 if (n_copies < best_copies)
449 {
450 best_copies = n_copies;
451 best_unroll = i;
452 }
453 }
454
a29c7ea6
ZD
455 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
456 loop->lpt_decision.times = best_unroll;
457}
458
4fc2e37d
EB
459/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
460 The transformation does this:
0c20a65f 461
a29c7ea6
ZD
462 for (i = 0; i < 102; i++)
463 body;
0c20a65f 464
4fc2e37d 465 ==> (LOOP->LPT_DECISION.TIMES == 3)
0c20a65f 466
a29c7ea6
ZD
467 i = 0;
468 body; i++;
469 body; i++;
470 while (i < 102)
471 {
472 body; i++;
473 body; i++;
474 body; i++;
475 body; i++;
476 }
477 */
478static void
99b1c316 479unroll_loop_constant_iterations (class loop *loop)
a29c7ea6
ZD
480{
481 unsigned HOST_WIDE_INT niter;
482 unsigned exit_mod;
ee8c1b05 483 unsigned i;
ee8c1b05 484 edge e;
a29c7ea6 485 unsigned max_unroll = loop->lpt_decision.times;
99b1c316 486 class niter_desc *desc = get_simple_loop_desc (loop);
50654f6c 487 bool exit_at_end = loop_exit_at_end_p (loop);
f37a4f14 488 struct opt_info *opt_info = NULL;
41806d92 489 bool ok;
b8698a0f 490
a29c7ea6
ZD
491 niter = desc->niter;
492
113d659a
ZD
493 /* Should not get here (such loop should be peeled instead). */
494 gcc_assert (niter > max_unroll + 1);
a29c7ea6
ZD
495
496 exit_mod = niter % (max_unroll + 1);
497
f389e798 498 auto_sbitmap wont_exit (max_unroll + 2);
f61e445a 499 bitmap_ones (wont_exit);
a29c7ea6 500
ef062b13 501 auto_vec<edge> remove_edges;
b8698a0f 502 if (flag_split_ivs_in_unroller
f37a4f14
RE
503 || flag_variable_expansion_in_unroller)
504 opt_info = analyze_insns_in_loop (loop);
b8698a0f 505
50654f6c 506 if (!exit_at_end)
a29c7ea6 507 {
50654f6c 508 /* The exit is not at the end of the loop; leave exit test
a29c7ea6
ZD
509 in the first copy, so that the loops that start with test
510 of exit condition have continuous body after unrolling. */
511
c263766c 512 if (dump_file)
4fc2e37d 513 fprintf (dump_file, ";; Condition at beginning of loop.\n");
a29c7ea6
ZD
514
515 /* Peel exit_mod iterations. */
d7c028c0 516 bitmap_clear_bit (wont_exit, 0);
50654f6c 517 if (desc->noloop_assumptions)
d7c028c0 518 bitmap_clear_bit (wont_exit, 1);
a29c7ea6 519
50654f6c
ZD
520 if (exit_mod)
521 {
f37a4f14 522 opt_info_start_duplication (opt_info);
41806d92 523 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
d73be268 524 exit_mod,
50654f6c 525 wont_exit, desc->out_edge,
ee8c1b05 526 &remove_edges,
7f7b1718
JH
527 DLTHE_FLAG_UPDATE_FREQ
528 | (opt_info && exit_mod > 1
529 ? DLTHE_RECORD_COPY_NUMBER
530 : 0));
41806d92 531 gcc_assert (ok);
50654f6c 532
f37a4f14 533 if (opt_info && exit_mod > 1)
b8698a0f
L
534 apply_opt_in_copies (opt_info, exit_mod, false, false);
535
50654f6c
ZD
536 desc->noloop_assumptions = NULL_RTX;
537 desc->niter -= exit_mod;
807e902e 538 loop->nb_iterations_upper_bound -= exit_mod;
e3a8f1fa 539 if (loop->any_estimate
807e902e
KZ
540 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
541 loop->nb_iterations_estimate -= exit_mod;
e3a8f1fa
JH
542 else
543 loop->any_estimate = false;
105e29c5
JH
544 if (loop->any_likely_upper_bound
545 && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
546 loop->nb_iterations_likely_upper_bound -= exit_mod;
547 else
548 loop->any_likely_upper_bound = false;
50654f6c 549 }
a29c7ea6 550
d7c028c0 551 bitmap_set_bit (wont_exit, 1);
a29c7ea6
ZD
552 }
553 else
554 {
555 /* Leave exit test in last copy, for the same reason as above if
556 the loop tests the condition at the end of loop body. */
557
c263766c 558 if (dump_file)
4fc2e37d 559 fprintf (dump_file, ";; Condition at end of loop.\n");
a29c7ea6
ZD
560
561 /* We know that niter >= max_unroll + 2; so we do not need to care of
562 case when we would exit before reaching the loop. So just peel
50654f6c
ZD
563 exit_mod + 1 iterations. */
564 if (exit_mod != max_unroll
565 || desc->noloop_assumptions)
a29c7ea6 566 {
d7c028c0 567 bitmap_clear_bit (wont_exit, 0);
50654f6c 568 if (desc->noloop_assumptions)
d7c028c0 569 bitmap_clear_bit (wont_exit, 1);
b8698a0f 570
f37a4f14 571 opt_info_start_duplication (opt_info);
41806d92 572 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
d73be268 573 exit_mod + 1,
41806d92 574 wont_exit, desc->out_edge,
ee8c1b05 575 &remove_edges,
7f7b1718
JH
576 DLTHE_FLAG_UPDATE_FREQ
577 | (opt_info && exit_mod > 0
578 ? DLTHE_RECORD_COPY_NUMBER
579 : 0));
41806d92 580 gcc_assert (ok);
b8698a0f 581
f37a4f14
RE
582 if (opt_info && exit_mod > 0)
583 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
113d659a 584
50654f6c 585 desc->niter -= exit_mod + 1;
807e902e 586 loop->nb_iterations_upper_bound -= exit_mod + 1;
e3a8f1fa 587 if (loop->any_estimate
807e902e
KZ
588 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
589 loop->nb_iterations_estimate -= exit_mod + 1;
e3a8f1fa
JH
590 else
591 loop->any_estimate = false;
105e29c5
JH
592 if (loop->any_likely_upper_bound
593 && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
594 loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
595 else
596 loop->any_likely_upper_bound = false;
50654f6c
ZD
597 desc->noloop_assumptions = NULL_RTX;
598
d7c028c0
LC
599 bitmap_set_bit (wont_exit, 0);
600 bitmap_set_bit (wont_exit, 1);
a29c7ea6
ZD
601 }
602
d7c028c0 603 bitmap_clear_bit (wont_exit, max_unroll);
a29c7ea6
ZD
604 }
605
606 /* Now unroll the loop. */
b8698a0f 607
f37a4f14 608 opt_info_start_duplication (opt_info);
41806d92 609 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
d73be268 610 max_unroll,
41806d92 611 wont_exit, desc->out_edge,
ee8c1b05 612 &remove_edges,
7f7b1718
JH
613 DLTHE_FLAG_UPDATE_FREQ
614 | (opt_info
615 ? DLTHE_RECORD_COPY_NUMBER
616 : 0));
41806d92 617 gcc_assert (ok);
a29c7ea6 618
f37a4f14 619 if (opt_info)
113d659a 620 {
f37a4f14
RE
621 apply_opt_in_copies (opt_info, max_unroll, true, true);
622 free_opt_info (opt_info);
113d659a
ZD
623 }
624
50654f6c
ZD
625 if (exit_at_end)
626 {
6580ee77 627 basic_block exit_block = get_bb_copy (desc->in_edge->src);
50654f6c 628 /* Find a new in and out edge; they are in the last copy we have made. */
b8698a0f 629
628f6a4e 630 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
50654f6c 631 {
628f6a4e
BE
632 desc->out_edge = EDGE_SUCC (exit_block, 0);
633 desc->in_edge = EDGE_SUCC (exit_block, 1);
50654f6c
ZD
634 }
635 else
636 {
628f6a4e
BE
637 desc->out_edge = EDGE_SUCC (exit_block, 1);
638 desc->in_edge = EDGE_SUCC (exit_block, 0);
50654f6c
ZD
639 }
640 }
641
642 desc->niter /= max_unroll + 1;
e3a8f1fa 643 loop->nb_iterations_upper_bound
807e902e 644 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
e3a8f1fa
JH
645 if (loop->any_estimate)
646 loop->nb_iterations_estimate
807e902e 647 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
105e29c5
JH
648 if (loop->any_likely_upper_bound)
649 loop->nb_iterations_likely_upper_bound
650 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
2a23a1c3 651 desc->niter_expr = gen_int_mode (desc->niter, desc->mode);
50654f6c 652
a29c7ea6 653 /* Remove the edges. */
9771b263 654 FOR_EACH_VEC_ELT (remove_edges, i, e)
ee8c1b05 655 remove_path (e);
a29c7ea6 656
c263766c
RH
657 if (dump_file)
658 fprintf (dump_file,
659 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
660 max_unroll, num_loop_insns (loop));
a29c7ea6
ZD
661}
662
663/* Decide whether to unroll LOOP iterating runtime computable number of times
664 and how much. */
665static void
99b1c316 666decide_unroll_runtime_iterations (class loop *loop, int flags)
a29c7ea6
ZD
667{
668 unsigned nunroll, nunroll_by_av, i;
99b1c316 669 class niter_desc *desc;
807e902e 670 widest_int iterations;
a29c7ea6 671
ac9effed
EB
672 /* If we were not asked to unroll this loop, just return back silently. */
673 if (!(flags & UAP_UNROLL) && !loop->unroll)
674 return;
a29c7ea6 675
ac9effed
EB
676 if (dump_enabled_p ())
677 dump_printf (MSG_NOTE,
678 "considering unrolling loop with runtime-"
679 "computable number of iterations\n");
a29c7ea6
ZD
680
681 /* nunroll = total number of copies of the original loop body in
682 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
028d4092
ML
683 nunroll = param_max_unrolled_insns / loop->ninsns;
684 nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
a29c7ea6
ZD
685 if (nunroll > nunroll_by_av)
686 nunroll = nunroll_by_av;
028d4092
ML
687 if (nunroll > (unsigned) param_max_unroll_times)
688 nunroll = param_max_unroll_times;
a29c7ea6 689
40ac4f73
CB
690 if (targetm.loop_unroll_adjust)
691 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
692
ff27462e 693 if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
ac9effed
EB
694 nunroll = loop->unroll;
695
a29c7ea6
ZD
696 /* Skip big loops. */
697 if (nunroll <= 1)
698 {
c263766c
RH
699 if (dump_file)
700 fprintf (dump_file, ";; Not considering loop, is too big\n");
a29c7ea6
ZD
701 return;
702 }
703
704 /* Check for simple loops. */
50654f6c 705 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
706
707 /* Check simpleness. */
50654f6c 708 if (!desc->simple_p || desc->assumptions)
a29c7ea6 709 {
c263766c
RH
710 if (dump_file)
711 fprintf (dump_file,
712 ";; Unable to prove that the number of iterations "
713 "can be counted in runtime\n");
a29c7ea6
ZD
714 return;
715 }
716
50654f6c 717 if (desc->const_iter)
a29c7ea6 718 {
c263766c
RH
719 if (dump_file)
720 fprintf (dump_file, ";; Loop iterates constant times\n");
a29c7ea6
ZD
721 return;
722 }
723
73367f92 724 /* Check whether the loop rolls. */
71343877 725 if ((get_estimated_loop_iterations (loop, &iterations)
8c383600 726 || get_likely_max_loop_iterations (loop, &iterations))
807e902e 727 && wi::ltu_p (iterations, 2 * nunroll))
a29c7ea6 728 {
c263766c
RH
729 if (dump_file)
730 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
a29c7ea6
ZD
731 return;
732 }
733
ac9effed
EB
734 /* Success; now force nunroll to be power of 2, as code-gen
735 requires it, we are unable to cope with overflows in
736 computation of number of iterations. */
50654f6c
ZD
737 for (i = 1; 2 * i <= nunroll; i *= 2)
738 continue;
a29c7ea6
ZD
739
740 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
741 loop->lpt_decision.times = i - 1;
742}
743
7d93d987
ZD
744/* Splits edge E and inserts the sequence of instructions INSNS on it, and
745 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
746 and NULL is returned instead. */
598ec7bd
ZD
747
748basic_block
95ecfb26 749split_edge_and_insert (edge e, rtx_insn *insns)
598ec7bd 750{
7d93d987
ZD
751 basic_block bb;
752
753 if (!insns)
754 return NULL;
b8698a0f 755 bb = split_edge (e);
598ec7bd 756 emit_insn_after (insns, BB_END (bb));
7984c787
SB
757
758 /* ??? We used to assume that INSNS can contain control flow insns, and
759 that we had to try to find sub basic blocks in BB to maintain a valid
760 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
761 and call break_superblocks when going out of cfglayout mode. But it
762 turns out that this never happens; and that if it does ever happen,
3bea341f 763 the verify_flow_info at the end of the RTL loop passes would fail.
7984c787
SB
764
765 There are two reasons why we expected we could have control flow insns
766 in INSNS. The first is when a comparison has to be done in parts, and
767 the second is when the number of iterations is computed for loops with
768 the number of iterations known at runtime. In both cases, test cases
769 to get control flow in INSNS appear to be impossible to construct:
770
771 * If do_compare_rtx_and_jump needs several branches to do comparison
772 in a mode that needs comparison by parts, we cannot analyze the
773 number of iterations of the loop, and we never get to unrolling it.
774
775 * The code in expand_divmod that was suspected to cause creation of
776 branching code seems to be only accessed for signed division. The
777 divisions used by # of iterations analysis are always unsigned.
778 Problems might arise on architectures that emits branching code
779 for some operations that may appear in the unroller (especially
780 for division), but we have no such architectures.
781
782 Considering all this, it was decided that we should for now assume
783 that INSNS can in theory contain control flow insns, but in practice
784 it never does. So we don't handle the theoretical case, and should
785 a real failure ever show up, we have a pretty good clue for how to
786 fix it. */
787
598ec7bd
ZD
788 return bb;
789}
790
da4cfeac
RB
791/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
792 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
793 in order to create a jump. */
794
95ecfb26 795static rtx_insn *
1476d1bd 796compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
357067f2
JH
797 rtx_code_label *label, profile_probability prob,
798 rtx_insn *cinsn)
da4cfeac 799{
1476d1bd
MM
800 rtx_insn *seq;
801 rtx_jump_insn *jump;
95ecfb26 802 rtx cond;
ef4bddc2 803 machine_mode mode;
da4cfeac
RB
804
805 mode = GET_MODE (op0);
806 if (mode == VOIDmode)
807 mode = GET_MODE (op1);
808
809 start_sequence ();
810 if (GET_MODE_CLASS (mode) == MODE_CC)
811 {
812 /* A hack -- there seems to be no easy generic way how to make a
813 conditional jump from a ccmode comparison. */
814 gcc_assert (cinsn);
815 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
816 gcc_assert (GET_CODE (cond) == comp);
817 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
818 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
819 emit_jump_insn (copy_insn (PATTERN (cinsn)));
1476d1bd 820 jump = as_a <rtx_jump_insn *> (get_last_insn ());
da4cfeac
RB
821 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
822 LABEL_NUSES (JUMP_LABEL (jump))++;
823 redirect_jump (jump, label, 0);
824 }
825 else
826 {
827 gcc_assert (!cinsn);
828
829 op0 = force_operand (op0, NULL_RTX);
830 op1 = force_operand (op1, NULL_RTX);
831 do_compare_rtx_and_jump (op0, op1, comp, 0,
357067f2
JH
832 mode, NULL_RTX, NULL, label,
833 profile_probability::uninitialized ());
1476d1bd
MM
834 jump = as_a <rtx_jump_insn *> (get_last_insn ());
835 jump->set_jump_target (label);
da4cfeac
RB
836 LABEL_NUSES (label)++;
837 }
357067f2 838 if (prob.initialized_p ())
5fa396ad 839 add_reg_br_prob_note (jump, prob);
da4cfeac
RB
840
841 seq = get_insns ();
842 end_sequence ();
843
844 return seq;
845}
846
ac9effed
EB
847/* Unroll LOOP for which we are able to count number of iterations in
848 runtime LOOP->LPT_DECISION.TIMES times. The times value must be a
849 power of two. The transformation does this (with some extra care
850 for case n < 0):
0c20a65f 851
a29c7ea6
ZD
852 for (i = 0; i < n; i++)
853 body;
0c20a65f 854
4fc2e37d 855 ==> (LOOP->LPT_DECISION.TIMES == 3)
0c20a65f 856
a29c7ea6
ZD
857 i = 0;
858 mod = n % 4;
0c20a65f 859
a29c7ea6
ZD
860 switch (mod)
861 {
862 case 3:
863 body; i++;
864 case 2:
865 body; i++;
866 case 1:
867 body; i++;
868 case 0: ;
869 }
0c20a65f 870
a29c7ea6
ZD
871 while (i < n)
872 {
873 body; i++;
874 body; i++;
875 body; i++;
876 body; i++;
877 }
878 */
879static void
99b1c316 880unroll_loop_runtime_iterations (class loop *loop)
a29c7ea6 881{
95ecfb26
DM
882 rtx old_niter, niter, tmp;
883 rtx_insn *init_code, *branch_code;
4f899c42 884 unsigned i;
357067f2 885 profile_probability p;
de0a3fa3 886 basic_block preheader, *body, swtch, ezc_swtch = NULL;
e7a74006 887 int may_exit_copy;
3995f3a2 888 profile_count iter_count, new_count;
ee8c1b05 889 unsigned n_peel;
ee8c1b05 890 edge e;
a29c7ea6
ZD
891 bool extra_zero_check, last_may_exit;
892 unsigned max_unroll = loop->lpt_decision.times;
99b1c316 893 class niter_desc *desc = get_simple_loop_desc (loop);
50654f6c 894 bool exit_at_end = loop_exit_at_end_p (loop);
f37a4f14 895 struct opt_info *opt_info = NULL;
41806d92 896 bool ok;
b8698a0f 897
f37a4f14
RE
898 if (flag_split_ivs_in_unroller
899 || flag_variable_expansion_in_unroller)
900 opt_info = analyze_insns_in_loop (loop);
b8698a0f 901
a29c7ea6 902 /* Remember blocks whose dominators will have to be updated. */
ef062b13 903 auto_vec<basic_block> dom_bbs;
a29c7ea6
ZD
904
905 body = get_loop_body (loop);
906 for (i = 0; i < loop->num_nodes; i++)
907 {
4f899c42 908 for (basic_block bb : get_dominated_by (CDI_DOMINATORS, body[i]))
66f97d31 909 if (!flow_bb_inside_loop_p (loop, bb))
9771b263 910 dom_bbs.safe_push (bb);
a29c7ea6
ZD
911 }
912 free (body);
913
50654f6c 914 if (!exit_at_end)
a29c7ea6
ZD
915 {
916 /* Leave exit in first copy (for explanation why see comment in
917 unroll_loop_constant_iterations). */
918 may_exit_copy = 0;
919 n_peel = max_unroll - 1;
920 extra_zero_check = true;
921 last_may_exit = false;
922 }
923 else
924 {
925 /* Leave exit in last copy (for explanation why see comment in
926 unroll_loop_constant_iterations). */
927 may_exit_copy = max_unroll;
928 n_peel = max_unroll;
929 extra_zero_check = false;
930 last_may_exit = true;
931 }
932
933 /* Get expression for number of iterations. */
934 start_sequence ();
50654f6c
ZD
935 old_niter = niter = gen_reg_rtx (desc->mode);
936 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
937 if (tmp != niter)
938 emit_move_insn (niter, tmp);
a29c7ea6 939
8d34bfa8
PH
940 /* For loops that exit at end and whose number of iterations is reliable,
941 add one to niter to account for first pass through loop body before
942 reaching exit test. */
943 if (exit_at_end && !desc->noloop_assumptions)
de0a3fa3
PH
944 {
945 niter = expand_simple_binop (desc->mode, PLUS,
946 niter, const1_rtx,
947 NULL_RTX, 0, OPTAB_LIB_WIDEN);
948 old_niter = niter;
949 }
950
a29c7ea6
ZD
951 /* Count modulo by ANDing it with max_unroll; we use the fact that
952 the number of unrollings is a power of two, and thus this is correct
953 even if there is overflow in the computation. */
50654f6c 954 niter = expand_simple_binop (desc->mode, AND,
2f1cd2eb 955 niter, gen_int_mode (max_unroll, desc->mode),
a29c7ea6
ZD
956 NULL_RTX, 0, OPTAB_LIB_WIDEN);
957
958 init_code = get_insns ();
959 end_sequence ();
2ed22578 960 unshare_all_rtl_in_chain (init_code);
a29c7ea6
ZD
961
962 /* Precondition the loop. */
598ec7bd 963 split_edge_and_insert (loop_preheader_edge (loop), init_code);
a29c7ea6 964
ef062b13 965 auto_vec<edge> remove_edges;
a29c7ea6 966
7ba9e72d 967 auto_sbitmap wont_exit (max_unroll + 2);
a29c7ea6 968
8d34bfa8 969 if (extra_zero_check || desc->noloop_assumptions)
de0a3fa3
PH
970 {
971 /* Peel the first copy of loop body. Leave the exit test if the number
972 of iterations is not reliable. Also record the place of the extra zero
973 check. */
974 bitmap_clear (wont_exit);
975 if (!desc->noloop_assumptions)
976 bitmap_set_bit (wont_exit, 1);
977 ezc_swtch = loop_preheader_edge (loop)->src;
978 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
979 1, wont_exit, desc->out_edge,
980 &remove_edges,
981 DLTHE_FLAG_UPDATE_FREQ);
982 gcc_assert (ok);
983 }
a29c7ea6
ZD
984
985 /* Record the place where switch will be built for preconditioning. */
598ec7bd 986 swtch = split_edge (loop_preheader_edge (loop));
a29c7ea6 987
e7a74006 988 /* Compute count increments for each switch block and initialize
d4919e65
PH
989 innermost switch block. Switch blocks and peeled loop copies are built
990 from innermost outward. */
3995f3a2 991 iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
d4919e65 992 swtch->count = new_count;
d4919e65 993
a29c7ea6
ZD
994 for (i = 0; i < n_peel; i++)
995 {
996 /* Peel the copy. */
f61e445a 997 bitmap_clear (wont_exit);
a29c7ea6 998 if (i != n_peel - 1 || !last_may_exit)
d7c028c0 999 bitmap_set_bit (wont_exit, 1);
41806d92 1000 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
d73be268 1001 1, wont_exit, desc->out_edge,
ee8c1b05 1002 &remove_edges,
41806d92
NS
1003 DLTHE_FLAG_UPDATE_FREQ);
1004 gcc_assert (ok);
a29c7ea6 1005
91f4cfe3 1006 /* Create item for switch. */
4f899c42 1007 unsigned j = n_peel - i - (extra_zero_check ? 0 : 1);
357067f2 1008 p = profile_probability::always ().apply_scale (1, i + 2);
91f4cfe3 1009
598ec7bd 1010 preheader = split_edge (loop_preheader_edge (loop));
e7a74006 1011 /* Add in count of edge from switch block. */
d4919e65 1012 preheader->count += iter_count;
2a23a1c3
JJ
1013 branch_code = compare_and_jump_seq (copy_rtx (niter),
1014 gen_int_mode (j, desc->mode), EQ,
1015 block_label (preheader), p, NULL);
91f4cfe3 1016
7d93d987
ZD
1017 /* We rely on the fact that the compare and jump cannot be optimized out,
1018 and hence the cfg we create is correct. */
1019 gcc_assert (branch_code != NULL_RTX);
1020
598ec7bd 1021 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
d47cc544 1022 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
357067f2 1023 single_succ_edge (swtch)->probability = p.invert ();
d4919e65 1024 new_count += iter_count;
d4919e65 1025 swtch->count = new_count;
91f4cfe3 1026 e = make_edge (swtch, preheader,
c5cbcccf 1027 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
91f4cfe3 1028 e->probability = p;
a29c7ea6
ZD
1029 }
1030
1031 if (extra_zero_check)
1032 {
1033 /* Add branch for zero iterations. */
357067f2 1034 p = profile_probability::always ().apply_scale (1, max_unroll + 1);
a29c7ea6 1035 swtch = ezc_swtch;
598ec7bd 1036 preheader = split_edge (loop_preheader_edge (loop));
e7a74006 1037 /* Recompute count adjustments since initial peel copy may
d4919e65 1038 have exited and reduced those values that were computed above. */
3995f3a2 1039 iter_count = swtch->count.apply_scale (1, max_unroll + 1);
e7a74006 1040 /* Add in count of edge from switch block. */
d4919e65 1041 preheader->count += iter_count;
50654f6c 1042 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
41806d92 1043 block_label (preheader), p,
95ecfb26 1044 NULL);
7d93d987 1045 gcc_assert (branch_code != NULL_RTX);
a29c7ea6 1046
598ec7bd 1047 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
d47cc544 1048 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
357067f2 1049 single_succ_edge (swtch)->probability = p.invert ();
72b8d451 1050 e = make_edge (swtch, preheader,
c5cbcccf 1051 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
a29c7ea6
ZD
1052 e->probability = p;
1053 }
1054
1055 /* Recount dominators for outer blocks. */
66f97d31 1056 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
a29c7ea6
ZD
1057
1058 /* And unroll loop. */
1059
f61e445a 1060 bitmap_ones (wont_exit);
d7c028c0 1061 bitmap_clear_bit (wont_exit, may_exit_copy);
f37a4f14 1062 opt_info_start_duplication (opt_info);
b8698a0f 1063
41806d92 1064 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
d73be268 1065 max_unroll,
41806d92 1066 wont_exit, desc->out_edge,
ee8c1b05 1067 &remove_edges,
7f7b1718
JH
1068 DLTHE_FLAG_UPDATE_FREQ
1069 | (opt_info
1070 ? DLTHE_RECORD_COPY_NUMBER
1071 : 0));
41806d92 1072 gcc_assert (ok);
b8698a0f 1073
f37a4f14 1074 if (opt_info)
113d659a 1075 {
f37a4f14
RE
1076 apply_opt_in_copies (opt_info, max_unroll, true, true);
1077 free_opt_info (opt_info);
113d659a
ZD
1078 }
1079
50654f6c
ZD
1080 if (exit_at_end)
1081 {
6580ee77 1082 basic_block exit_block = get_bb_copy (desc->in_edge->src);
41806d92
NS
1083 /* Find a new in and out edge; they are in the last copy we have
1084 made. */
b8698a0f 1085
628f6a4e 1086 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
50654f6c 1087 {
628f6a4e
BE
1088 desc->out_edge = EDGE_SUCC (exit_block, 0);
1089 desc->in_edge = EDGE_SUCC (exit_block, 1);
50654f6c
ZD
1090 }
1091 else
1092 {
628f6a4e
BE
1093 desc->out_edge = EDGE_SUCC (exit_block, 1);
1094 desc->in_edge = EDGE_SUCC (exit_block, 0);
50654f6c
ZD
1095 }
1096 }
1097
a29c7ea6 1098 /* Remove the edges. */
9771b263 1099 FOR_EACH_VEC_ELT (remove_edges, i, e)
ee8c1b05 1100 remove_path (e);
a29c7ea6 1101
50654f6c
ZD
1102 /* We must be careful when updating the number of iterations due to
1103 preconditioning and the fact that the value must be valid at entry
1104 of the loop. After passing through the above code, we see that
1105 the correct new number of iterations is this: */
113d659a 1106 gcc_assert (!desc->const_iter);
50654f6c 1107 desc->niter_expr =
41806d92 1108 simplify_gen_binary (UDIV, desc->mode, old_niter,
69a59f0f 1109 gen_int_mode (max_unroll + 1, desc->mode));
e3a8f1fa 1110 loop->nb_iterations_upper_bound
807e902e 1111 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
e3a8f1fa
JH
1112 if (loop->any_estimate)
1113 loop->nb_iterations_estimate
807e902e 1114 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
105e29c5
JH
1115 if (loop->any_likely_upper_bound)
1116 loop->nb_iterations_likely_upper_bound
1117 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
50654f6c
ZD
1118 if (exit_at_end)
1119 {
1120 desc->niter_expr =
1121 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1122 desc->noloop_assumptions = NULL_RTX;
e3a8f1fa
JH
1123 --loop->nb_iterations_upper_bound;
1124 if (loop->any_estimate
807e902e 1125 && loop->nb_iterations_estimate != 0)
e3a8f1fa
JH
1126 --loop->nb_iterations_estimate;
1127 else
1128 loop->any_estimate = false;
105e29c5
JH
1129 if (loop->any_likely_upper_bound
1130 && loop->nb_iterations_likely_upper_bound != 0)
1131 --loop->nb_iterations_likely_upper_bound;
1132 else
1133 loop->any_likely_upper_bound = false;
50654f6c
ZD
1134 }
1135
c263766c
RH
1136 if (dump_file)
1137 fprintf (dump_file,
1138 ";; Unrolled loop %d times, counting # of iterations "
1139 "in runtime, %i insns\n",
a29c7ea6
ZD
1140 max_unroll, num_loop_insns (loop));
1141}
0c20a65f 1142
a29c7ea6
ZD
1143/* Decide whether to unroll LOOP stupidly and how much. */
1144static void
99b1c316 1145decide_unroll_stupid (class loop *loop, int flags)
a29c7ea6
ZD
1146{
1147 unsigned nunroll, nunroll_by_av, i;
99b1c316 1148 class niter_desc *desc;
807e902e 1149 widest_int iterations;
a29c7ea6 1150
ac9effed
EB
1151 /* If we were not asked to unroll this loop, just return back silently. */
1152 if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
1153 return;
a29c7ea6 1154
ac9effed
EB
1155 if (dump_enabled_p ())
1156 dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
a29c7ea6
ZD
1157
1158 /* nunroll = total number of copies of the original loop body in
1159 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
028d4092 1160 nunroll = param_max_unrolled_insns / loop->ninsns;
c263766c 1161 nunroll_by_av
028d4092 1162 = param_max_average_unrolled_insns / loop->av_ninsns;
a29c7ea6
ZD
1163 if (nunroll > nunroll_by_av)
1164 nunroll = nunroll_by_av;
028d4092
ML
1165 if (nunroll > (unsigned) param_max_unroll_times)
1166 nunroll = param_max_unroll_times;
a29c7ea6 1167
40ac4f73
CB
1168 if (targetm.loop_unroll_adjust)
1169 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1170
ff27462e 1171 if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
ac9effed
EB
1172 nunroll = loop->unroll;
1173
a29c7ea6
ZD
1174 /* Skip big loops. */
1175 if (nunroll <= 1)
1176 {
c263766c
RH
1177 if (dump_file)
1178 fprintf (dump_file, ";; Not considering loop, is too big\n");
a29c7ea6
ZD
1179 return;
1180 }
1181
1182 /* Check for simple loops. */
50654f6c 1183 desc = get_simple_loop_desc (loop);
a29c7ea6
ZD
1184
1185 /* Check simpleness. */
50654f6c 1186 if (desc->simple_p && !desc->assumptions)
a29c7ea6 1187 {
c263766c 1188 if (dump_file)
ac9effed 1189 fprintf (dump_file, ";; Loop is simple\n");
a29c7ea6
ZD
1190 return;
1191 }
1192
1193 /* Do not unroll loops with branches inside -- it increases number
6acf25e4
JH
1194 of mispredicts.
1195 TODO: this heuristic needs tunning; call inside the loop body
1196 is also relatively good reason to not unroll. */
50654f6c 1197 if (num_loop_branches (loop) > 1)
a29c7ea6 1198 {
c263766c
RH
1199 if (dump_file)
1200 fprintf (dump_file, ";; Not unrolling, contains branches\n");
a29c7ea6
ZD
1201 return;
1202 }
1203
73367f92 1204 /* Check whether the loop rolls. */
71343877 1205 if ((get_estimated_loop_iterations (loop, &iterations)
8c383600 1206 || get_likely_max_loop_iterations (loop, &iterations))
807e902e 1207 && wi::ltu_p (iterations, 2 * nunroll))
a29c7ea6 1208 {
c263766c
RH
1209 if (dump_file)
1210 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
a29c7ea6
ZD
1211 return;
1212 }
1213
1214 /* Success. Now force nunroll to be power of 2, as it seems that this
e0bb17a8 1215 improves results (partially because of better alignments, partially
a29c7ea6 1216 because of some dark magic). */
50654f6c
ZD
1217 for (i = 1; 2 * i <= nunroll; i *= 2)
1218 continue;
a29c7ea6
ZD
1219
1220 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1221 loop->lpt_decision.times = i - 1;
1222}
1223
4fc2e37d
EB
1224/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
1225
a29c7ea6
ZD
1226 while (cond)
1227 body;
1228
4fc2e37d 1229 ==> (LOOP->LPT_DECISION.TIMES == 3)
a29c7ea6
ZD
1230
1231 while (cond)
1232 {
1233 body;
1234 if (!cond) break;
1235 body;
1236 if (!cond) break;
1237 body;
1238 if (!cond) break;
1239 body;
1240 }
1241 */
1242static void
99b1c316 1243unroll_loop_stupid (class loop *loop)
a29c7ea6 1244{
a29c7ea6 1245 unsigned nunroll = loop->lpt_decision.times;
99b1c316 1246 class niter_desc *desc = get_simple_loop_desc (loop);
f37a4f14 1247 struct opt_info *opt_info = NULL;
41806d92 1248 bool ok;
b8698a0f 1249
f37a4f14
RE
1250 if (flag_split_ivs_in_unroller
1251 || flag_variable_expansion_in_unroller)
1252 opt_info = analyze_insns_in_loop (loop);
b8698a0f 1253
7ba9e72d 1254 auto_sbitmap wont_exit (nunroll + 1);
f61e445a 1255 bitmap_clear (wont_exit);
f37a4f14 1256 opt_info_start_duplication (opt_info);
b8698a0f 1257
41806d92 1258 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
d73be268 1259 nunroll, wont_exit,
ee8c1b05 1260 NULL, NULL,
7f7b1718
JH
1261 DLTHE_FLAG_UPDATE_FREQ
1262 | (opt_info
1263 ? DLTHE_RECORD_COPY_NUMBER
1264 : 0));
41806d92 1265 gcc_assert (ok);
b8698a0f 1266
f37a4f14 1267 if (opt_info)
113d659a 1268 {
f37a4f14
RE
1269 apply_opt_in_copies (opt_info, nunroll, true, true);
1270 free_opt_info (opt_info);
113d659a
ZD
1271 }
1272
50654f6c
ZD
1273 if (desc->simple_p)
1274 {
1275 /* We indeed may get here provided that there are nontrivial assumptions
1276 for a loop to be really simple. We could update the counts, but the
1277 problem is that we are unable to decide which exit will be taken
1278 (not really true in case the number of iterations is constant,
1aa95df7 1279 but no one will do anything with this information, so we do not
50654f6c
ZD
1280 worry about it). */
1281 desc->simple_p = false;
1282 }
1283
c263766c
RH
1284 if (dump_file)
1285 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
a29c7ea6
ZD
1286 nunroll, num_loop_insns (loop));
1287}
113d659a 1288
60c48e4c
AO
1289/* Returns true if REG is referenced in one nondebug insn in LOOP.
1290 Set *DEBUG_USES to the number of debug insns that reference the
1291 variable. */
f37a4f14 1292
dd637013 1293static bool
99b1c316 1294referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
60c48e4c 1295 int *debug_uses)
f37a4f14
RE
1296{
1297 basic_block *body, bb;
1298 unsigned i;
1299 int count_ref = 0;
95ecfb26 1300 rtx_insn *insn;
b8698a0f
L
1301
1302 body = get_loop_body (loop);
f37a4f14
RE
1303 for (i = 0; i < loop->num_nodes; i++)
1304 {
1305 bb = body[i];
b8698a0f 1306
f37a4f14 1307 FOR_BB_INSNS (bb, insn)
60c48e4c
AO
1308 if (!rtx_referenced_p (reg, insn))
1309 continue;
1310 else if (DEBUG_INSN_P (insn))
1311 ++*debug_uses;
1312 else if (++count_ref > 1)
1313 break;
f37a4f14 1314 }
60c48e4c 1315 free (body);
f37a4f14
RE
1316 return (count_ref == 1);
1317}
1318
60c48e4c
AO
1319/* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1320
1321static void
99b1c316 1322reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
60c48e4c
AO
1323{
1324 basic_block *body, bb;
1325 unsigned i;
95ecfb26 1326 rtx_insn *insn;
60c48e4c
AO
1327
1328 body = get_loop_body (loop);
1329 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1330 {
1331 bb = body[i];
1332
1333 FOR_BB_INSNS (bb, insn)
1334 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1335 continue;
1336 else
1337 {
1338 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1339 gen_rtx_UNKNOWN_VAR_LOC (), 0);
1340 if (!--debug_uses)
1341 break;
1342 }
1343 }
1344 free (body);
1345}
1346
f37a4f14 1347/* Determine whether INSN contains an accumulator
b8698a0f 1348 which can be expanded into separate copies,
f37a4f14 1349 one for each copy of the LOOP body.
b8698a0f 1350
f37a4f14
RE
1351 for (i = 0 ; i < n; i++)
1352 sum += a[i];
b8698a0f 1353
f37a4f14 1354 ==>
b8698a0f 1355
f37a4f14
RE
1356 sum += a[i]
1357 ....
1358 i = i+1;
1359 sum1 += a[i]
1360 ....
1361 i = i+1
1362 sum2 += a[i];
1363 ....
1364
b8698a0f
L
1365 Return NULL if INSN contains no opportunity for expansion of accumulator.
1366 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
f37a4f14
RE
1367 information and return a pointer to it.
1368*/
1369
1370static struct var_to_expand *
99b1c316 1371analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
f37a4f14 1372{
531e5376 1373 rtx set, dest, src;
f37a4f14 1374 struct var_to_expand *ves;
76fd2caa 1375 unsigned accum_pos;
531e5376 1376 enum rtx_code code;
60c48e4c 1377 int debug_uses = 0;
76fd2caa 1378
f37a4f14
RE
1379 set = single_set (insn);
1380 if (!set)
1381 return NULL;
b8698a0f 1382
f37a4f14
RE
1383 dest = SET_DEST (set);
1384 src = SET_SRC (set);
531e5376 1385 code = GET_CODE (src);
b8698a0f 1386
531e5376 1387 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
f37a4f14 1388 return NULL;
f2dd440f 1389
531e5376
RH
1390 if (FLOAT_MODE_P (GET_MODE (dest)))
1391 {
1392 if (!flag_associative_math)
1393 return NULL;
1394 /* In the case of FMA, we're also changing the rounding. */
1395 if (code == FMA && !flag_unsafe_math_optimizations)
1396 return NULL;
1397 }
1398
f2dd440f 1399 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
67914693 1400 in MD. But if there is no optab to generate the insn, we cannot
f2dd440f
SB
1401 perform the variable expansion. This can happen if an MD provides
1402 an insn but not a named pattern to generate it, for example to avoid
1403 producing code that needs additional mode switches like for x87/mmx.
1404
1405 So we check have_insn_for which looks for an optab for the operation
1406 in SRC. If it doesn't exist, we can't perform the expansion even
1407 though INSN is valid. */
531e5376 1408 if (!have_insn_for (code, GET_MODE (src)))
f2dd440f
SB
1409 return NULL;
1410
f37a4f14
RE
1411 if (!REG_P (dest)
1412 && !(GET_CODE (dest) == SUBREG
1413 && REG_P (SUBREG_REG (dest))))
1414 return NULL;
b8698a0f 1415
531e5376
RH
1416 /* Find the accumulator use within the operation. */
1417 if (code == FMA)
1418 {
1419 /* We only support accumulation via FMA in the ADD position. */
1420 if (!rtx_equal_p (dest, XEXP (src, 2)))
1421 return NULL;
1422 accum_pos = 2;
1423 }
1424 else if (rtx_equal_p (dest, XEXP (src, 0)))
76fd2caa 1425 accum_pos = 0;
531e5376
RH
1426 else if (rtx_equal_p (dest, XEXP (src, 1)))
1427 {
1428 /* The method of expansion that we are using; which includes the
1429 initialization of the expansions with zero and the summation of
1430 the expansions at the end of the computation will yield wrong
1431 results for (x = something - x) thus avoid using it in that case. */
1432 if (code == MINUS)
1433 return NULL;
1434 accum_pos = 1;
1435 }
76fd2caa
RE
1436 else
1437 return NULL;
1438
531e5376
RH
1439 /* It must not otherwise be used. */
1440 if (code == FMA)
1441 {
1442 if (rtx_referenced_p (dest, XEXP (src, 0))
1443 || rtx_referenced_p (dest, XEXP (src, 1)))
1444 return NULL;
1445 }
1446 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
f37a4f14 1447 return NULL;
b8698a0f 1448
531e5376 1449 /* It must be used in exactly one insn. */
60c48e4c 1450 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
f37a4f14 1451 return NULL;
b8698a0f 1452
c1c5a431 1453 if (dump_file)
531e5376
RH
1454 {
1455 fprintf (dump_file, "\n;; Expanding Accumulator ");
1456 print_rtl (dump_file, dest);
1457 fprintf (dump_file, "\n");
1458 }
c1c5a431 1459
60c48e4c
AO
1460 if (debug_uses)
1461 /* Instead of resetting the debug insns, we could replace each
1462 debug use in the loop with the sum or product of all expanded
5764ee3c 1463 accumulators. Since we'll only know of all expansions at the
60c48e4c
AO
1464 end, we'd have to keep track of which vars_to_expand a debug
1465 insn in the loop references, take note of each copy of the
1466 debug insn during unrolling, and when it's all done, compute
1467 the sum or product of each variable and adjust the original
1468 debug insn and each copy thereof. What a pain! */
1469 reset_debug_uses_in_loop (loop, dest, debug_uses);
1470
f37a4f14 1471 /* Record the accumulator to expand. */
5ed6ace5 1472 ves = XNEW (struct var_to_expand);
f37a4f14 1473 ves->insn = insn;
f37a4f14 1474 ves->reg = copy_rtx (dest);
9771b263 1475 ves->var_expansions.create (1);
a9f6ecee 1476 ves->next = NULL;
f37a4f14
RE
1477 ves->op = GET_CODE (src);
1478 ves->expansion_count = 0;
1479 ves->reuse_expansion = 0;
b8698a0f 1480 return ves;
f37a4f14
RE
1481}
1482
113d659a 1483/* Determine whether there is an induction variable in INSN that
b8698a0f 1484 we would like to split during unrolling.
f37a4f14
RE
1485
1486 I.e. replace
1487
1488 i = i + 1;
1489 ...
1490 i = i + 1;
1491 ...
1492 i = i + 1;
1493 ...
1494
1495 type chains by
1496
1497 i0 = i + 1
1498 ...
1499 i = i0 + 1
1500 ...
1501 i = i0 + 2
1502 ...
1503
b8698a0f 1504 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
f37a4f14 1505 an IV_TO_SPLIT structure, fill it with the relevant information and return a
113d659a
ZD
1506 pointer to it. */
1507
1508static struct iv_to_split *
1b20d55a 1509analyze_iv_to_split_insn (rtx_insn *insn)
113d659a
ZD
1510{
1511 rtx set, dest;
99b1c316 1512 class rtx_iv iv;
113d659a 1513 struct iv_to_split *ivts;
3d88d1cd 1514 scalar_int_mode mode;
41806d92 1515 bool ok;
113d659a
ZD
1516
1517 /* For now we just split the basic induction variables. Later this may be
1518 extended for example by selecting also addresses of memory references. */
1519 set = single_set (insn);
1520 if (!set)
1521 return NULL;
1522
1523 dest = SET_DEST (set);
3d88d1cd 1524 if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
113d659a
ZD
1525 return NULL;
1526
3d88d1cd 1527 if (!biv_p (insn, mode, dest))
113d659a
ZD
1528 return NULL;
1529
03fd2215 1530 ok = iv_analyze_result (insn, dest, &iv);
4dc7782d
JL
1531
1532 /* This used to be an assert under the assumption that if biv_p returns
1533 true that iv_analyze_result must also return true. However, that
1534 assumption is not strictly correct as evidenced by pr25569.
1535
1536 Returning NULL when iv_analyze_result returns false is safe and
1537 avoids the problems in pr25569 until the iv_analyze_* routines
1538 can be fixed, which is apparently hard and time consuming
1539 according to their author. */
1540 if (! ok)
1541 return NULL;
113d659a
ZD
1542
1543 if (iv.step == const0_rtx
1544 || iv.mode != iv.extend_mode)
1545 return NULL;
1546
1547 /* Record the insn to split. */
5ed6ace5 1548 ivts = XNEW (struct iv_to_split);
113d659a 1549 ivts->insn = insn;
4a0c3fde 1550 ivts->orig_var = dest;
113d659a
ZD
1551 ivts->base_var = NULL_RTX;
1552 ivts->step = iv.step;
a9f6ecee 1553 ivts->next = NULL;
b8698a0f 1554
113d659a
ZD
1555 return ivts;
1556}
1557
f37a4f14
RE
1558/* Determines which of insns in LOOP can be optimized.
1559 Return a OPT_INFO struct with the relevant hash tables filled
1560 with all insns to be optimized. The FIRST_NEW_BLOCK field
113d659a
ZD
1561 is undefined for the return value. */
1562
f37a4f14 1563static struct opt_info *
99b1c316 1564analyze_insns_in_loop (class loop *loop)
113d659a
ZD
1565{
1566 basic_block *body, bb;
ca83d385 1567 unsigned i;
5ed6ace5 1568 struct opt_info *opt_info = XCNEW (struct opt_info);
1b20d55a 1569 rtx_insn *insn;
f37a4f14
RE
1570 struct iv_to_split *ivts = NULL;
1571 struct var_to_expand *ves = NULL;
4a8fb1a1
LC
1572 iv_to_split **slot1;
1573 var_to_expand **slot2;
4b9d61f7 1574 auto_vec<edge> edges = get_loop_exit_edges (loop);
ca83d385 1575 edge exit;
f37a4f14 1576 bool can_apply = false;
b8698a0f 1577
113d659a
ZD
1578 iv_analysis_loop_init (loop);
1579
1580 body = get_loop_body (loop);
f37a4f14
RE
1581
1582 if (flag_split_ivs_in_unroller)
a9f6ecee 1583 {
c203e8a7
TS
1584 opt_info->insns_to_split
1585 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
a9f6ecee
AO
1586 opt_info->iv_to_split_head = NULL;
1587 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1588 }
b8698a0f 1589
f37a4f14 1590 /* Record the loop exit bb and loop preheader before the unrolling. */
598ec7bd 1591 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
b8698a0f 1592
9771b263 1593 if (edges.length () == 1)
f37a4f14 1594 {
9771b263 1595 exit = edges[0];
ca83d385
ZD
1596 if (!(exit->flags & EDGE_COMPLEX))
1597 {
1598 opt_info->loop_exit = split_edge (exit);
1599 can_apply = true;
1600 }
f37a4f14 1601 }
b8698a0f 1602
f37a4f14
RE
1603 if (flag_variable_expansion_in_unroller
1604 && can_apply)
a9f6ecee 1605 {
c203e8a7
TS
1606 opt_info->insns_with_var_to_expand
1607 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
a9f6ecee
AO
1608 opt_info->var_to_expand_head = NULL;
1609 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1610 }
b8698a0f 1611
113d659a
ZD
1612 for (i = 0; i < loop->num_nodes; i++)
1613 {
1614 bb = body[i];
1615 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1616 continue;
1617
1618 FOR_BB_INSNS (bb, insn)
f37a4f14
RE
1619 {
1620 if (!INSN_P (insn))
1621 continue;
b8698a0f 1622
c203e8a7 1623 if (opt_info->insns_to_split)
f37a4f14 1624 ivts = analyze_iv_to_split_insn (insn);
b8698a0f 1625
f37a4f14
RE
1626 if (ivts)
1627 {
c203e8a7 1628 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
a9f6ecee 1629 gcc_assert (*slot1 == NULL);
f37a4f14 1630 *slot1 = ivts;
a9f6ecee
AO
1631 *opt_info->iv_to_split_tail = ivts;
1632 opt_info->iv_to_split_tail = &ivts->next;
f37a4f14
RE
1633 continue;
1634 }
b8698a0f 1635
c203e8a7 1636 if (opt_info->insns_with_var_to_expand)
f37a4f14 1637 ves = analyze_insn_to_expand_var (loop, insn);
b8698a0f 1638
f37a4f14
RE
1639 if (ves)
1640 {
c203e8a7 1641 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
a9f6ecee 1642 gcc_assert (*slot2 == NULL);
f37a4f14 1643 *slot2 = ves;
a9f6ecee
AO
1644 *opt_info->var_to_expand_tail = ves;
1645 opt_info->var_to_expand_tail = &ves->next;
f37a4f14
RE
1646 }
1647 }
113d659a 1648 }
b8698a0f 1649
113d659a 1650 free (body);
f37a4f14 1651 return opt_info;
113d659a
ZD
1652}
1653
1654/* Called just before loop duplication. Records start of duplicated area
f37a4f14 1655 to OPT_INFO. */
113d659a 1656
b8698a0f 1657static void
f37a4f14 1658opt_info_start_duplication (struct opt_info *opt_info)
113d659a 1659{
f37a4f14 1660 if (opt_info)
8b1c6fd7 1661 opt_info->first_new_block = last_basic_block_for_fn (cfun);
113d659a
ZD
1662}
1663
1664/* Determine the number of iterations between initialization of the base
1665 variable and the current copy (N_COPY). N_COPIES is the total number
1666 of newly created copies. UNROLLING is true if we are unrolling
1667 (not peeling) the loop. */
1668
1669static unsigned
1670determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1671{
1672 if (unrolling)
1673 {
1674 /* If we are unrolling, initialization is done in the original loop
1675 body (number 0). */
1676 return n_copy;
1677 }
1678 else
1679 {
1680 /* If we are peeling, the copy in that the initialization occurs has
1681 number 1. The original loop (number 0) is the last. */
1682 if (n_copy)
1683 return n_copy - 1;
1684 else
1685 return n_copies;
1686 }
1687}
1688
a9f6ecee 1689/* Allocate basic variable for the induction variable chain. */
113d659a 1690
a9f6ecee
AO
1691static void
1692allocate_basic_variable (struct iv_to_split *ivts)
113d659a 1693{
8cab83f0 1694 rtx expr = SET_SRC (single_set (ivts->insn));
113d659a
ZD
1695
1696 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
113d659a
ZD
1697}
1698
1699/* Insert initialization of basic variable of IVTS before INSN, taking
1700 the initial value from INSN. */
1701
1702static void
95ecfb26 1703insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
113d659a 1704{
8cab83f0 1705 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
95ecfb26 1706 rtx_insn *seq;
113d659a
ZD
1707
1708 start_sequence ();
1709 expr = force_operand (expr, ivts->base_var);
1710 if (expr != ivts->base_var)
1711 emit_move_insn (ivts->base_var, expr);
1712 seq = get_insns ();
1713 end_sequence ();
1714
1715 emit_insn_before (seq, insn);
1716}
1717
1718/* Replace the use of induction variable described in IVTS in INSN
1719 by base variable + DELTA * step. */
1720
1721static void
95ecfb26 1722split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
113d659a 1723{
95ecfb26
DM
1724 rtx expr, *loc, incr, var;
1725 rtx_insn *seq;
ef4bddc2 1726 machine_mode mode = GET_MODE (ivts->base_var);
113d659a
ZD
1727 rtx src, dest, set;
1728
1729 /* Construct base + DELTA * step. */
1730 if (!delta)
1731 expr = ivts->base_var;
1732 else
1733 {
1734 incr = simplify_gen_binary (MULT, mode,
6b1237e3
RS
1735 copy_rtx (ivts->step),
1736 gen_int_mode (delta, mode));
113d659a
ZD
1737 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1738 ivts->base_var, incr);
1739 }
1740
1741 /* Figure out where to do the replacement. */
8cab83f0 1742 loc = &SET_SRC (single_set (insn));
113d659a
ZD
1743
1744 /* If we can make the replacement right away, we're done. */
1745 if (validate_change (insn, loc, expr, 0))
1746 return;
1747
1748 /* Otherwise, force EXPR into a register and try again. */
1749 start_sequence ();
1750 var = gen_reg_rtx (mode);
1751 expr = force_operand (expr, var);
1752 if (expr != var)
1753 emit_move_insn (var, expr);
1754 seq = get_insns ();
1755 end_sequence ();
1756 emit_insn_before (seq, insn);
b8698a0f 1757
113d659a
ZD
1758 if (validate_change (insn, loc, var, 0))
1759 return;
1760
1761 /* The last chance. Try recreating the assignment in insn
1762 completely from scratch. */
1763 set = single_set (insn);
1764 gcc_assert (set);
1765
1766 start_sequence ();
1767 *loc = var;
1768 src = copy_rtx (SET_SRC (set));
1769 dest = copy_rtx (SET_DEST (set));
1770 src = force_operand (src, dest);
1771 if (src != dest)
1772 emit_move_insn (dest, src);
1773 seq = get_insns ();
1774 end_sequence ();
b8698a0f 1775
113d659a
ZD
1776 emit_insn_before (seq, insn);
1777 delete_insn (insn);
1778}
1779
113d659a 1780
2cd0e9f4 1781/* Return one expansion of the accumulator recorded in struct VE. */
113d659a 1782
f37a4f14
RE
1783static rtx
1784get_expansion (struct var_to_expand *ve)
1785{
1786 rtx reg;
b8698a0f 1787
f37a4f14
RE
1788 if (ve->reuse_expansion == 0)
1789 reg = ve->reg;
1790 else
9771b263 1791 reg = ve->var_expansions[ve->reuse_expansion - 1];
b8698a0f 1792
9771b263 1793 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
f37a4f14 1794 ve->reuse_expansion = 0;
b8698a0f 1795 else
f37a4f14 1796 ve->reuse_expansion++;
b8698a0f 1797
f37a4f14
RE
1798 return reg;
1799}
113d659a 1800
113d659a 1801
b8698a0f 1802/* Given INSN replace the uses of the accumulator recorded in VE
f37a4f14
RE
1803 with a new register. */
1804
1805static void
95ecfb26 1806expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
f37a4f14
RE
1807{
1808 rtx new_reg, set;
1809 bool really_new_expansion = false;
b8698a0f 1810
f37a4f14 1811 set = single_set (insn);
b5e624c6 1812 gcc_assert (set);
b8698a0f 1813
f37a4f14
RE
1814 /* Generate a new register only if the expansion limit has not been
1815 reached. Else reuse an already existing expansion. */
028d4092 1816 if (param_max_variable_expansions > ve->expansion_count)
f37a4f14
RE
1817 {
1818 really_new_expansion = true;
1819 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1820 }
1821 else
1822 new_reg = get_expansion (ve);
1823
6e74642b 1824 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
f37a4f14
RE
1825 if (apply_change_group ())
1826 if (really_new_expansion)
1827 {
9771b263 1828 ve->var_expansions.safe_push (new_reg);
f37a4f14
RE
1829 ve->expansion_count++;
1830 }
1831}
1832
a9f6ecee
AO
1833/* Initialize the variable expansions in loop preheader. PLACE is the
1834 loop-preheader basic block where the initialization of the
1835 expansions should take place. The expansions are initialized with
1836 (-0) when the operation is plus or minus to honor sign zero. This
1837 way we can prevent cases where the sign of the final result is
1838 effected by the sign of the expansion. Here is an example to
1839 demonstrate this:
b8698a0f 1840
290358f7
RE
1841 for (i = 0 ; i < n; i++)
1842 sum += something;
1843
1844 ==>
1845
1846 sum += something
1847 ....
1848 i = i+1;
1849 sum1 += something
1850 ....
1851 i = i+1
1852 sum2 += something;
1853 ....
b8698a0f 1854
290358f7
RE
1855 When SUM is initialized with -zero and SOMETHING is also -zero; the
1856 final result of sum should be -zero thus the expansions sum1 and sum2
1857 should be initialized with -zero as well (otherwise we will get +zero
1858 as the final result). */
f37a4f14 1859
a9f6ecee
AO
1860static void
1861insert_var_expansion_initialization (struct var_to_expand *ve,
1862 basic_block place)
f37a4f14 1863{
95ecfb26
DM
1864 rtx_insn *seq;
1865 rtx var, zero_init;
f37a4f14 1866 unsigned i;
ef4bddc2 1867 machine_mode mode = GET_MODE (ve->reg);
290358f7
RE
1868 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1869
9771b263 1870 if (ve->var_expansions.length () == 0)
a9f6ecee 1871 return;
b8698a0f 1872
f37a4f14 1873 start_sequence ();
531e5376
RH
1874 switch (ve->op)
1875 {
1876 case FMA:
1877 /* Note that we only accumulate FMA via the ADD operand. */
1878 case PLUS:
1879 case MINUS:
9771b263 1880 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
531e5376
RH
1881 {
1882 if (honor_signed_zero_p)
1883 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1884 else
1885 zero_init = CONST0_RTX (mode);
1886 emit_move_insn (var, zero_init);
1887 }
1888 break;
1889
1890 case MULT:
9771b263 1891 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
531e5376
RH
1892 {
1893 zero_init = CONST1_RTX (GET_MODE (var));
1894 emit_move_insn (var, zero_init);
1895 }
1896 break;
1897
1898 default:
1899 gcc_unreachable ();
1900 }
b8698a0f 1901
f37a4f14
RE
1902 seq = get_insns ();
1903 end_sequence ();
b8698a0f 1904
6e74642b 1905 emit_insn_after (seq, BB_END (place));
f37a4f14
RE
1906}
1907
a9f6ecee
AO
1908/* Combine the variable expansions at the loop exit. PLACE is the
1909 loop exit basic block where the summation of the expansions should
1910 take place. */
f37a4f14 1911
a9f6ecee
AO
1912static void
1913combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
f37a4f14 1914{
f37a4f14 1915 rtx sum = ve->reg;
95ecfb26
DM
1916 rtx expr, var;
1917 rtx_insn *seq, *insn;
f37a4f14
RE
1918 unsigned i;
1919
9771b263 1920 if (ve->var_expansions.length () == 0)
a9f6ecee 1921 return;
b8698a0f 1922
39259462
JJ
1923 /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
1924 it both here and as the destination of the assignment. */
1925 sum = copy_rtx (sum);
f37a4f14 1926 start_sequence ();
531e5376
RH
1927 switch (ve->op)
1928 {
1929 case FMA:
1930 /* Note that we only accumulate FMA via the ADD operand. */
1931 case PLUS:
1932 case MINUS:
9771b263 1933 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
531e5376
RH
1934 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1935 break;
1936
1937 case MULT:
9771b263 1938 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
531e5376
RH
1939 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1940 break;
1941
1942 default:
1943 gcc_unreachable ();
1944 }
b8698a0f 1945
f37a4f14
RE
1946 expr = force_operand (sum, ve->reg);
1947 if (expr != ve->reg)
1948 emit_move_insn (ve->reg, expr);
1949 seq = get_insns ();
1950 end_sequence ();
b8698a0f 1951
f37a4f14
RE
1952 insn = BB_HEAD (place);
1953 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1954 insn = NEXT_INSN (insn);
1955
1956 emit_insn_after (seq, insn);
f37a4f14
RE
1957}
1958
4a0c3fde
SB
1959/* Strip away REG_EQUAL notes for IVs we're splitting.
1960
1961 Updating REG_EQUAL notes for IVs we split is tricky: We
1962 cannot tell until after unrolling, DF-rescanning, and liveness
1963 updating, whether an EQ_USE is reached by the split IV while
1964 the IV reg is still live. See PR55006.
1965
1966 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1967 because RTL loop-iv requires us to defer rescanning insns and
1968 any notes attached to them. So resort to old techniques... */
1969
1970static void
95ecfb26 1971maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
4a0c3fde
SB
1972{
1973 struct iv_to_split *ivts;
1974 rtx note = find_reg_equal_equiv_note (insn);
1975 if (! note)
1976 return;
1977 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1978 if (reg_mentioned_p (ivts->orig_var, note))
1979 {
1980 remove_note (insn, note);
1981 return;
1982 }
1983}
1984
b8698a0f
L
1985/* Apply loop optimizations in loop copies using the
1986 data which gathered during the unrolling. Structure
f37a4f14 1987 OPT_INFO record that data.
b8698a0f 1988
113d659a
ZD
1989 UNROLLING is true if we unrolled (not peeled) the loop.
1990 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1991 the loop (as it should happen in complete unrolling, but not in ordinary
1992 peeling of the loop). */
1993
1994static void
b8698a0f
L
1995apply_opt_in_copies (struct opt_info *opt_info,
1996 unsigned n_copies, bool unrolling,
f37a4f14 1997 bool rewrite_original_loop)
113d659a
ZD
1998{
1999 unsigned i, delta;
2000 basic_block bb, orig_bb;
95ecfb26 2001 rtx_insn *insn, *orig_insn, *next;
113d659a 2002 struct iv_to_split ivts_templ, *ivts;
f37a4f14 2003 struct var_to_expand ve_templ, *ves;
b8698a0f 2004
113d659a
ZD
2005 /* Sanity check -- we need to put initialization in the original loop
2006 body. */
2007 gcc_assert (!unrolling || rewrite_original_loop);
b8698a0f 2008
113d659a 2009 /* Allocate the basic variables (i0). */
c203e8a7 2010 if (opt_info->insns_to_split)
a9f6ecee
AO
2011 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2012 allocate_basic_variable (ivts);
b8698a0f 2013
8b1c6fd7
DM
2014 for (i = opt_info->first_new_block;
2015 i < (unsigned) last_basic_block_for_fn (cfun);
2016 i++)
113d659a 2017 {
06e28de2 2018 bb = BASIC_BLOCK_FOR_FN (cfun, i);
6580ee77 2019 orig_bb = get_bb_original (bb);
b8698a0f 2020
6580ee77
JH
2021 /* bb->aux holds position in copy sequence initialized by
2022 duplicate_loop_to_header_edge. */
2023 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
113d659a 2024 unrolling);
7f7b1718 2025 bb->aux = 0;
113d659a 2026 orig_insn = BB_HEAD (orig_bb);
4a0c3fde 2027 FOR_BB_INSNS_SAFE (bb, insn, next)
f37a4f14 2028 {
0397b965 2029 if (!INSN_P (insn)
36f52e8f
AO
2030 || (DEBUG_BIND_INSN_P (insn)
2031 && INSN_VAR_LOCATION_DECL (insn)
0397b965 2032 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
f37a4f14 2033 continue;
b8698a0f 2034
0397b965 2035 while (!INSN_P (orig_insn)
36f52e8f
AO
2036 || (DEBUG_BIND_INSN_P (orig_insn)
2037 && INSN_VAR_LOCATION_DECL (orig_insn)
0397b965
JJ
2038 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2039 == LABEL_DECL)))
f37a4f14 2040 orig_insn = NEXT_INSN (orig_insn);
b8698a0f 2041
f37a4f14
RE
2042 ivts_templ.insn = orig_insn;
2043 ve_templ.insn = orig_insn;
b8698a0f 2044
f37a4f14 2045 /* Apply splitting iv optimization. */
c203e8a7 2046 if (opt_info->insns_to_split)
f37a4f14 2047 {
4a0c3fde
SB
2048 maybe_strip_eq_note_for_split_iv (opt_info, insn);
2049
c203e8a7 2050 ivts = opt_info->insns_to_split->find (&ivts_templ);
b8698a0f 2051
f37a4f14
RE
2052 if (ivts)
2053 {
21f868a2
ZD
2054 gcc_assert (GET_CODE (PATTERN (insn))
2055 == GET_CODE (PATTERN (orig_insn)));
b8698a0f 2056
f37a4f14
RE
2057 if (!delta)
2058 insert_base_initialization (ivts, insn);
2059 split_iv (ivts, insn, delta);
2060 }
2061 }
2062 /* Apply variable expansion optimization. */
c203e8a7 2063 if (unrolling && opt_info->insns_with_var_to_expand)
f37a4f14 2064 {
d3bfe4de 2065 ves = (struct var_to_expand *)
c203e8a7 2066 opt_info->insns_with_var_to_expand->find (&ve_templ);
f37a4f14 2067 if (ves)
b8698a0f 2068 {
21f868a2
ZD
2069 gcc_assert (GET_CODE (PATTERN (insn))
2070 == GET_CODE (PATTERN (orig_insn)));
f37a4f14
RE
2071 expand_var_during_unrolling (ves, insn);
2072 }
2073 }
2074 orig_insn = NEXT_INSN (orig_insn);
2075 }
113d659a
ZD
2076 }
2077
2078 if (!rewrite_original_loop)
2079 return;
b8698a0f 2080
f37a4f14 2081 /* Initialize the variable expansions in the loop preheader
b8698a0f 2082 and take care of combining them at the loop exit. */
c203e8a7 2083 if (opt_info->insns_with_var_to_expand)
f37a4f14 2084 {
a9f6ecee
AO
2085 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2086 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2087 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2088 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
f37a4f14 2089 }
b8698a0f 2090
113d659a
ZD
2091 /* Rewrite also the original loop body. Find them as originals of the blocks
2092 in the last copied iteration, i.e. those that have
6580ee77 2093 get_bb_copy (get_bb_original (bb)) == bb. */
8b1c6fd7
DM
2094 for (i = opt_info->first_new_block;
2095 i < (unsigned) last_basic_block_for_fn (cfun);
2096 i++)
113d659a 2097 {
06e28de2 2098 bb = BASIC_BLOCK_FOR_FN (cfun, i);
6580ee77
JH
2099 orig_bb = get_bb_original (bb);
2100 if (get_bb_copy (orig_bb) != bb)
113d659a 2101 continue;
b8698a0f 2102
113d659a
ZD
2103 delta = determine_split_iv_delta (0, n_copies, unrolling);
2104 for (orig_insn = BB_HEAD (orig_bb);
f37a4f14
RE
2105 orig_insn != NEXT_INSN (BB_END (bb));
2106 orig_insn = next)
2107 {
2108 next = NEXT_INSN (orig_insn);
b8698a0f 2109
f37a4f14
RE
2110 if (!INSN_P (orig_insn))
2111 continue;
b8698a0f 2112
f37a4f14 2113 ivts_templ.insn = orig_insn;
c203e8a7 2114 if (opt_info->insns_to_split)
f37a4f14 2115 {
4a0c3fde
SB
2116 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2117
d3bfe4de 2118 ivts = (struct iv_to_split *)
c203e8a7 2119 opt_info->insns_to_split->find (&ivts_templ);
f37a4f14
RE
2120 if (ivts)
2121 {
2122 if (!delta)
2123 insert_base_initialization (ivts, orig_insn);
2124 split_iv (ivts, orig_insn, delta);
2125 continue;
2126 }
2127 }
b8698a0f 2128
f37a4f14
RE
2129 }
2130 }
2131}
113d659a 2132
f37a4f14 2133/* Release OPT_INFO. */
113d659a
ZD
2134
2135static void
f37a4f14 2136free_opt_info (struct opt_info *opt_info)
113d659a 2137{
c203e8a7
TS
2138 delete opt_info->insns_to_split;
2139 opt_info->insns_to_split = NULL;
2140 if (opt_info->insns_with_var_to_expand)
f37a4f14 2141 {
a9f6ecee
AO
2142 struct var_to_expand *ves;
2143
2144 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
9771b263 2145 ves->var_expansions.release ();
c203e8a7
TS
2146 delete opt_info->insns_with_var_to_expand;
2147 opt_info->insns_with_var_to_expand = NULL;
f37a4f14
RE
2148 }
2149 free (opt_info);
113d659a 2150}