]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/gimple-loop-interchange.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / gimple-loop-interchange.cc
CommitLineData
fbdec14e 1/* Loop interchange.
8d9254fc 2 Copyright (C) 2017-2020 Free Software Foundation, Inc.
fbdec14e
BC
3 Contributed by ARM Ltd.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9Free Software Foundation; either version 3, or (at your option) any
10later version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "is-a.h"
26#include "tree.h"
27#include "gimple.h"
28#include "tree-pass.h"
29#include "ssa.h"
30#include "gimple-pretty-print.h"
31#include "fold-const.h"
32#include "gimplify.h"
33#include "gimple-iterator.h"
34#include "gimplify-me.h"
35#include "cfgloop.h"
fbdec14e
BC
36#include "tree-ssa.h"
37#include "tree-scalar-evolution.h"
38#include "tree-ssa-loop-manip.h"
39#include "tree-ssa-loop-niter.h"
40#include "tree-ssa-loop-ivopts.h"
41#include "tree-ssa-dce.h"
42#include "tree-data-ref.h"
43#include "tree-vectorizer.h"
44
45/* This pass performs loop interchange: for example, the loop nest
46
47 for (int j = 0; j < N; j++)
48 for (int k = 0; k < N; k++)
49 for (int i = 0; i < N; i++)
50 c[i][j] = c[i][j] + a[i][k]*b[k][j];
51
52 is transformed to
53
54 for (int i = 0; i < N; i++)
55 for (int j = 0; j < N; j++)
56 for (int k = 0; k < N; k++)
57 c[i][j] = c[i][j] + a[i][k]*b[k][j];
58
59 This pass implements loop interchange in the following steps:
60
61 1) Find perfect loop nest for each innermost loop and compute data
62 dependence relations for it. For above example, loop nest is
63 <loop_j, loop_k, loop_i>.
64 2) From innermost to outermost loop, this pass tries to interchange
65 each loop pair. For above case, it firstly tries to interchange
66 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
67 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
68 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
69 loop to the outermost position. For loop pair <loop_i, loop_j>
70 to be interchanged, we:
71 3) Check if data dependence relations are valid for loop interchange.
72 4) Check if both loops can be interchanged in terms of transformation.
73 5) Check if interchanging the two loops is profitable.
74 6) Interchange the two loops by mapping induction variables.
75
76 This pass also handles reductions in loop nest. So far we only support
77 simple reduction of inner loop and double reduction of the loop nest. */
78
79/* Maximum number of stmts in each loop that should be interchanged. */
028d4092 80#define MAX_NUM_STMT (param_loop_interchange_max_num_stmts)
fbdec14e 81/* Maximum number of data references in loop nest. */
028d4092 82#define MAX_DATAREFS (param_loop_max_datarefs_for_datadeps)
fbdec14e
BC
83
84/* Comparison ratio of access stride between inner/outer loops to be
85 interchanged. This is the minimum stride ratio for loop interchange
86 to be profitable. */
028d4092 87#define OUTER_STRIDE_RATIO (param_loop_interchange_stride_ratio)
fbdec14e
BC
88/* The same as above, but we require higher ratio for interchanging the
89 innermost two loops. */
90#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
91
1eeeda47
BC
92/* Comparison ratio of stmt cost between inner/outer loops. Loops won't
93 be interchanged if outer loop has too many stmts. */
94#define STMT_COST_RATIO (3)
95
fbdec14e
BC
96/* Vector of strides that DR accesses in each level loop of a loop nest. */
97#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
98
99/* Structure recording loop induction variable. */
100typedef struct induction
101{
102 /* IV itself. */
103 tree var;
104 /* IV's initializing value, which is the init arg of the IV PHI node. */
105 tree init_val;
106 /* IV's initializing expr, which is (the expanded result of) init_val. */
107 tree init_expr;
108 /* IV's step. */
109 tree step;
110} *induction_p;
111
/* Classification of a loop reduction variable.  */
enum reduction_type
{
  UNKNOWN_RTYPE = 0,	/* Not (yet) recognized as a supported reduction.  */
  SIMPLE_RTYPE,		/* Simple reduction of the inner loop.  */
  DOUBLE_RTYPE		/* Double reduction spanning the loop nest.  */
};
119
120/* Structure recording loop reduction variable. */
121typedef struct reduction
122{
123 /* Reduction itself. */
124 tree var;
125 /* PHI node defining reduction variable. */
126 gphi *phi;
127 /* Init and next variables of the reduction. */
128 tree init;
129 tree next;
130 /* Lcssa PHI node if reduction is used outside of its definition loop. */
131 gphi *lcssa_phi;
132 /* Stmts defining init and next. */
133 gimple *producer;
134 gimple *consumer;
135 /* If init is loaded from memory, this is the loading memory reference. */
136 tree init_ref;
137 /* If reduction is finally stored to memory, this is the stored memory
138 reference. */
139 tree fini_ref;
140 enum reduction_type type;
141} *reduction_p;
142
143
144/* Dump reduction RE. */
145
146static void
147dump_reduction (reduction_p re)
148{
149 if (re->type == SIMPLE_RTYPE)
150 fprintf (dump_file, " Simple reduction: ");
151 else if (re->type == DOUBLE_RTYPE)
152 fprintf (dump_file, " Double reduction: ");
153 else
154 fprintf (dump_file, " Unknown reduction: ");
155
156 print_gimple_stmt (dump_file, re->phi, 0);
157}
158
159/* Dump LOOP's induction IV. */
160static void
99b1c316 161dump_induction (class loop *loop, induction_p iv)
fbdec14e
BC
162{
163 fprintf (dump_file, " Induction: ");
164 print_generic_expr (dump_file, iv->var, TDF_SLIM);
165 fprintf (dump_file, " = {");
166 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
167 fprintf (dump_file, ", ");
168 print_generic_expr (dump_file, iv->step, TDF_SLIM);
169 fprintf (dump_file, "}_%d\n", loop->num);
170}
171
172/* Loop candidate for interchange. */
173
6c1dae73 174class loop_cand
fbdec14e 175{
6c1dae73 176public:
99b1c316 177 loop_cand (class loop *, class loop *);
fbdec14e
BC
178 ~loop_cand ();
179
180 reduction_p find_reduction_by_stmt (gimple *);
181 void classify_simple_reduction (reduction_p);
182 bool analyze_iloop_reduction_var (tree);
183 bool analyze_oloop_reduction_var (loop_cand *, tree);
184 bool analyze_induction_var (tree, tree);
185 bool analyze_carried_vars (loop_cand *);
186 bool analyze_lcssa_phis (void);
187 bool can_interchange_p (loop_cand *);
fbdec14e
BC
188 void undo_simple_reduction (reduction_p, bitmap);
189
190 /* The loop itself. */
99b1c316 191 class loop *m_loop;
fbdec14e
BC
192 /* The outer loop for interchange. It equals to loop if this loop cand
193 itself represents the outer loop. */
99b1c316 194 class loop *m_outer;
fbdec14e
BC
195 /* Vector of induction variables in loop. */
196 vec<induction_p> m_inductions;
197 /* Vector of reduction variables in loop. */
198 vec<reduction_p> m_reductions;
199 /* Lcssa PHI nodes of this loop. */
200 vec<gphi *> m_lcssa_nodes;
201 /* Single exit edge of this loop. */
202 edge m_exit;
203 /* Basic blocks of this loop. */
204 basic_block *m_bbs;
1eeeda47
BC
205 /* Number of stmts of this loop. Inner loops' stmts are not included. */
206 int m_num_stmts;
207 /* Number of constant initialized simple reduction. */
208 int m_const_init_reduc;
fbdec14e
BC
209};
210
211/* Constructor. */
212
99b1c316 213loop_cand::loop_cand (class loop *loop, class loop *outer)
1eeeda47
BC
214 : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
215 m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
fbdec14e
BC
216{
217 m_inductions.create (3);
218 m_reductions.create (3);
219 m_lcssa_nodes.create (3);
220}
221
222/* Destructor. */
223
224loop_cand::~loop_cand ()
225{
226 induction_p iv;
227 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
228 free (iv);
229
230 reduction_p re;
231 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
232 free (re);
233
234 m_inductions.release ();
235 m_reductions.release ();
236 m_lcssa_nodes.release ();
237 free (m_bbs);
238}
239
240/* Return single use stmt of VAR in LOOP, otherwise return NULL. */
241
242static gimple *
99b1c316 243single_use_in_loop (tree var, class loop *loop)
fbdec14e
BC
244{
245 gimple *stmt, *res = NULL;
246 use_operand_p use_p;
247 imm_use_iterator iterator;
248
249 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
250 {
251 stmt = USE_STMT (use_p);
252 if (is_gimple_debug (stmt))
253 continue;
254
255 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
256 continue;
257
258 if (res)
259 return NULL;
260
261 res = stmt;
262 }
263 return res;
264}
265
266/* Return true if E is unsupported in loop interchange, i.e, E is a complex
267 edge or part of irreducible loop. */
268
269static inline bool
270unsupported_edge (edge e)
271{
272 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
273}
274
275/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
276 stmt. */
277
278reduction_p
279loop_cand::find_reduction_by_stmt (gimple *stmt)
280{
281 gphi *phi = dyn_cast <gphi *> (stmt);
282 reduction_p re;
283
284 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
285 if ((phi != NULL && phi == re->lcssa_phi)
286 || (stmt == re->producer || stmt == re->consumer))
287 return re;
288
289 return NULL;
290}
291
fbdec14e
BC
292/* Return true if current loop_cand be interchanged. ILOOP is not NULL if
293 current loop_cand is outer loop in loop nest. */
294
295bool
296loop_cand::can_interchange_p (loop_cand *iloop)
297{
298 /* For now we only support at most one reduction. */
299 unsigned allowed_reduction_num = 1;
300
301 /* Only support reduction if the loop nest to be interchanged is the
302 innermostin two loops. */
303 if ((iloop == NULL && m_loop->inner != NULL)
304 || (iloop != NULL && iloop->m_loop->inner != NULL))
305 allowed_reduction_num = 0;
306
307 if (m_reductions.length () > allowed_reduction_num
308 || (m_reductions.length () == 1
309 && m_reductions[0]->type == UNKNOWN_RTYPE))
310 return false;
311
312 /* Only support lcssa PHI node which is for reduction. */
313 if (m_lcssa_nodes.length () > allowed_reduction_num)
314 return false;
315
1eeeda47
BC
316 /* Check if basic block has any unsupported operation. Note basic blocks
317 of inner loops are not checked here. */
fbdec14e
BC
318 for (unsigned i = 0; i < m_loop->num_nodes; i++)
319 {
320 basic_block bb = m_bbs[i];
1eeeda47
BC
321 gphi_iterator psi;
322 gimple_stmt_iterator gsi;
fbdec14e
BC
323
324 /* Skip basic blocks of inner loops. */
325 if (bb->loop_father != m_loop)
1eeeda47 326 continue;
fbdec14e 327
1eeeda47
BC
328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
329 {
330 gimple *stmt = gsi_stmt (gsi);
331 if (is_gimple_debug (stmt))
332 continue;
333
334 if (gimple_has_side_effects (stmt))
335 return false;
336
337 m_num_stmts++;
338 if (gcall *call = dyn_cast <gcall *> (stmt))
339 {
340 /* In basic block of outer loop, the call should be cheap since
341 it will be moved to inner loop. */
342 if (iloop != NULL
343 && !gimple_inexpensive_call_p (call))
344 return false;
345 continue;
346 }
347
348 if (!iloop || !gimple_vuse (stmt))
349 continue;
fbdec14e 350
1eeeda47
BC
351 /* Support stmt accessing memory in outer loop only if it is for
352 inner loop's reduction. */
353 if (iloop->find_reduction_by_stmt (stmt))
354 continue;
355
356 tree lhs;
357 /* Support loop invariant memory reference if it's only used once by
358 inner loop. */
359 /* ??? How's this checking for invariantness? */
360 if (gimple_assign_single_p (stmt)
361 && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
362 && TREE_CODE (lhs) == SSA_NAME
363 && single_use_in_loop (lhs, iloop->m_loop))
364 continue;
365
366 return false;
367 }
fbdec14e 368 /* Check if loop has too many stmts. */
1eeeda47 369 if (m_num_stmts > MAX_NUM_STMT)
fbdec14e 370 return false;
1eeeda47
BC
371
372 /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
373 loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
374 if (!iloop || bb == m_loop->header
375 || bb == iloop->m_exit->dest)
376 continue;
377
378 /* Don't allow any other PHI nodes. */
379 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
380 if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
381 return false;
fbdec14e
BC
382 }
383
384 return true;
385}
386
387/* Programmers and optimizers (like loop store motion) may optimize code:
388
389 for (int i = 0; i < N; i++)
390 for (int j = 0; j < N; j++)
391 a[i] += b[j][i] * c[j][i];
392
393 into reduction:
394
395 for (int i = 0; i < N; i++)
396 {
397 // producer. Note sum can be intitialized to a constant.
398 int sum = a[i];
399 for (int j = 0; j < N; j++)
400 {
401 sum += b[j][i] * c[j][i];
402 }
403 // consumer.
404 a[i] = sum;
405 }
406
407 The result code can't be interchanged without undoing the optimization.
408 This function classifies this kind reduction and records information so
409 that we can undo the store motion during interchange. */
410
411void
412loop_cand::classify_simple_reduction (reduction_p re)
413{
414 gimple *producer, *consumer;
415
416 /* Check init variable of reduction and how it is initialized. */
417 if (TREE_CODE (re->init) == SSA_NAME)
418 {
419 producer = SSA_NAME_DEF_STMT (re->init);
420 re->producer = producer;
421 basic_block bb = gimple_bb (producer);
422 if (!bb || bb->loop_father != m_outer)
423 return;
424
425 if (!gimple_assign_load_p (producer))
426 return;
427
428 re->init_ref = gimple_assign_rhs1 (producer);
429 }
1eeeda47
BC
430 else if (CONSTANT_CLASS_P (re->init))
431 m_const_init_reduc++;
432 else
fbdec14e
BC
433 return;
434
435 /* Check how reduction variable is used. */
436 consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
437 if (!consumer
438 || !gimple_store_p (consumer))
439 return;
440
441 re->fini_ref = gimple_get_lhs (consumer);
442 re->consumer = consumer;
443
444 /* Simple reduction with constant initializer. */
445 if (!re->init_ref)
446 {
447 gcc_assert (CONSTANT_CLASS_P (re->init));
448 re->init_ref = unshare_expr (re->fini_ref);
449 }
450
451 /* Require memory references in producer and consumer are the same so
452 that we can undo reduction during interchange. */
453 if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
454 return;
455
456 re->type = SIMPLE_RTYPE;
457}
458
459/* Analyze reduction variable VAR for inner loop of the loop nest to be
460 interchanged. Return true if analysis succeeds. */
461
462bool
463loop_cand::analyze_iloop_reduction_var (tree var)
464{
465 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
466 gphi *lcssa_phi = NULL, *use_phi;
467 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
468 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
469 reduction_p re;
470 gimple *stmt, *next_def, *single_use = NULL;
471 use_operand_p use_p;
472 imm_use_iterator iterator;
473
474 if (TREE_CODE (next) != SSA_NAME)
475 return false;
476
477 next_def = SSA_NAME_DEF_STMT (next);
478 basic_block bb = gimple_bb (next_def);
479 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
480 return false;
481
482 /* In restricted reduction, the var is (and must be) used in defining
483 the updated var. The process can be depicted as below:
484
485 var ;; = PHI<init, next>
486 |
487 |
488 v
489 +---------------------+
490 | reduction operators | <-- other operands
491 +---------------------+
492 |
493 |
494 v
495 next
496
497 In terms loop interchange, we don't change how NEXT is computed based
498 on VAR and OTHER OPERANDS. In case of double reduction in loop nest
499 to be interchanged, we don't changed it at all. In the case of simple
500 reduction in inner loop, we only make change how VAR/NEXT is loaded or
501 stored. With these conditions, we can relax restrictions on reduction
502 in a way that reduction operation is seen as black box. In general,
503 we can ignore reassociation of reduction operator; we can handle fake
504 reductions in which VAR is not even used to compute NEXT. */
505 if (! single_imm_use (var, &use_p, &single_use)
506 || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
507 return false;
508
509 /* Check the reduction operation. We require a left-associative operation.
510 For FP math we also need to be allowed to associate operations. */
511 if (gassign *ass = dyn_cast <gassign *> (single_use))
512 {
513 enum tree_code code = gimple_assign_rhs_code (ass);
514 if (! (associative_tree_code (code)
515 || (code == MINUS_EXPR
516 && use_p->use == gimple_assign_rhs1_ptr (ass)))
517 || (FLOAT_TYPE_P (TREE_TYPE (var))
518 && ! flag_associative_math))
519 return false;
520 }
521 else
522 return false;
523
524 /* Handle and verify a series of stmts feeding the reduction op. */
525 if (single_use != next_def
4f5b9c80 526 && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
fbdec14e
BC
527 gimple_assign_rhs_code (single_use)))
528 return false;
529
530 /* Only support cases in which INIT is used in inner loop. */
531 if (TREE_CODE (init) == SSA_NAME)
532 FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
533 {
534 stmt = USE_STMT (use_p);
535 if (is_gimple_debug (stmt))
536 continue;
537
538 if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
539 return false;
540 }
541
542 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
543 {
544 stmt = USE_STMT (use_p);
545 if (is_gimple_debug (stmt))
546 continue;
547
548 /* Or else it's used in PHI itself. */
549 use_phi = dyn_cast <gphi *> (stmt);
550 if (use_phi == phi)
551 continue;
552
553 if (use_phi != NULL
554 && lcssa_phi == NULL
555 && gimple_bb (stmt) == m_exit->dest
556 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
557 lcssa_phi = use_phi;
558 else
559 return false;
560 }
561 if (!lcssa_phi)
562 return false;
563
564 re = XCNEW (struct reduction);
565 re->var = var;
566 re->init = init;
567 re->next = next;
568 re->phi = phi;
569 re->lcssa_phi = lcssa_phi;
570
571 classify_simple_reduction (re);
572
573 if (dump_file && (dump_flags & TDF_DETAILS))
574 dump_reduction (re);
575
576 m_reductions.safe_push (re);
577 return true;
578}
579
580/* Analyze reduction variable VAR for outer loop of the loop nest to be
581 interchanged. ILOOP is not NULL and points to inner loop. For the
582 moment, we only support double reduction for outer loop, like:
583
584 for (int i = 0; i < n; i++)
585 {
586 int sum = 0;
587
588 for (int j = 0; j < n; j++) // outer loop
589 for (int k = 0; k < n; k++) // inner loop
590 sum += a[i][k]*b[k][j];
591
592 s[i] = sum;
593 }
594
595 Note the innermost two loops are the loop nest to be interchanged.
596 Return true if analysis succeeds. */
597
598bool
599loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
600{
601 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
602 gphi *lcssa_phi = NULL, *use_phi;
603 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
604 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
605 reduction_p re;
606 gimple *stmt, *next_def;
607 use_operand_p use_p;
608 imm_use_iterator iterator;
609
610 if (TREE_CODE (next) != SSA_NAME)
611 return false;
612
613 next_def = SSA_NAME_DEF_STMT (next);
614 basic_block bb = gimple_bb (next_def);
615 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
616 return false;
617
618 /* Find inner loop's simple reduction that uses var as initializer. */
619 reduction_p inner_re = NULL;
620 for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
621 if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
622 break;
623
624 if (inner_re == NULL
625 || inner_re->type != UNKNOWN_RTYPE
626 || inner_re->producer != phi)
627 return false;
628
629 /* In case of double reduction, outer loop's reduction should be updated
630 by inner loop's simple reduction. */
631 if (next_def != inner_re->lcssa_phi)
632 return false;
633
634 /* Outer loop's reduction should only be used to initialize inner loop's
635 simple reduction. */
636 if (! single_imm_use (var, &use_p, &stmt)
637 || stmt != inner_re->phi)
638 return false;
639
640 /* Check this reduction is correctly used outside of loop via lcssa phi. */
641 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
642 {
643 stmt = USE_STMT (use_p);
644 if (is_gimple_debug (stmt))
645 continue;
646
647 /* Or else it's used in PHI itself. */
648 use_phi = dyn_cast <gphi *> (stmt);
649 if (use_phi == phi)
650 continue;
651
652 if (lcssa_phi == NULL
653 && use_phi != NULL
654 && gimple_bb (stmt) == m_exit->dest
655 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
656 lcssa_phi = use_phi;
657 else
658 return false;
659 }
660 if (!lcssa_phi)
661 return false;
662
663 re = XCNEW (struct reduction);
664 re->var = var;
665 re->init = init;
666 re->next = next;
667 re->phi = phi;
668 re->lcssa_phi = lcssa_phi;
669 re->type = DOUBLE_RTYPE;
670 inner_re->type = DOUBLE_RTYPE;
671
672 if (dump_file && (dump_flags & TDF_DETAILS))
673 dump_reduction (re);
674
675 m_reductions.safe_push (re);
676 return true;
677}
678
679/* Return true if VAR is induction variable of current loop whose scev is
680 specified by CHREC. */
681
682bool
683loop_cand::analyze_induction_var (tree var, tree chrec)
684{
685 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
686 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
687
688 /* Var is loop invariant, though it's unlikely to happen. */
689 if (tree_does_not_contain_chrecs (chrec))
690 {
37aa6856
JJ
691 /* Punt on floating point invariants if honoring signed zeros,
692 representing that as + 0.0 would change the result if init
f359ba2f
JJ
693 is -0.0. Similarly for SNaNs it can raise exception. */
694 if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
37aa6856 695 return false;
fbdec14e
BC
696 struct induction *iv = XCNEW (struct induction);
697 iv->var = var;
698 iv->init_val = init;
699 iv->init_expr = chrec;
37aa6856 700 iv->step = build_zero_cst (TREE_TYPE (chrec));
fbdec14e
BC
701 m_inductions.safe_push (iv);
702 return true;
703 }
704
705 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
706 || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
707 || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
708 || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
709 return false;
710
711 struct induction *iv = XCNEW (struct induction);
712 iv->var = var;
713 iv->init_val = init;
714 iv->init_expr = CHREC_LEFT (chrec);
715 iv->step = CHREC_RIGHT (chrec);
716
717 if (dump_file && (dump_flags & TDF_DETAILS))
718 dump_induction (m_loop, iv);
719
720 m_inductions.safe_push (iv);
721 return true;
722}
723
724/* Return true if all loop carried variables defined in loop header can
725 be successfully analyzed. */
726
727bool
728loop_cand::analyze_carried_vars (loop_cand *iloop)
729{
730 edge e = loop_preheader_edge (m_outer);
731 gphi_iterator gsi;
732
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
735
736 for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
737 {
738 gphi *phi = gsi.phi ();
739
740 tree var = PHI_RESULT (phi);
741 if (virtual_operand_p (var))
742 continue;
743
744 tree chrec = analyze_scalar_evolution (m_loop, var);
745 chrec = instantiate_scev (e, m_loop, chrec);
746
747 /* Analyze var as reduction variable. */
748 if (chrec_contains_undetermined (chrec)
749 || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
750 {
751 if (iloop && !analyze_oloop_reduction_var (iloop, var))
752 return false;
753 if (!iloop && !analyze_iloop_reduction_var (var))
754 return false;
755 }
756 /* Analyze var as induction variable. */
757 else if (!analyze_induction_var (var, chrec))
758 return false;
759 }
760
761 return true;
762}
763
764/* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
765
766bool
767loop_cand::analyze_lcssa_phis (void)
768{
769 gphi_iterator gsi;
770 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
771 {
772 gphi *phi = gsi.phi ();
773
774 if (virtual_operand_p (PHI_RESULT (phi)))
775 continue;
776
777 /* TODO: We only support lcssa phi for reduction for now. */
778 if (!find_reduction_by_stmt (phi))
779 return false;
780 }
781
782 return true;
783}
784
785/* CONSUMER is a stmt in BB storing reduction result into memory object.
786 When the reduction is intialized from constant value, we need to add
787 a stmt loading from the memory object to target basic block in inner
788 loop during undoing the reduction. Problem is that memory reference
789 may use ssa variables not dominating the target basic block. This
790 function finds all stmts on which CONSUMER depends in basic block BB,
791 records and returns them via STMTS. */
792
793static void
794find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
795{
796 auto_vec<gimple *, 4> worklist;
797 use_operand_p use_p;
798 ssa_op_iter iter;
799 gimple *stmt, *def_stmt;
800 gimple_stmt_iterator gsi;
801
802 /* First clear flag for stmts in bb. */
803 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
804 gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
805
806 /* DFS search all depended stmts in bb and mark flag for these stmts. */
807 worklist.safe_push (consumer);
808 while (!worklist.is_empty ())
809 {
810 stmt = worklist.pop ();
811 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
812 {
813 def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
814
815 if (is_a <gphi *> (def_stmt)
816 || gimple_bb (def_stmt) != bb
817 || gimple_plf (def_stmt, GF_PLF_1))
818 continue;
819
820 worklist.safe_push (def_stmt);
821 }
822 gimple_set_plf (stmt, GF_PLF_1, true);
823 }
65f4b875 824 for (gsi = gsi_start_nondebug_bb (bb);
fbdec14e
BC
825 !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
826 {
827 /* Move dep stmts to sequence STMTS. */
828 if (gimple_plf (stmt, GF_PLF_1))
829 {
830 gsi_remove (&gsi, false);
831 gimple_seq_add_stmt_without_update (stmts, stmt);
832 }
833 else
834 gsi_next_nondebug (&gsi);
835 }
836}
837
838/* User can write, optimizers can generate simple reduction RE for inner
839 loop. In order to make interchange valid, we have to undo reduction by
840 moving producer and consumer stmts into the inner loop. For example,
841 below code:
842
843 init = MEM_REF[idx]; //producer
844 loop:
845 var = phi<init, next>
846 next = var op ...
847 reduc_sum = phi<next>
848 MEM_REF[idx] = reduc_sum //consumer
849
850 is transformed into:
851
852 loop:
853 new_var = MEM_REF[idx]; //producer after moving
854 next = new_var op ...
855 MEM_REF[idx] = next; //consumer after moving
856
857 Note if the reduction variable is initialized to constant, like:
858
859 var = phi<0.0, next>
860
861 we compute new_var as below:
862
863 loop:
864 tmp = MEM_REF[idx];
865 new_var = !first_iteration ? tmp : 0.0;
866
867 so that the initial const is used in the first iteration of loop. Also
868 record ssa variables for dead code elimination in DCE_SEEDS. */
869
870void
871loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
872{
873 gimple *stmt;
874 gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
875 gimple_seq stmts = NULL;
876 tree new_var;
877
878 /* Prepare the initialization stmts and insert it to inner loop. */
879 if (re->producer != NULL)
880 {
881 gimple_set_vuse (re->producer, NULL_TREE);
882 from = gsi_for_stmt (re->producer);
883 gsi_remove (&from, false);
884 gimple_seq_add_stmt_without_update (&stmts, re->producer);
885 new_var = re->init;
886 }
887 else
888 {
889 /* Find all stmts on which expression "MEM_REF[idx]" depends. */
890 find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
891 /* Because we generate new stmt loading from the MEM_REF to TMP. */
892 tree cond, tmp = copy_ssa_name (re->var);
893 stmt = gimple_build_assign (tmp, re->init_ref);
894 gimple_seq_add_stmt_without_update (&stmts, stmt);
895
896 /* Init new_var to MEM_REF or CONST depending on if it is the first
897 iteration. */
898 induction_p iv = m_inductions[0];
899 cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
900 new_var = copy_ssa_name (re->var);
901 stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
902 gimple_seq_add_stmt_without_update (&stmts, stmt);
903 }
904 gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
905
906 /* Replace all uses of reduction var with new variable. */
907 use_operand_p use_p;
908 imm_use_iterator iterator;
909 FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
910 {
911 FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
912 SET_USE (use_p, new_var);
913
914 update_stmt (stmt);
915 }
916
917 /* Move consumer stmt into inner loop, just after reduction next's def. */
918 unlink_stmt_vdef (re->consumer);
919 release_ssa_name (gimple_vdef (re->consumer));
920 gimple_set_vdef (re->consumer, NULL_TREE);
921 gimple_set_vuse (re->consumer, NULL_TREE);
922 gimple_assign_set_rhs1 (re->consumer, re->next);
923 from = gsi_for_stmt (re->consumer);
924 to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
925 gsi_move_after (&from, &to);
926
927 /* Mark the reduction variables for DCE. */
928 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
929 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
930}
931
932/* Free DATAREFS and its auxiliary memory. */
933
934static void
935free_data_refs_with_aux (vec<data_reference_p> datarefs)
936{
937 data_reference_p dr;
938 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
939 if (dr->aux != NULL)
940 {
941 DR_ACCESS_STRIDE (dr)->release ();
46bb9d29 942 delete (vec<tree> *) dr->aux;
fbdec14e
BC
943 }
944
945 free_data_refs (datarefs);
946}
947
948/* Class for loop interchange transformation. */
949
950class tree_loop_interchange
951{
952public:
99b1c316 953 tree_loop_interchange (vec<class loop *> loop_nest)
fbdec14e
BC
954 : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
955 m_dce_seeds (BITMAP_ALLOC (NULL)) { }
956 ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
957 bool interchange (vec<data_reference_p>, vec<ddr_p>);
958
959private:
960 void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
961 bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
962 void interchange_loops (loop_cand &, loop_cand &);
963 void map_inductions_to_loop (loop_cand &, loop_cand &);
99b1c316 964 void move_code_to_inner_loop (class loop *, class loop *, basic_block *);
fbdec14e
BC
965
966 /* The whole loop nest in which interchange is ongoing. */
99b1c316 967 vec<class loop *> m_loop_nest;
fbdec14e
BC
968 /* We create new IV which is only used in loop's exit condition check.
969 In case of 3-level loop nest interchange, when we interchange the
970 innermost two loops, new IV created in the middle level loop does
971 not need to be preserved in interchanging the outermost two loops
972 later. We record the IV so that it can be skipped. */
973 tree m_niters_iv_var;
974 /* Bitmap of seed variables for dead code elimination after interchange. */
975 bitmap m_dce_seeds;
976};
977
978/* Update data refs' access stride and dependence information after loop
979 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
980 nest. DATAREFS are data references. DDRS are data dependences. */
981
982void
983tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
984 vec<data_reference_p> datarefs,
985 vec<ddr_p> ddrs)
986{
987 struct data_reference *dr;
988 struct data_dependence_relation *ddr;
989
990 /* Update strides of data references. */
991 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
992 {
993 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
994 gcc_assert (stride->length () > i_idx);
995 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
996 }
997 /* Update data dependences. */
998 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
999 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1000 {
1001 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1002 {
1003 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1004 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1005 }
1006 }
1007}
1008
1009/* Check data dependence relations, return TRUE if it's valid to interchange
1010 two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
1011 loops is valid only if dist vector, after interchanging, doesn't have '>'
1012 as the leftmost non-'=' direction. Practically, this function have been
1013 conservative here by not checking some valid cases. */
1014
1015bool
1016tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
1017 vec<ddr_p> ddrs)
1018{
1019 struct data_dependence_relation *ddr;
1020
1021 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1022 {
1023 /* Skip no-dependence case. */
1024 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1025 continue;
1026
1027 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1028 {
1029 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1030 unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
1031
1032 /* If there is no carried dependence. */
1033 if (level == 0)
1034 continue;
1035
1036 level --;
1037
1038 /* If dependence is not carried by any loop in between the two
1039 loops [oloop, iloop] to interchange. */
1040 if (level < o_idx || level > i_idx)
1041 continue;
1042
1043 /* Be conservative, skip case if either direction at i_idx/o_idx
1044 levels is not '=' or '<'. */
1045 if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
1046 return false;
1047 }
1048 }
1049
1050 return true;
1051}
1052
1053/* Interchange two loops specified by ILOOP and OLOOP. */
1054
void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
	if (re->producer)
	  reset_debug_uses (re->producer);

	iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops.  Both counts are instantiated at the
     preheader edge of the outermost loop of the nest so the gimplified
     expressions are valid there.  */
  class loop *loop_nest = m_loop_nest[0];
  edge instantiate_below = loop_preheader_edge (loop_nest);
  gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
  i_niters = number_of_latch_executions (iloop.m_loop);
  i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
  i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
			       i_niters);
  i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);
  o_niters = number_of_latch_executions (oloop.m_loop);
  if (oloop.m_loop != loop_nest)
    {
      /* Outer loop's niters only needs instantiation when it is not
	 already the outermost loop of the nest.  */
      o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
      o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
				   o_niters);
    }
  o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);

  /* Move src's code to tgt loop.  This is necessary when src is the outer
     loop and tgt is the inner loop.  */
  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);

  /* Map outer loop's IV to inner loop, and vice versa.  */
  map_inductions_to_loop (oloop, iloop);
  map_inductions_to_loop (iloop, oloop);

  /* Create canonical IV for both loops.  Note canonical IV for outer/inner
     loop is actually from inner/outer loop.  Also we record the new IV
     created for the outer loop so that it can be skipped in later loop
     interchange.  */
  create_canonical_iv (oloop.m_loop, oloop.m_exit,
		       i_niters, &m_niters_iv_var, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
  create_canonical_iv (iloop.m_loop, iloop.m_exit,
		       o_niters, NULL, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

  /* Scrap niters estimation of interchanged loops.  */
  iloop.m_loop->any_upper_bound = false;
  iloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (iloop.m_loop);
  oloop.m_loop->any_upper_bound = false;
  oloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (oloop.m_loop);

  /* Clear all cached scev information.  This is expensive but shouldn't be
     a problem given we interchange in very limited times.  */
  scev_reset_htab ();

  /* ??? The association between the loop data structure and the
     CFG changed, so what was loop N at the source level is now
     loop M.  We should think of retaining the association or breaking
     it fully by creating a new loop instead of re-using the "wrong" one.  */
}
1136
1137/* Map induction variables of SRC loop to TGT loop. The function firstly
1138 creates the same IV of SRC loop in TGT loop, then deletes the original
1139 IV and re-initialize it using the newly created IV. For example, loop
1140 nest:
1141
1142 for (i = 0; i < N; i++)
1143 for (j = 0; j < M; j++)
1144 {
1145 //use of i;
1146 //use of j;
1147 }
1148
1149 will be transformed into:
1150
1151 for (jj = 0; jj < M; jj++)
1152 for (ii = 0; ii < N; ii++)
1153 {
1154 //use of ii;
1155 //use of jj;
1156 }
1157
1158 after loop interchange. */
1159
void
tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
{
  induction_p iv;
  edge e = tgt.m_exit;
  /* New IV increments are inserted at the end of TGT's exit source block.  */
  gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;

  /* Map source loop's IV to target loop.  */
  for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    {
      gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
      gcc_assert (is_a <gphi *> (stmt));

      use_operand_p use_p;
      /* Only map original IV to target loop.  The bookkeeping IV created
	 by a previous interchange (m_niters_iv_var) is skipped — it is
	 only used in the exit test and need not be preserved.  */
      if (m_niters_iv_var != iv->var)
	{
	  /* Map the IV by creating the same one in target loop.  */
	  tree var_before, var_after;
	  tree base = unshare_expr (iv->init_expr);
	  tree step = unshare_expr (iv->step);
	  create_iv (base, step, SSA_NAME_VAR (iv->var),
		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

	  /* Replace uses of the original IV var with newly created IV var.  */
	  imm_use_iterator imm_iter;
	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		SET_USE (use_p, var_before);

	      update_stmt (use_stmt);
	    }
	}

      /* Mark all uses for DCE.  Default defs are excluded since they are
	 not defined by removable statements.  */
      ssa_op_iter op_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) == SSA_NAME
	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
	}

      /* Delete definition of the original IV in the source loop.  */
      gsi = gsi_for_stmt (stmt);
      remove_phi_node (&gsi, true);
    }
}
1212
1213/* Move stmts of outer loop to inner loop. */
1214
void
tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
						class loop *inner,
						basic_block *outer_bbs)
{
  basic_block oloop_exit_bb = single_exit (outer)->src;
  gimple_stmt_iterator gsi, to;

  for (unsigned i = 0; i < outer->num_nodes; i++)
    {
      basic_block bb = outer_bbs[i];

      /* Skip basic blocks of inner loop.  */
      if (flow_bb_inside_loop_p (inner, bb))
	continue;

      /* Move code from header/latch to header/latch.  */
      if (bb == outer->header)
	to = gsi_after_labels (inner->header);
      else if (bb == outer->latch)
	to = gsi_after_labels (inner->latch);
      else
	/* Otherwise, simply move to exit->src.  */
	to = gsi_last_bb (single_exit (inner)->src);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Keep the outer loop's exit condition in place; only the
	     other statements are moved.  */
	  if (oloop_exit_bb == bb
	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }

	  /* Strip virtual operands; the statements get new virtual
	     use/def chains at their new location.  */
	  if (gimple_vuse (stmt))
	    gimple_set_vuse (stmt, NULL_TREE);
	  if (gimple_vdef (stmt))
	    {
	      unlink_stmt_vdef (stmt);
	      release_ssa_name (gimple_vdef (stmt));
	      gimple_set_vdef (stmt, NULL_TREE);
	    }

	  reset_debug_uses (stmt);
	  /* gsi_move_before advances GSI past the moved statement.  */
	  gsi_move_before (&gsi, &to);
	}
    }
}
1265
1266/* Given data reference DR in LOOP_NEST, the function computes DR's access
1267 stride at each level of loop from innermost LOOP to outer. On success,
1268 it saves access stride at each level loop in a vector which is pointed
1269 by DR->aux. For example:
1270
1271 int arr[100][100][100];
1272 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1273 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1274 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1275 arr[i][j - 1][k] = 0; */
1276
static void
compute_access_stride (class loop *loop_nest, class loop *loop,
		       data_reference_p dr)
{
  vec<tree> *strides = new vec<tree> ();
  basic_block bb = gimple_bb (DR_STMT (dr));

  /* If the reference lives in an outer loop of the nest, pad zero strides
     for the levels below it.  */
  while (!flow_bb_inside_loop_p (loop, bb))
    {
      strides->safe_push (build_int_cst (sizetype, 0));
      loop = loop_outer (loop);
    }
  gcc_assert (loop == bb->loop_father);

  tree ref = DR_REF (dr);
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      /* We can't take address of bitfields.  If the bitfield is at constant
	 offset from the start of the struct, just use address of the
	 struct, for analysis of the strides that shouldn't matter.  */
      if (!TREE_OPERAND (ref, 2)
	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
	ref = TREE_OPERAND (ref, 0);
      /* Otherwise, if we have a bit field representative, use that.  */
      else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
	       != NULL_TREE)
	{
	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
			repr, TREE_OPERAND (ref, 2));
	}
      /* Otherwise punt.  */
      else
	{
	  /* Leave an empty stride vector; the caller treats too-short
	     vectors as failure.  */
	  dr->aux = strides;
	  return;
	}
    }
  tree scev_base = build_fold_addr_expr (ref);
  tree scev = analyze_scalar_evolution (loop, scev_base);
  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
  if (! chrec_contains_undetermined (scev))
    {
      /* Peel the chrec level by level, from the innermost loop outwards;
	 a missing level means the address is invariant there (stride 0).  */
      tree sl = scev;
      class loop *expected = loop;
      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
	{
	  class loop *sl_loop = get_chrec_loop (sl);
	  while (sl_loop != expected)
	    {
	      strides->safe_push (size_int (0));
	      expected = loop_outer (expected);
	    }
	  strides->safe_push (CHREC_RIGHT (sl));
	  sl = CHREC_LEFT (sl);
	  expected = loop_outer (expected);
	}
      /* If the remaining base is chrec-free, the address is invariant in
	 all remaining outer levels of the nest.  */
      if (! tree_contains_chrecs (sl, NULL))
	while (expected != loop_outer (loop_nest))
	  {
	    strides->safe_push (size_int (0));
	    expected = loop_outer (expected);
	  }
    }

  dr->aux = strides;
}
1345
1346/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1347 access strides with respect to each level loop for all data refs in
1348 DATAREFS from inner loop to outer loop. On success, it returns the
1349 outermost loop that access strides can be computed successfully for
1350 all data references. If access strides cannot be computed at least
1351 for two levels of loop for any data reference, it returns NULL. */
1352
static class loop *
compute_access_strides (class loop *loop_nest, class loop *loop,
			vec<data_reference_p> datarefs)
{
  unsigned i, j, num_loops = (unsigned) -1;
  data_reference_p dr;
  vec<tree> *stride;

  /* NUM_LOOPS becomes the minimum number of levels analyzable for every
     data reference; fewer than two levels means interchange is pointless.  */
  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      compute_access_stride (loop_nest, loop, dr);
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () < num_loops)
	{
	  num_loops = stride->length ();
	  if (num_loops < 2)
	    return NULL;
	}
    }

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      /* Truncate every stride vector to the common depth ...  */
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () > num_loops)
	stride->truncate (num_loops);

      /* ... and reverse it so strides are ordered outer-to-inner.  */
      for (j = 0; j < (num_loops >> 1); ++j)
	std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
    }

  /* Return the outermost loop covered by all the stride vectors.  */
  loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
  gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
  return loop;
}
1387
1388/* Prune access strides for data references in DATAREFS by removing strides
1389 of loops that isn't in current LOOP_NEST. */
1390
1391static void
99b1c316
MS
1392prune_access_strides_not_in_loop (class loop *loop_nest,
1393 class loop *innermost,
fbdec14e
BC
1394 vec<data_reference_p> datarefs)
1395{
1396 data_reference_p dr;
1397 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1398 gcc_assert (num_loops > 1);
1399
1400 /* Block remove strides of loops that is not in current loop nest. */
1401 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1402 {
1403 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1404 if (stride->length () > num_loops)
1405 stride->block_remove (0, stride->length () - num_loops);
1406 }
1407}
1408
1409/* Dump access strides for all DATAREFS. */
1410
1411static void
1412dump_access_strides (vec<data_reference_p> datarefs)
1413{
1414 data_reference_p dr;
1415 fprintf (dump_file, "Access Strides for DRs:\n");
1416 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1417 {
1418 fprintf (dump_file, " ");
1419 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1420 fprintf (dump_file, ":\t\t<");
1421
1422 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1423 unsigned num_loops = stride->length ();
1424 for (unsigned j = 0; j < num_loops; ++j)
1425 {
1426 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1427 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1428 }
1429 }
1430}
1431
1432/* Return true if it's profitable to interchange two loops whose index
1433 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1434 computes and compares three types information from all DATAREFS:
1435 1) Access stride for loop I_IDX and O_IDX.
1436 2) Number of invariant memory references with respect to I_IDX before
1437 and after loop interchange.
1438 3) Flags indicating if all memory references access sequential memory
1439 in ILOOP, before and after loop interchange.
1440 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1441 innermost loops in loop nest. This function also dumps information if
1442 DUMP_INFO_P is true. */
1443
1444static bool
1445should_interchange_loops (unsigned i_idx, unsigned o_idx,
1446 vec<data_reference_p> datarefs,
1eeeda47 1447 unsigned i_stmt_cost, unsigned o_stmt_cost,
fbdec14e
BC
1448 bool innermost_loops_p, bool dump_info_p = true)
1449{
1450 unsigned HOST_WIDE_INT ratio;
1451 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1452 struct data_reference *dr;
1453 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1454 widest_int iloop_strides = 0, oloop_strides = 0;
1455 unsigned num_unresolved_drs = 0;
1456 unsigned num_resolved_ok_drs = 0;
1457 unsigned num_resolved_not_ok_drs = 0;
1458
1459 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1460 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1461
1462 for (i = 0; datarefs.iterate (i, &dr); ++i)
1463 {
1464 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1465 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1466
1467 bool subloop_stride_p = false;
1468 /* Data ref can't be invariant or sequential access at current loop if
1469 its address changes with respect to any subloops. */
1470 for (j = i_idx + 1; j < stride->length (); ++j)
1471 if (!integer_zerop ((*stride)[j]))
1472 {
1473 subloop_stride_p = true;
1474 break;
1475 }
1476
1477 if (integer_zerop (iloop_stride))
1478 {
1479 if (!subloop_stride_p)
1480 num_old_inv_drs++;
1481 }
1482 if (integer_zerop (oloop_stride))
1483 {
1484 if (!subloop_stride_p)
1485 num_new_inv_drs++;
1486 }
1487
1488 if (TREE_CODE (iloop_stride) == INTEGER_CST
1489 && TREE_CODE (oloop_stride) == INTEGER_CST)
1490 {
1491 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1492 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1493 }
1494 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1495 iloop_stride, oloop_stride))
1496 num_resolved_ok_drs++;
1497 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1498 oloop_stride, iloop_stride))
1499 num_resolved_not_ok_drs++;
1500 else
1501 num_unresolved_drs++;
1502
1503 /* Data ref can't be sequential access if its address changes in sub
1504 loop. */
1505 if (subloop_stride_p)
1506 {
1507 all_seq_dr_before_p = false;
1508 all_seq_dr_after_p = false;
1509 continue;
1510 }
1511 /* Track if all data references are sequential accesses before/after loop
1512 interchange. Note invariant is considered sequential here. */
1513 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1514 if (all_seq_dr_before_p
1515 && ! (integer_zerop (iloop_stride)
1516 || operand_equal_p (access_size, iloop_stride, 0)))
1517 all_seq_dr_before_p = false;
1518 if (all_seq_dr_after_p
1519 && ! (integer_zerop (oloop_stride)
1520 || operand_equal_p (access_size, oloop_stride, 0)))
1521 all_seq_dr_after_p = false;
1522 }
1523
1524 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1525 {
1526 fprintf (dump_file, "\toverall:\t\t");
1527 print_decu (iloop_strides, dump_file);
1528 fprintf (dump_file, "\t");
1529 print_decu (oloop_strides, dump_file);
1530 fprintf (dump_file, "\n");
1531
1532 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1533 num_old_inv_drs, num_new_inv_drs);
1534 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1535 all_seq_dr_before_p ? "true" : "false",
1536 all_seq_dr_after_p ? "true" : "false");
1537 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1538 num_resolved_ok_drs);
1539 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1540 num_resolved_not_ok_drs);
1541 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1542 num_unresolved_drs);
1eeeda47
BC
1543 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
1544 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
fbdec14e
BC
1545 }
1546
1547 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1548 return false;
1549
1eeeda47
BC
1550 /* Stmts of outer loop will be moved to inner loop. If there are two many
1551 such stmts, it could make inner loop costly. Here we compare stmt cost
1552 between outer and inner loops. */
1553 if (i_stmt_cost && o_stmt_cost
1554 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
1555 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
1556 return false;
1557
fbdec14e
BC
1558 /* We use different stride comparison ratio for interchanging innermost
1559 two loops or not. The idea is to be conservative in interchange for
1560 the innermost loops. */
1561 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1562 /* Do interchange if it gives better data locality behavior. */
1563 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1564 return true;
1565 if (wi::gtu_p (iloop_strides, oloop_strides))
1566 {
1567 /* Or it creates more invariant memory references. */
1568 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1569 && num_new_inv_drs > num_old_inv_drs)
1570 return true;
1571 /* Or it makes all memory references sequential. */
1572 if (num_new_inv_drs >= num_old_inv_drs
1573 && !all_seq_dr_before_p && all_seq_dr_after_p)
1574 return true;
1575 }
1576
1577 return false;
1578}
1579
1580/* Try to interchange inner loop of a loop nest to outer level. */
1581
bool
tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
				    vec<ddr_p> ddrs)
{
  dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
  bool changed_p = false;
  /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
     The overall effect is to push inner loop to outermost level in whole
     loop nest.  */
  for (unsigned i = m_loop_nest.length (); i > 1; --i)
    {
      unsigned i_idx = i - 1, o_idx = i - 2;

      /* Check validity for loop interchange.  */
      if (!valid_data_dependences (i_idx, o_idx, ddrs))
	break;

      loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
      loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);

      /* Check if we can do transformation for loop interchange.  */
      if (!iloop.analyze_carried_vars (NULL)
	  || !iloop.analyze_lcssa_phis ()
	  || !oloop.analyze_carried_vars (&iloop)
	  || !oloop.analyze_lcssa_phis ()
	  || !iloop.can_interchange_p (NULL)
	  || !oloop.can_interchange_p (&iloop))
	break;

      /* Outer loop's stmts will be moved to inner loop during interchange.
	 If there are many of them, it may make inner loop very costly.  We
	 need to check number of outer loop's stmts in profit cost model of
	 interchange.  */
      int stmt_cost = oloop.m_num_stmts;
      /* Count out the exit checking stmt of outer loop.  */
      stmt_cost --;
      /* Count out IV's increasing stmt, IVOPTs takes care if it.  */
      stmt_cost -= oloop.m_inductions.length ();
      /* Count in the additional load and cond_expr stmts caused by inner
	 loop's constant initialized reduction.  */
      stmt_cost += iloop.m_const_init_reduc * 2;
      if (stmt_cost < 0)
	stmt_cost = 0;

      /* Check profitability for loop interchange.  */
      if (should_interchange_loops (i_idx, o_idx, datarefs,
				    (unsigned) iloop.m_num_stmts,
				    (unsigned) stmt_cost,
				    iloop.m_loop->inner == NULL))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);

	  changed_p = true;
	  interchange_loops (iloop, oloop);
	  /* No need to update if there is no further loop interchange.  */
	  if (o_idx > 0)
	    update_data_info (i_idx, o_idx, datarefs, ddrs);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
		     oloop.m_loop->num, iloop.m_loop->num);
	}
    }
  /* Clean up code made dead by undoing reductions / remapping IVs.  */
  simple_dce_from_worklist (m_dce_seeds);

  if (changed_p && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "loops interchanged in loop nest\n");

  return changed_p;
}
1659
1660
1661/* Loop interchange pass. */
1662
1663namespace {
1664
/* Pass descriptor for the loop-interchange GIMPLE pass.  */
const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1677
/* GIMPLE optimization pass wrapper; gated on -floop-interchange.  */
class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);

}; // class pass_linterchange
1691
1692
1693/* Return true if LOOP has proper form for interchange. We check three
1694 conditions in the function:
1695 1) In general, a loop can be interchanged only if it doesn't have
1696 basic blocks other than header, exit and latch besides possible
1697 inner loop nest. This basically restricts loop interchange to
1698 below form loop nests:
1699
1700 header<---+
1701 | |
1702 v |
1703 INNER_LOOP |
1704 | |
1705 v |
1706 exit--->latch
1707
1708 2) Data reference in basic block that executes in different times
1709 than loop head/exit is not allowed.
1710 3) Record the innermost outer loop that doesn't form rectangle loop
1711 nest with LOOP. */
1712
static bool
proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
{
  edge e0, e1, exit;

  /* Don't interchange if loop has unsupported information for the moment.  */
  if (loop->safelen > 0
      || loop->constraints != 0
      || loop->can_be_parallel
      || loop->dont_vectorize
      || loop->force_vectorize
      || loop->in_oacc_kernels_region
      || loop->orig_loop_num != 0
      || loop->simduid != NULL_TREE)
    return false;

  /* Don't interchange if outer loop has basic block other than header, exit
     and latch.  */
  if (loop->inner != NULL
      && loop->num_nodes != loop->inner->num_nodes + 3)
    return false;

  if ((exit = single_dom_exit (loop)) == NULL)
    return false;

  /* Check control flow on loop header/exit blocks.  Two shapes are
     allowed: the header is itself the exit test, or a separate exit
     block with a single straight-line path from the header.  */
  if (loop->header == exit->src
      && (EDGE_COUNT (loop->header->preds) != 2
	  || EDGE_COUNT (loop->header->succs) != 2))
    return false;
  else if (loop->header != exit->src
	   && (EDGE_COUNT (loop->header->preds) != 2
	       || !single_succ_p (loop->header)
	       || unsupported_edge (single_succ_edge (loop->header))
	       || EDGE_COUNT (exit->src->succs) != 2
	       || !single_pred_p (exit->src)
	       || unsupported_edge (single_pred_edge (exit->src))))
    return false;

  /* Header's predecessors must be the latch and exactly one edge from
     outside the loop.  */
  e0 = EDGE_PRED (loop->header, 0);
  e1 = EDGE_PRED (loop->header, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->src != loop->latch && e1->src != loop->latch)
      || (e0->src->loop_father == loop && e1->src->loop_father == loop))
    return false;

  /* Exit block's successors must be the latch and the edge leaving LOOP.  */
  e0 = EDGE_SUCC (exit->src, 0);
  e1 = EDGE_SUCC (exit->src, 1);
  if (unsupported_edge (e0) || unsupported_edge (e1)
      || (e0->dest != loop->latch && e1->dest != loop->latch)
      || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
    return false;

  /* Don't interchange if any reference is in basic block that doesn't
     dominate exit block.  */
  basic_block *bbs = get_loop_body (loop);
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      if (bb->loop_father != loop
	  || bb == loop->header || bb == exit->src
	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
	continue;

      for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
	if (gimple_vuse (gsi_stmt (gsi)))
	  {
	    free (bbs);
	    return false;
	  }
    }
  free (bbs);

  /* The loop must have a computable iteration count.  */
  tree niters = number_of_latch_executions (loop);
  niters = analyze_scalar_evolution (loop_outer (loop), niters);
  if (!niters || chrec_contains_undetermined (niters))
    return false;

  /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
  for (loop_p loop2 = loop_outer (loop);
       loop2 && flow_loop_nested_p (*min_outer, loop2);
       loop2 = loop_outer (loop2))
    {
      niters = instantiate_scev (loop_preheader_edge (loop2),
				 loop_outer (loop), niters);
      if (!evolution_function_is_invariant_p (niters, loop2->num))
	{
	  *min_outer = loop2;
	  break;
	}
    }
  return true;
}
1808
1809/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1810 should be interchanged by looking into all DATAREFS. */
1811
1812static bool
99b1c316 1813should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
fbdec14e
BC
1814 vec<data_reference_p> datarefs)
1815{
1816 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1817 gcc_assert (idx > 0);
1818
1819 /* Check if any two adjacent loops should be interchanged. */
99b1c316 1820 for (class loop *loop = innermost;
fbdec14e 1821 loop != loop_nest; loop = loop_outer (loop), idx--)
1eeeda47 1822 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
fbdec14e
BC
1823 loop == innermost, false))
1824 return true;
1825
1826 return false;
1827}
1828
1829/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1830 dependences for loop interchange and store it in DDRS. Note we compute
1831 dependences directly rather than call generic interface so that we can
1832 return on unknown dependence instantly. */
1833
static bool
tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
				    vec<data_reference_p> datarefs,
				    vec<ddr_p> *ddrs)
{
  struct data_reference *a, *b;
  class loop *innermost = loop_nest.last ();

  /* Consider each pair with at least one write; read-read pairs impose
     no constraint.  */
  for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
    {
      bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
      for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
	  {
	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
	    /* Don't support multiple write references in outer loop.  */
	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
	      return false;

	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
	    ddrs->safe_push (ddr);
	    compute_affine_dependence (ddr, loop_nest[0]);

	    /* Give up if ddr is unknown dependence or classic direct vector
	       is not available.  */
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
		    && DDR_NUM_DIR_VECTS (ddr) == 0))
	      return false;

	    /* If either data references is in outer loop of nest, we require
	       no dependence here because the data reference need to be moved
	       into inner loop during interchange.  */
	    if (a_outer_p && b_outer_p
		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
	      continue;
	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
		&& (a_outer_p || b_outer_p))
	      return false;
	  }
    }

  return true;
}
1878
1879/* Prune DATAREFS by removing any data reference not inside of LOOP. */
1880
1881static inline void
99b1c316 1882prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
fbdec14e
BC
1883{
1884 unsigned i, j;
1885 struct data_reference *dr;
1886
1887 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1888 {
1889 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1890 datarefs[j++] = dr;
1891 else
1892 {
1893 if (dr->aux)
1894 {
1895 DR_ACCESS_STRIDE (dr)->release ();
46bb9d29 1896 delete (vec<tree> *) dr->aux;
fbdec14e
BC
1897 }
1898 free_data_ref (dr);
1899 }
1900 }
1901 datarefs.truncate (j);
1902}
1903
1904/* Find and store data references in DATAREFS for LOOP nest. If there's
1905 difficult data reference in a basic block, we shrink the loop nest to
1906 inner loop of that basic block's father loop. On success, return the
1907 outer loop of the result loop nest. */
1908
99b1c316
MS
static class loop *
prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
{
  class loop *loop_nest = loop;
  vec<data_reference_p> *bb_refs;
  basic_block bb, *bbs = get_loop_body_in_dom_order (loop);

  /* bb->aux is used below to stash each block's dataref vector; make sure
     it starts out clear.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    bbs[i]->aux = NULL;

  /* Find data references for all basic blocks.  Shrink loop nest on difficult
     data reference.  */
  for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
    {
      bb = bbs[i];
      if (!flow_bb_inside_loop_p (loop_nest, bb))
	continue;

      bb_refs = new vec<data_reference_p> ();
      if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
	{
	  /* Unanalyzable reference in this block: shrink the nest to start
	     below this block's father loop.  A nest that is no longer at
	     least two loops deep is useless for interchange, so give up by
	     setting loop_nest to NULL.  */
	  loop_nest = bb->loop_father->inner;
	  if (loop_nest && !loop_nest->inner)
	    loop_nest = NULL;

	  free_data_refs (*bb_refs);
	  delete bb_refs;
	}
      else if (bb_refs->is_empty ())
	delete bb_refs;
      else
	bb->aux = bb_refs;
    }

  /* Collect all data references in loop nest.  Blocks that fell outside
     the (possibly shrunk) nest have their references released instead.  */
  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      if (!bb->aux)
	continue;

      bb_refs = (vec<data_reference_p> *) bb->aux;
      if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
	datarefs->safe_splice (*bb_refs);
      else
	free_data_refs (*bb_refs);

      delete bb_refs;
      bb->aux = NULL;
    }
  free (bbs);

  return loop_nest;
}
1963
1964/* Given innermost LOOP, return true if perfect loop nest can be found and
1965 data dependences can be computed. If succeed, record the perfect loop
1966 nest in LOOP_NEST; record all data references in DATAREFS and record all
1967 data dependence relations in DDRS.
1968
1969 We do support a restricted form of imperfect loop nest, i.e, loop nest
1970 with load/store in outer loop initializing/finalizing simple reduction
1971 of the innermost loop. For such outer loop reference, we require that
 it has no dependence with others since it will be moved to inner loop
1973 in interchange. */
1974
static bool
prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
{
  class loop *start_loop = NULL, *innermost = loop;
  class loop *outermost = loops_for_fn (cfun)->tree_root;

  /* Walk outward from the innermost loop, growing the perfect loop nest
     one level at a time; OUTERMOST bounds how far out we may go.  The
     condition loop->inner == start_loop guarantees each loop contains
     exactly the previously accepted loop as its sole inner loop.  */
  while (loop->num != 0 && loop->inner == start_loop
	 && flow_loop_nested_p (outermost, loop))
    {
      if (!proper_loop_form_for_interchange (loop, &outermost))
	break;

      start_loop = loop;
      /* If this loop has sibling loop, the father loop won't be in perfect
	 loop nest.  */
      if (loop->next != NULL)
	break;

      loop = loop_outer (loop);
    }
  /* Interchange needs a nest of depth at least two.  */
  if (!start_loop || !start_loop->inner)
    return false;

  /* Prepare the data reference vector for the loop nest, pruning outer
     loops we cannot handle.  */
  start_loop = prepare_data_references (start_loop, datarefs);
  if (!start_loop
      /* Check if there is no data reference.  */
      || datarefs->is_empty ()
      /* Check if there are too many of data references.  */
      || (int) datarefs->length () > MAX_DATAREFS)
    return false;

  /* Compute access strides for all data references, pruning outer
     loops we cannot analyze refs in.  */
  start_loop = compute_access_strides (start_loop, innermost, *datarefs);
  if (!start_loop)
    return false;

  /* Check if any interchange is profitable in the loop nest.  */
  if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
    return false;

  /* Check if data dependences can be computed for loop nest starting from
     start_loop.  On failure, retry with the outermost loop stripped off,
     until fewer than two loops remain.  */
  loop = start_loop;
  do {
    loop_nest->truncate (0);

    /* After stripping, drop datarefs that are no longer in the nest.  */
    if (loop != start_loop)
      prune_datarefs_not_in_loop (start_loop, *datarefs);

    if (find_loop_nest (start_loop, loop_nest)
	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file,
		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
		   start_loop->num, innermost->num);

	if (loop != start_loop)
	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);

	if (dump_file && (dump_flags & TDF_DETAILS))
	  dump_access_strides (*datarefs);

	return true;
      }

    free_dependence_relations (*ddrs);
    *ddrs = vNULL;
    /* Try to compute data dependences with the outermost loop stripped.  */
    loop = start_loop;
    start_loop = start_loop->inner;
  } while (start_loop && start_loop->inner);

  return false;
}
2056
2057/* Main entry for loop interchange pass. */
2058
2059unsigned int
2060pass_linterchange::execute (function *fun)
2061{
2062 if (number_of_loops (fun) <= 2)
2063 return 0;
2064
2065 bool changed_p = false;
99b1c316 2066 class loop *loop;
fbdec14e
BC
2067 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2068 {
2069 vec<loop_p> loop_nest = vNULL;
2070 vec<data_reference_p> datarefs = vNULL;
2071 vec<ddr_p> ddrs = vNULL;
2072 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2073 {
2074 tree_loop_interchange loop_interchange (loop_nest);
2075 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2076 }
2077 free_dependence_relations (ddrs);
2078 free_data_refs_with_aux (datarefs);
2079 loop_nest.release ();
2080 }
2081
fbdec14e
BC
2082 return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
2083}
2084
2085} // anon namespace
2086
2087gimple_opt_pass *
2088make_pass_linterchange (gcc::context *ctxt)
2089{
2090 return new pass_linterchange (ctxt);
2091}