]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/gimple-loop-interchange.cc
Update copyright years.
[thirdparty/gcc.git] / gcc / gimple-loop-interchange.cc
1 /* Loop interchange.
2 Copyright (C) 2017-2021 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "is-a.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "gimple-pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimplify-me.h"
35 #include "cfgloop.h"
36 #include "tree-ssa.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-ssa-loop-manip.h"
39 #include "tree-ssa-loop-niter.h"
40 #include "tree-ssa-loop-ivopts.h"
41 #include "tree-ssa-dce.h"
42 #include "tree-data-ref.h"
43 #include "tree-vectorizer.h"
44
45 /* This pass performs loop interchange: for example, the loop nest
46
47 for (int j = 0; j < N; j++)
48 for (int k = 0; k < N; k++)
49 for (int i = 0; i < N; i++)
50 c[i][j] = c[i][j] + a[i][k]*b[k][j];
51
52 is transformed to
53
54 for (int i = 0; i < N; i++)
55 for (int j = 0; j < N; j++)
56 for (int k = 0; k < N; k++)
57 c[i][j] = c[i][j] + a[i][k]*b[k][j];
58
59 This pass implements loop interchange in the following steps:
60
61 1) Find perfect loop nest for each innermost loop and compute data
62 dependence relations for it. For above example, loop nest is
63 <loop_j, loop_k, loop_i>.
64 2) From innermost to outermost loop, this pass tries to interchange
65 each loop pair. For above case, it firstly tries to interchange
66 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
67 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
68 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
69 loop to the outermost position. For loop pair <loop_i, loop_j>
70 to be interchanged, we:
71 3) Check if data dependence relations are valid for loop interchange.
72 4) Check if both loops can be interchanged in terms of transformation.
73 5) Check if interchanging the two loops is profitable.
74 6) Interchange the two loops by mapping induction variables.
75
76 This pass also handles reductions in loop nest. So far we only support
77 simple reduction of inner loop and double reduction of the loop nest. */
78
79 /* Maximum number of stmts in each loop that should be interchanged. */
80 #define MAX_NUM_STMT (param_loop_interchange_max_num_stmts)
81 /* Maximum number of data references in loop nest. */
82 #define MAX_DATAREFS (param_loop_max_datarefs_for_datadeps)
83
84 /* Comparison ratio of access stride between inner/outer loops to be
85 interchanged. This is the minimum stride ratio for loop interchange
86 to be profitable. */
87 #define OUTER_STRIDE_RATIO (param_loop_interchange_stride_ratio)
88 /* The same as above, but we require higher ratio for interchanging the
89 innermost two loops. */
90 #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
91
92 /* Comparison ratio of stmt cost between inner/outer loops. Loops won't
93 be interchanged if outer loop has too many stmts. */
94 #define STMT_COST_RATIO (3)
95
96 /* Vector of strides that DR accesses in each level loop of a loop nest. */
97 #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
98
99 /* Structure recording loop induction variable. */
100 typedef struct induction
101 {
102 /* IV itself. */
103 tree var;
104 /* IV's initializing value, which is the init arg of the IV PHI node. */
105 tree init_val;
106 /* IV's initializing expr, which is (the expanded result of) init_val. */
107 tree init_expr;
108 /* IV's step. */
109 tree step;
110 } *induction_p;
111
112 /* Enum type for loop reduction variable. */
113 enum reduction_type
114 {
115 UNKNOWN_RTYPE = 0,
116 SIMPLE_RTYPE,
117 DOUBLE_RTYPE
118 };
119
120 /* Structure recording loop reduction variable. */
121 typedef struct reduction
122 {
123 /* Reduction itself. */
124 tree var;
125 /* PHI node defining reduction variable. */
126 gphi *phi;
127 /* Init and next variables of the reduction. */
128 tree init;
129 tree next;
130 /* Lcssa PHI node if reduction is used outside of its definition loop. */
131 gphi *lcssa_phi;
132 /* Stmts defining init and next. */
133 gimple *producer;
134 gimple *consumer;
135 /* If init is loaded from memory, this is the loading memory reference. */
136 tree init_ref;
137 /* If reduction is finally stored to memory, this is the stored memory
138 reference. */
139 tree fini_ref;
140 enum reduction_type type;
141 } *reduction_p;
142
143
144 /* Dump reduction RE. */
145
146 static void
147 dump_reduction (reduction_p re)
148 {
149 if (re->type == SIMPLE_RTYPE)
150 fprintf (dump_file, " Simple reduction: ");
151 else if (re->type == DOUBLE_RTYPE)
152 fprintf (dump_file, " Double reduction: ");
153 else
154 fprintf (dump_file, " Unknown reduction: ");
155
156 print_gimple_stmt (dump_file, re->phi, 0);
157 }
158
159 /* Dump LOOP's induction IV. */
160 static void
161 dump_induction (class loop *loop, induction_p iv)
162 {
163 fprintf (dump_file, " Induction: ");
164 print_generic_expr (dump_file, iv->var, TDF_SLIM);
165 fprintf (dump_file, " = {");
166 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
167 fprintf (dump_file, ", ");
168 print_generic_expr (dump_file, iv->step, TDF_SLIM);
169 fprintf (dump_file, "}_%d\n", loop->num);
170 }
171
172 /* Loop candidate for interchange. */
173
174 class loop_cand
175 {
176 public:
177 loop_cand (class loop *, class loop *);
178 ~loop_cand ();
179
180 reduction_p find_reduction_by_stmt (gimple *);
181 void classify_simple_reduction (reduction_p);
182 bool analyze_iloop_reduction_var (tree);
183 bool analyze_oloop_reduction_var (loop_cand *, tree);
184 bool analyze_induction_var (tree, tree);
185 bool analyze_carried_vars (loop_cand *);
186 bool analyze_lcssa_phis (void);
187 bool can_interchange_p (loop_cand *);
188 void undo_simple_reduction (reduction_p, bitmap);
189
190 /* The loop itself. */
191 class loop *m_loop;
192 /* The outer loop for interchange. It equals to loop if this loop cand
193 itself represents the outer loop. */
194 class loop *m_outer;
195 /* Vector of induction variables in loop. */
196 vec<induction_p> m_inductions;
197 /* Vector of reduction variables in loop. */
198 vec<reduction_p> m_reductions;
199 /* Lcssa PHI nodes of this loop. */
200 vec<gphi *> m_lcssa_nodes;
201 /* Single exit edge of this loop. */
202 edge m_exit;
203 /* Basic blocks of this loop. */
204 basic_block *m_bbs;
205 /* Number of stmts of this loop. Inner loops' stmts are not included. */
206 int m_num_stmts;
207 /* Number of constant initialized simple reduction. */
208 int m_const_init_reduc;
209 };
210
211 /* Constructor. */
212
213 loop_cand::loop_cand (class loop *loop, class loop *outer)
214 : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
215 m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
216 {
217 m_inductions.create (3);
218 m_reductions.create (3);
219 m_lcssa_nodes.create (3);
220 }
221
222 /* Destructor. */
223
224 loop_cand::~loop_cand ()
225 {
226 induction_p iv;
227 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
228 free (iv);
229
230 reduction_p re;
231 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
232 free (re);
233
234 m_inductions.release ();
235 m_reductions.release ();
236 m_lcssa_nodes.release ();
237 free (m_bbs);
238 }
239
240 /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
241
242 static gimple *
243 single_use_in_loop (tree var, class loop *loop)
244 {
245 gimple *stmt, *res = NULL;
246 use_operand_p use_p;
247 imm_use_iterator iterator;
248
249 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
250 {
251 stmt = USE_STMT (use_p);
252 if (is_gimple_debug (stmt))
253 continue;
254
255 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
256 continue;
257
258 if (res)
259 return NULL;
260
261 res = stmt;
262 }
263 return res;
264 }
265
266 /* Return true if E is unsupported in loop interchange, i.e, E is a complex
267 edge or part of irreducible loop. */
268
269 static inline bool
270 unsupported_edge (edge e)
271 {
272 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
273 }
274
275 /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
276 stmt. */
277
278 reduction_p
279 loop_cand::find_reduction_by_stmt (gimple *stmt)
280 {
281 gphi *phi = dyn_cast <gphi *> (stmt);
282 reduction_p re;
283
284 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
285 if ((phi != NULL && phi == re->lcssa_phi)
286 || (stmt == re->producer || stmt == re->consumer))
287 return re;
288
289 return NULL;
290 }
291
292 /* Return true if current loop_cand be interchanged. ILOOP is not NULL if
293 current loop_cand is outer loop in loop nest. */
294
295 bool
296 loop_cand::can_interchange_p (loop_cand *iloop)
297 {
298 /* For now we only support at most one reduction. */
299 unsigned allowed_reduction_num = 1;
300
301 /* Only support reduction if the loop nest to be interchanged is the
302 innermostin two loops. */
303 if ((iloop == NULL && m_loop->inner != NULL)
304 || (iloop != NULL && iloop->m_loop->inner != NULL))
305 allowed_reduction_num = 0;
306
307 if (m_reductions.length () > allowed_reduction_num
308 || (m_reductions.length () == 1
309 && m_reductions[0]->type == UNKNOWN_RTYPE))
310 return false;
311
312 /* Only support lcssa PHI node which is for reduction. */
313 if (m_lcssa_nodes.length () > allowed_reduction_num)
314 return false;
315
316 /* Check if basic block has any unsupported operation. Note basic blocks
317 of inner loops are not checked here. */
318 for (unsigned i = 0; i < m_loop->num_nodes; i++)
319 {
320 basic_block bb = m_bbs[i];
321 gphi_iterator psi;
322 gimple_stmt_iterator gsi;
323
324 /* Skip basic blocks of inner loops. */
325 if (bb->loop_father != m_loop)
326 continue;
327
328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
329 {
330 gimple *stmt = gsi_stmt (gsi);
331 if (is_gimple_debug (stmt))
332 continue;
333
334 if (gimple_has_side_effects (stmt))
335 return false;
336
337 m_num_stmts++;
338 if (gcall *call = dyn_cast <gcall *> (stmt))
339 {
340 /* In basic block of outer loop, the call should be cheap since
341 it will be moved to inner loop. */
342 if (iloop != NULL
343 && !gimple_inexpensive_call_p (call))
344 return false;
345 continue;
346 }
347
348 if (!iloop || !gimple_vuse (stmt))
349 continue;
350
351 /* Support stmt accessing memory in outer loop only if it is for
352 inner loop's reduction. */
353 if (iloop->find_reduction_by_stmt (stmt))
354 continue;
355
356 tree lhs;
357 /* Support loop invariant memory reference if it's only used once by
358 inner loop. */
359 /* ??? How's this checking for invariantness? */
360 if (gimple_assign_single_p (stmt)
361 && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
362 && TREE_CODE (lhs) == SSA_NAME
363 && single_use_in_loop (lhs, iloop->m_loop))
364 continue;
365
366 return false;
367 }
368 /* Check if loop has too many stmts. */
369 if (m_num_stmts > MAX_NUM_STMT)
370 return false;
371
372 /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
373 loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
374 if (!iloop || bb == m_loop->header
375 || bb == iloop->m_exit->dest)
376 continue;
377
378 /* Don't allow any other PHI nodes. */
379 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
380 if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
381 return false;
382 }
383
384 return true;
385 }
386
387 /* Programmers and optimizers (like loop store motion) may optimize code:
388
389 for (int i = 0; i < N; i++)
390 for (int j = 0; j < N; j++)
391 a[i] += b[j][i] * c[j][i];
392
393 into reduction:
394
395 for (int i = 0; i < N; i++)
396 {
397 // producer. Note sum can be intitialized to a constant.
398 int sum = a[i];
399 for (int j = 0; j < N; j++)
400 {
401 sum += b[j][i] * c[j][i];
402 }
403 // consumer.
404 a[i] = sum;
405 }
406
407 The result code can't be interchanged without undoing the optimization.
408 This function classifies this kind reduction and records information so
409 that we can undo the store motion during interchange. */
410
411 void
412 loop_cand::classify_simple_reduction (reduction_p re)
413 {
414 gimple *producer, *consumer;
415
416 /* Check init variable of reduction and how it is initialized. */
417 if (TREE_CODE (re->init) == SSA_NAME)
418 {
419 producer = SSA_NAME_DEF_STMT (re->init);
420 re->producer = producer;
421 basic_block bb = gimple_bb (producer);
422 if (!bb || bb->loop_father != m_outer)
423 return;
424
425 if (!gimple_assign_load_p (producer))
426 return;
427
428 re->init_ref = gimple_assign_rhs1 (producer);
429 }
430 else if (CONSTANT_CLASS_P (re->init))
431 m_const_init_reduc++;
432 else
433 return;
434
435 /* Check how reduction variable is used. */
436 consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
437 if (!consumer
438 || !gimple_store_p (consumer))
439 return;
440
441 re->fini_ref = gimple_get_lhs (consumer);
442 re->consumer = consumer;
443
444 /* Simple reduction with constant initializer. */
445 if (!re->init_ref)
446 {
447 gcc_assert (CONSTANT_CLASS_P (re->init));
448 re->init_ref = unshare_expr (re->fini_ref);
449 }
450
451 /* Require memory references in producer and consumer are the same so
452 that we can undo reduction during interchange. */
453 if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
454 return;
455
456 re->type = SIMPLE_RTYPE;
457 }
458
459 /* Analyze reduction variable VAR for inner loop of the loop nest to be
460 interchanged. Return true if analysis succeeds. */
461
462 bool
463 loop_cand::analyze_iloop_reduction_var (tree var)
464 {
465 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
466 gphi *lcssa_phi = NULL, *use_phi;
467 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
468 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
469 reduction_p re;
470 gimple *stmt, *next_def, *single_use = NULL;
471 use_operand_p use_p;
472 imm_use_iterator iterator;
473
474 if (TREE_CODE (next) != SSA_NAME)
475 return false;
476
477 next_def = SSA_NAME_DEF_STMT (next);
478 basic_block bb = gimple_bb (next_def);
479 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
480 return false;
481
482 /* In restricted reduction, the var is (and must be) used in defining
483 the updated var. The process can be depicted as below:
484
485 var ;; = PHI<init, next>
486 |
487 |
488 v
489 +---------------------+
490 | reduction operators | <-- other operands
491 +---------------------+
492 |
493 |
494 v
495 next
496
497 In terms loop interchange, we don't change how NEXT is computed based
498 on VAR and OTHER OPERANDS. In case of double reduction in loop nest
499 to be interchanged, we don't changed it at all. In the case of simple
500 reduction in inner loop, we only make change how VAR/NEXT is loaded or
501 stored. With these conditions, we can relax restrictions on reduction
502 in a way that reduction operation is seen as black box. In general,
503 we can ignore reassociation of reduction operator; we can handle fake
504 reductions in which VAR is not even used to compute NEXT. */
505 if (! single_imm_use (var, &use_p, &single_use)
506 || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
507 return false;
508
509 /* Check the reduction operation. We require a left-associative operation.
510 For FP math we also need to be allowed to associate operations. */
511 if (gassign *ass = dyn_cast <gassign *> (single_use))
512 {
513 enum tree_code code = gimple_assign_rhs_code (ass);
514 if (! (associative_tree_code (code)
515 || (code == MINUS_EXPR
516 && use_p->use == gimple_assign_rhs1_ptr (ass)))
517 || (FLOAT_TYPE_P (TREE_TYPE (var))
518 && ! flag_associative_math))
519 return false;
520 }
521 else
522 return false;
523
524 /* Handle and verify a series of stmts feeding the reduction op. */
525 if (single_use != next_def
526 && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
527 gimple_assign_rhs_code (single_use)))
528 return false;
529
530 /* Only support cases in which INIT is used in inner loop. */
531 if (TREE_CODE (init) == SSA_NAME)
532 FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
533 {
534 stmt = USE_STMT (use_p);
535 if (is_gimple_debug (stmt))
536 continue;
537
538 if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
539 return false;
540 }
541
542 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
543 {
544 stmt = USE_STMT (use_p);
545 if (is_gimple_debug (stmt))
546 continue;
547
548 /* Or else it's used in PHI itself. */
549 use_phi = dyn_cast <gphi *> (stmt);
550 if (use_phi == phi)
551 continue;
552
553 if (use_phi != NULL
554 && lcssa_phi == NULL
555 && gimple_bb (stmt) == m_exit->dest
556 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
557 lcssa_phi = use_phi;
558 else
559 return false;
560 }
561 if (!lcssa_phi)
562 return false;
563
564 re = XCNEW (struct reduction);
565 re->var = var;
566 re->init = init;
567 re->next = next;
568 re->phi = phi;
569 re->lcssa_phi = lcssa_phi;
570
571 classify_simple_reduction (re);
572
573 if (dump_file && (dump_flags & TDF_DETAILS))
574 dump_reduction (re);
575
576 m_reductions.safe_push (re);
577 return true;
578 }
579
580 /* Analyze reduction variable VAR for outer loop of the loop nest to be
581 interchanged. ILOOP is not NULL and points to inner loop. For the
582 moment, we only support double reduction for outer loop, like:
583
584 for (int i = 0; i < n; i++)
585 {
586 int sum = 0;
587
588 for (int j = 0; j < n; j++) // outer loop
589 for (int k = 0; k < n; k++) // inner loop
590 sum += a[i][k]*b[k][j];
591
592 s[i] = sum;
593 }
594
595 Note the innermost two loops are the loop nest to be interchanged.
596 Return true if analysis succeeds. */
597
598 bool
599 loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
600 {
601 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
602 gphi *lcssa_phi = NULL, *use_phi;
603 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
604 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
605 reduction_p re;
606 gimple *stmt, *next_def;
607 use_operand_p use_p;
608 imm_use_iterator iterator;
609
610 if (TREE_CODE (next) != SSA_NAME)
611 return false;
612
613 next_def = SSA_NAME_DEF_STMT (next);
614 basic_block bb = gimple_bb (next_def);
615 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
616 return false;
617
618 /* Find inner loop's simple reduction that uses var as initializer. */
619 reduction_p inner_re = NULL;
620 for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
621 if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
622 break;
623
624 if (inner_re == NULL
625 || inner_re->type != UNKNOWN_RTYPE
626 || inner_re->producer != phi)
627 return false;
628
629 /* In case of double reduction, outer loop's reduction should be updated
630 by inner loop's simple reduction. */
631 if (next_def != inner_re->lcssa_phi)
632 return false;
633
634 /* Outer loop's reduction should only be used to initialize inner loop's
635 simple reduction. */
636 if (! single_imm_use (var, &use_p, &stmt)
637 || stmt != inner_re->phi)
638 return false;
639
640 /* Check this reduction is correctly used outside of loop via lcssa phi. */
641 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
642 {
643 stmt = USE_STMT (use_p);
644 if (is_gimple_debug (stmt))
645 continue;
646
647 /* Or else it's used in PHI itself. */
648 use_phi = dyn_cast <gphi *> (stmt);
649 if (use_phi == phi)
650 continue;
651
652 if (lcssa_phi == NULL
653 && use_phi != NULL
654 && gimple_bb (stmt) == m_exit->dest
655 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
656 lcssa_phi = use_phi;
657 else
658 return false;
659 }
660 if (!lcssa_phi)
661 return false;
662
663 re = XCNEW (struct reduction);
664 re->var = var;
665 re->init = init;
666 re->next = next;
667 re->phi = phi;
668 re->lcssa_phi = lcssa_phi;
669 re->type = DOUBLE_RTYPE;
670 inner_re->type = DOUBLE_RTYPE;
671
672 if (dump_file && (dump_flags & TDF_DETAILS))
673 dump_reduction (re);
674
675 m_reductions.safe_push (re);
676 return true;
677 }
678
679 /* Return true if VAR is induction variable of current loop whose scev is
680 specified by CHREC. */
681
682 bool
683 loop_cand::analyze_induction_var (tree var, tree chrec)
684 {
685 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
686 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
687
688 /* Var is loop invariant, though it's unlikely to happen. */
689 if (tree_does_not_contain_chrecs (chrec))
690 {
691 /* Punt on floating point invariants if honoring signed zeros,
692 representing that as + 0.0 would change the result if init
693 is -0.0. Similarly for SNaNs it can raise exception. */
694 if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
695 return false;
696 struct induction *iv = XCNEW (struct induction);
697 iv->var = var;
698 iv->init_val = init;
699 iv->init_expr = chrec;
700 iv->step = build_zero_cst (TREE_TYPE (chrec));
701 m_inductions.safe_push (iv);
702 return true;
703 }
704
705 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
706 || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
707 || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
708 || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
709 return false;
710
711 struct induction *iv = XCNEW (struct induction);
712 iv->var = var;
713 iv->init_val = init;
714 iv->init_expr = CHREC_LEFT (chrec);
715 iv->step = CHREC_RIGHT (chrec);
716
717 if (dump_file && (dump_flags & TDF_DETAILS))
718 dump_induction (m_loop, iv);
719
720 m_inductions.safe_push (iv);
721 return true;
722 }
723
724 /* Return true if all loop carried variables defined in loop header can
725 be successfully analyzed. */
726
727 bool
728 loop_cand::analyze_carried_vars (loop_cand *iloop)
729 {
730 edge e = loop_preheader_edge (m_outer);
731 gphi_iterator gsi;
732
733 if (dump_file && (dump_flags & TDF_DETAILS))
734 fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
735
736 for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
737 {
738 gphi *phi = gsi.phi ();
739
740 tree var = PHI_RESULT (phi);
741 if (virtual_operand_p (var))
742 continue;
743
744 tree chrec = analyze_scalar_evolution (m_loop, var);
745 chrec = instantiate_scev (e, m_loop, chrec);
746
747 /* Analyze var as reduction variable. */
748 if (chrec_contains_undetermined (chrec)
749 || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
750 {
751 if (iloop && !analyze_oloop_reduction_var (iloop, var))
752 return false;
753 if (!iloop && !analyze_iloop_reduction_var (var))
754 return false;
755 }
756 /* Analyze var as induction variable. */
757 else if (!analyze_induction_var (var, chrec))
758 return false;
759 }
760
761 return true;
762 }
763
764 /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
765
766 bool
767 loop_cand::analyze_lcssa_phis (void)
768 {
769 gphi_iterator gsi;
770 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
771 {
772 gphi *phi = gsi.phi ();
773
774 if (virtual_operand_p (PHI_RESULT (phi)))
775 continue;
776
777 /* TODO: We only support lcssa phi for reduction for now. */
778 if (!find_reduction_by_stmt (phi))
779 return false;
780 }
781
782 return true;
783 }
784
785 /* CONSUMER is a stmt in BB storing reduction result into memory object.
786 When the reduction is intialized from constant value, we need to add
787 a stmt loading from the memory object to target basic block in inner
788 loop during undoing the reduction. Problem is that memory reference
789 may use ssa variables not dominating the target basic block. This
790 function finds all stmts on which CONSUMER depends in basic block BB,
791 records and returns them via STMTS. */
792
793 static void
794 find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
795 {
796 auto_vec<gimple *, 4> worklist;
797 use_operand_p use_p;
798 ssa_op_iter iter;
799 gimple *stmt, *def_stmt;
800 gimple_stmt_iterator gsi;
801
802 /* First clear flag for stmts in bb. */
803 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
804 gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
805
806 /* DFS search all depended stmts in bb and mark flag for these stmts. */
807 worklist.safe_push (consumer);
808 while (!worklist.is_empty ())
809 {
810 stmt = worklist.pop ();
811 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
812 {
813 def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
814
815 if (is_a <gphi *> (def_stmt)
816 || gimple_bb (def_stmt) != bb
817 || gimple_plf (def_stmt, GF_PLF_1))
818 continue;
819
820 worklist.safe_push (def_stmt);
821 }
822 gimple_set_plf (stmt, GF_PLF_1, true);
823 }
824 for (gsi = gsi_start_nondebug_bb (bb);
825 !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
826 {
827 /* Move dep stmts to sequence STMTS. */
828 if (gimple_plf (stmt, GF_PLF_1))
829 {
830 gsi_remove (&gsi, false);
831 gimple_seq_add_stmt_without_update (stmts, stmt);
832 }
833 else
834 gsi_next_nondebug (&gsi);
835 }
836 }
837
838 /* User can write, optimizers can generate simple reduction RE for inner
839 loop. In order to make interchange valid, we have to undo reduction by
840 moving producer and consumer stmts into the inner loop. For example,
841 below code:
842
843 init = MEM_REF[idx]; //producer
844 loop:
845 var = phi<init, next>
846 next = var op ...
847 reduc_sum = phi<next>
848 MEM_REF[idx] = reduc_sum //consumer
849
850 is transformed into:
851
852 loop:
853 new_var = MEM_REF[idx]; //producer after moving
854 next = new_var op ...
855 MEM_REF[idx] = next; //consumer after moving
856
857 Note if the reduction variable is initialized to constant, like:
858
859 var = phi<0.0, next>
860
861 we compute new_var as below:
862
863 loop:
864 tmp = MEM_REF[idx];
865 new_var = !first_iteration ? tmp : 0.0;
866
867 so that the initial const is used in the first iteration of loop. Also
868 record ssa variables for dead code elimination in DCE_SEEDS. */
869
870 void
871 loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
872 {
873 gimple *stmt;
874 gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
875 gimple_seq stmts = NULL;
876 tree new_var;
877
878 /* Prepare the initialization stmts and insert it to inner loop. */
879 if (re->producer != NULL)
880 {
881 gimple_set_vuse (re->producer, NULL_TREE);
882 update_stmt (re->producer);
883 from = gsi_for_stmt (re->producer);
884 gsi_remove (&from, false);
885 gimple_seq_add_stmt_without_update (&stmts, re->producer);
886 new_var = re->init;
887 }
888 else
889 {
890 /* Find all stmts on which expression "MEM_REF[idx]" depends. */
891 find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
892 /* Because we generate new stmt loading from the MEM_REF to TMP. */
893 tree cond, tmp = copy_ssa_name (re->var);
894 stmt = gimple_build_assign (tmp, re->init_ref);
895 gimple_seq_add_stmt_without_update (&stmts, stmt);
896
897 /* Init new_var to MEM_REF or CONST depending on if it is the first
898 iteration. */
899 induction_p iv = m_inductions[0];
900 cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
901 new_var = copy_ssa_name (re->var);
902 stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
903 gimple_seq_add_stmt_without_update (&stmts, stmt);
904 }
905 gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
906
907 /* Replace all uses of reduction var with new variable. */
908 use_operand_p use_p;
909 imm_use_iterator iterator;
910 FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
911 {
912 FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
913 SET_USE (use_p, new_var);
914
915 update_stmt (stmt);
916 }
917
918 /* Move consumer stmt into inner loop, just after reduction next's def. */
919 unlink_stmt_vdef (re->consumer);
920 release_ssa_name (gimple_vdef (re->consumer));
921 gimple_set_vdef (re->consumer, NULL_TREE);
922 gimple_set_vuse (re->consumer, NULL_TREE);
923 gimple_assign_set_rhs1 (re->consumer, re->next);
924 update_stmt (re->consumer);
925 from = gsi_for_stmt (re->consumer);
926 to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
927 gsi_move_after (&from, &to);
928
929 /* Mark the reduction variables for DCE. */
930 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
931 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
932 }
933
934 /* Free DATAREFS and its auxiliary memory. */
935
936 static void
937 free_data_refs_with_aux (vec<data_reference_p> datarefs)
938 {
939 data_reference_p dr;
940 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
941 if (dr->aux != NULL)
942 {
943 DR_ACCESS_STRIDE (dr)->release ();
944 delete (vec<tree> *) dr->aux;
945 }
946
947 free_data_refs (datarefs);
948 }
949
950 /* Class for loop interchange transformation. */
951
952 class tree_loop_interchange
953 {
954 public:
955 tree_loop_interchange (vec<class loop *> loop_nest)
956 : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
957 m_dce_seeds (BITMAP_ALLOC (NULL)) { }
958 ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
959 bool interchange (vec<data_reference_p>, vec<ddr_p>);
960
961 private:
962 void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
963 bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
964 void interchange_loops (loop_cand &, loop_cand &);
965 void map_inductions_to_loop (loop_cand &, loop_cand &);
966 void move_code_to_inner_loop (class loop *, class loop *, basic_block *);
967
968 /* The whole loop nest in which interchange is ongoing. */
969 vec<class loop *> m_loop_nest;
970 /* We create new IV which is only used in loop's exit condition check.
971 In case of 3-level loop nest interchange, when we interchange the
972 innermost two loops, new IV created in the middle level loop does
973 not need to be preserved in interchanging the outermost two loops
974 later. We record the IV so that it can be skipped. */
975 tree m_niters_iv_var;
976 /* Bitmap of seed variables for dead code elimination after interchange. */
977 bitmap m_dce_seeds;
978 };
979
980 /* Update data refs' access stride and dependence information after loop
981 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
982 nest. DATAREFS are data references. DDRS are data dependences. */
983
984 void
985 tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
986 vec<data_reference_p> datarefs,
987 vec<ddr_p> ddrs)
988 {
989 struct data_reference *dr;
990 struct data_dependence_relation *ddr;
991
992 /* Update strides of data references. */
993 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
994 {
995 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
996 gcc_assert (stride->length () > i_idx);
997 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
998 }
999 /* Update data dependences. */
1000 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1001 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
1002 {
1003 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1004 {
1005 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1006 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
1007 }
1008 }
1009 }
1010
1011 /* Check data dependence relations, return TRUE if it's valid to interchange
1012 two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
1013 loops is valid only if dist vector, after interchanging, doesn't have '>'
1014 as the leftmost non-'=' direction. Practically, this function have been
1015 conservative here by not checking some valid cases. */
1016
1017 bool
1018 tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
1019 vec<ddr_p> ddrs)
1020 {
1021 struct data_dependence_relation *ddr;
1022
1023 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
1024 {
1025 /* Skip no-dependence case. */
1026 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1027 continue;
1028
1029 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
1030 {
1031 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
1032 unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
1033
1034 /* If there is no carried dependence. */
1035 if (level == 0)
1036 continue;
1037
1038 level --;
1039
1040 /* If dependence is not carried by any loop in between the two
1041 loops [oloop, iloop] to interchange. */
1042 if (level < o_idx || level > i_idx)
1043 continue;
1044
1045 /* Be conservative, skip case if either direction at i_idx/o_idx
1046 levels is not '=' or '<'. */
1047 if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
1048 return false;
1049 }
1050 }
1051
1052 return true;
1053 }
1054
1055 /* Interchange two loops specified by ILOOP and OLOOP. */
1056
1057 void
1058 tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
1059 {
1060 reduction_p re;
1061 gimple_stmt_iterator gsi;
1062 tree i_niters, o_niters, var_after;
1063
1064 /* Undo inner loop's simple reduction. */
1065 for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
1066 if (re->type != DOUBLE_RTYPE)
1067 {
1068 if (re->producer)
1069 reset_debug_uses (re->producer);
1070
1071 iloop.undo_simple_reduction (re, m_dce_seeds);
1072 }
1073
1074 /* Only need to reset debug uses for double reduction. */
1075 for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
1076 {
1077 gcc_assert (re->type == DOUBLE_RTYPE);
1078 reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
1079 reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
1080 }
1081
1082 /* Prepare niters for both loops. */
1083 class loop *loop_nest = m_loop_nest[0];
1084 edge instantiate_below = loop_preheader_edge (loop_nest);
1085 gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
1086 i_niters = number_of_latch_executions (iloop.m_loop);
1087 i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
1088 i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
1089 i_niters);
1090 i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
1091 NULL_TREE, false, GSI_CONTINUE_LINKING);
1092 o_niters = number_of_latch_executions (oloop.m_loop);
1093 if (oloop.m_loop != loop_nest)
1094 {
1095 o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
1096 o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
1097 o_niters);
1098 }
1099 o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
1100 NULL_TREE, false, GSI_CONTINUE_LINKING);
1101
1102 /* Move src's code to tgt loop. This is necessary when src is the outer
1103 loop and tgt is the inner loop. */
1104 move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
1105
1106 /* Map outer loop's IV to inner loop, and vice versa. */
1107 map_inductions_to_loop (oloop, iloop);
1108 map_inductions_to_loop (iloop, oloop);
1109
1110 /* Create canonical IV for both loops. Note canonical IV for outer/inner
1111 loop is actually from inner/outer loop. Also we record the new IV
1112 created for the outer loop so that it can be skipped in later loop
1113 interchange. */
1114 create_canonical_iv (oloop.m_loop, oloop.m_exit,
1115 i_niters, &m_niters_iv_var, &var_after);
1116 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1117 create_canonical_iv (iloop.m_loop, iloop.m_exit,
1118 o_niters, NULL, &var_after);
1119 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1120
1121 /* Scrap niters estimation of interchanged loops. */
1122 iloop.m_loop->any_upper_bound = false;
1123 iloop.m_loop->any_likely_upper_bound = false;
1124 free_numbers_of_iterations_estimates (iloop.m_loop);
1125 oloop.m_loop->any_upper_bound = false;
1126 oloop.m_loop->any_likely_upper_bound = false;
1127 free_numbers_of_iterations_estimates (oloop.m_loop);
1128
1129 /* Clear all cached scev information. This is expensive but shouldn't be
1130 a problem given we interchange in very limited times. */
1131 scev_reset_htab ();
1132
1133 /* ??? The association between the loop data structure and the
1134 CFG changed, so what was loop N at the source level is now
1135 loop M. We should think of retaining the association or breaking
1136 it fully by creating a new loop instead of re-using the "wrong" one. */
1137 }
1138
1139 /* Map induction variables of SRC loop to TGT loop. The function firstly
1140 creates the same IV of SRC loop in TGT loop, then deletes the original
1141 IV and re-initialize it using the newly created IV. For example, loop
1142 nest:
1143
1144 for (i = 0; i < N; i++)
1145 for (j = 0; j < M; j++)
1146 {
1147 //use of i;
1148 //use of j;
1149 }
1150
1151 will be transformed into:
1152
1153 for (jj = 0; jj < M; jj++)
1154 for (ii = 0; ii < N; ii++)
1155 {
1156 //use of ii;
1157 //use of jj;
1158 }
1159
1160 after loop interchange. */
1161
1162 void
1163 tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
1164 {
1165 induction_p iv;
1166 edge e = tgt.m_exit;
1167 gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
1168
1169 /* Map source loop's IV to target loop. */
1170 for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
1171 {
1172 gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
1173 gcc_assert (is_a <gphi *> (stmt));
1174
1175 use_operand_p use_p;
1176 /* Only map original IV to target loop. */
1177 if (m_niters_iv_var != iv->var)
1178 {
1179 /* Map the IV by creating the same one in target loop. */
1180 tree var_before, var_after;
1181 tree base = unshare_expr (iv->init_expr);
1182 tree step = unshare_expr (iv->step);
1183 create_iv (base, step, SSA_NAME_VAR (iv->var),
1184 tgt.m_loop, &incr_pos, false, &var_before, &var_after);
1185 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
1186 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
1187
1188 /* Replace uses of the original IV var with newly created IV var. */
1189 imm_use_iterator imm_iter;
1190 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
1191 {
1192 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1193 SET_USE (use_p, var_before);
1194
1195 update_stmt (use_stmt);
1196 }
1197 }
1198
1199 /* Mark all uses for DCE. */
1200 ssa_op_iter op_iter;
1201 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
1202 {
1203 tree use = USE_FROM_PTR (use_p);
1204 if (TREE_CODE (use) == SSA_NAME
1205 && ! SSA_NAME_IS_DEFAULT_DEF (use))
1206 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
1207 }
1208
1209 /* Delete definition of the original IV in the source loop. */
1210 gsi = gsi_for_stmt (stmt);
1211 remove_phi_node (&gsi, true);
1212 }
1213 }
1214
1215 /* Move stmts of outer loop to inner loop. */
1216
1217 void
1218 tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
1219 class loop *inner,
1220 basic_block *outer_bbs)
1221 {
1222 basic_block oloop_exit_bb = single_exit (outer)->src;
1223 gimple_stmt_iterator gsi, to;
1224
1225 for (unsigned i = 0; i < outer->num_nodes; i++)
1226 {
1227 basic_block bb = outer_bbs[i];
1228
1229 /* Skip basic blocks of inner loop. */
1230 if (flow_bb_inside_loop_p (inner, bb))
1231 continue;
1232
1233 /* Move code from header/latch to header/latch. */
1234 if (bb == outer->header)
1235 to = gsi_after_labels (inner->header);
1236 else if (bb == outer->latch)
1237 to = gsi_after_labels (inner->latch);
1238 else
1239 /* Otherwise, simply move to exit->src. */
1240 to = gsi_last_bb (single_exit (inner)->src);
1241
1242 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
1243 {
1244 gimple *stmt = gsi_stmt (gsi);
1245
1246 if (oloop_exit_bb == bb
1247 && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
1248 {
1249 gsi_next (&gsi);
1250 continue;
1251 }
1252
1253 if (gimple_vdef (stmt))
1254 {
1255 unlink_stmt_vdef (stmt);
1256 release_ssa_name (gimple_vdef (stmt));
1257 gimple_set_vdef (stmt, NULL_TREE);
1258 }
1259 if (gimple_vuse (stmt))
1260 {
1261 gimple_set_vuse (stmt, NULL_TREE);
1262 update_stmt (stmt);
1263 }
1264
1265 reset_debug_uses (stmt);
1266 gsi_move_before (&gsi, &to);
1267 }
1268 }
1269 }
1270
1271 /* Given data reference DR in LOOP_NEST, the function computes DR's access
1272 stride at each level of loop from innermost LOOP to outer. On success,
1273 it saves access stride at each level loop in a vector which is pointed
1274 by DR->aux. For example:
1275
1276 int arr[100][100][100];
1277 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
1278 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
1279 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
1280 arr[i][j - 1][k] = 0; */
1281
1282 static void
1283 compute_access_stride (class loop *loop_nest, class loop *loop,
1284 data_reference_p dr)
1285 {
1286 vec<tree> *strides = new vec<tree> ();
1287 basic_block bb = gimple_bb (DR_STMT (dr));
1288
1289 while (!flow_bb_inside_loop_p (loop, bb))
1290 {
1291 strides->safe_push (build_int_cst (sizetype, 0));
1292 loop = loop_outer (loop);
1293 }
1294 gcc_assert (loop == bb->loop_father);
1295
1296 tree ref = DR_REF (dr);
1297 if (TREE_CODE (ref) == COMPONENT_REF
1298 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1299 {
1300 /* We can't take address of bitfields. If the bitfield is at constant
1301 offset from the start of the struct, just use address of the
1302 struct, for analysis of the strides that shouldn't matter. */
1303 if (!TREE_OPERAND (ref, 2)
1304 || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
1305 ref = TREE_OPERAND (ref, 0);
1306 /* Otherwise, if we have a bit field representative, use that. */
1307 else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
1308 != NULL_TREE)
1309 {
1310 tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
1311 ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
1312 repr, TREE_OPERAND (ref, 2));
1313 }
1314 /* Otherwise punt. */
1315 else
1316 {
1317 dr->aux = strides;
1318 return;
1319 }
1320 }
1321 tree scev_base = build_fold_addr_expr (ref);
1322 tree scev = analyze_scalar_evolution (loop, scev_base);
1323 scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
1324 if (! chrec_contains_undetermined (scev))
1325 {
1326 tree sl = scev;
1327 class loop *expected = loop;
1328 while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
1329 {
1330 class loop *sl_loop = get_chrec_loop (sl);
1331 while (sl_loop != expected)
1332 {
1333 strides->safe_push (size_int (0));
1334 expected = loop_outer (expected);
1335 }
1336 strides->safe_push (CHREC_RIGHT (sl));
1337 sl = CHREC_LEFT (sl);
1338 expected = loop_outer (expected);
1339 }
1340 if (! tree_contains_chrecs (sl, NULL))
1341 while (expected != loop_outer (loop_nest))
1342 {
1343 strides->safe_push (size_int (0));
1344 expected = loop_outer (expected);
1345 }
1346 }
1347
1348 dr->aux = strides;
1349 }
1350
1351 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
1352 access strides with respect to each level loop for all data refs in
1353 DATAREFS from inner loop to outer loop. On success, it returns the
1354 outermost loop that access strides can be computed successfully for
1355 all data references. If access strides cannot be computed at least
1356 for two levels of loop for any data reference, it returns NULL. */
1357
1358 static class loop *
1359 compute_access_strides (class loop *loop_nest, class loop *loop,
1360 vec<data_reference_p> datarefs)
1361 {
1362 unsigned i, j, num_loops = (unsigned) -1;
1363 data_reference_p dr;
1364 vec<tree> *stride;
1365
1366 for (i = 0; datarefs.iterate (i, &dr); ++i)
1367 {
1368 compute_access_stride (loop_nest, loop, dr);
1369 stride = DR_ACCESS_STRIDE (dr);
1370 if (stride->length () < num_loops)
1371 {
1372 num_loops = stride->length ();
1373 if (num_loops < 2)
1374 return NULL;
1375 }
1376 }
1377
1378 for (i = 0; datarefs.iterate (i, &dr); ++i)
1379 {
1380 stride = DR_ACCESS_STRIDE (dr);
1381 if (stride->length () > num_loops)
1382 stride->truncate (num_loops);
1383
1384 for (j = 0; j < (num_loops >> 1); ++j)
1385 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
1386 }
1387
1388 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
1389 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
1390 return loop;
1391 }
1392
1393 /* Prune access strides for data references in DATAREFS by removing strides
1394 of loops that isn't in current LOOP_NEST. */
1395
1396 static void
1397 prune_access_strides_not_in_loop (class loop *loop_nest,
1398 class loop *innermost,
1399 vec<data_reference_p> datarefs)
1400 {
1401 data_reference_p dr;
1402 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
1403 gcc_assert (num_loops > 1);
1404
1405 /* Block remove strides of loops that is not in current loop nest. */
1406 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1407 {
1408 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1409 if (stride->length () > num_loops)
1410 stride->block_remove (0, stride->length () - num_loops);
1411 }
1412 }
1413
1414 /* Dump access strides for all DATAREFS. */
1415
1416 static void
1417 dump_access_strides (vec<data_reference_p> datarefs)
1418 {
1419 data_reference_p dr;
1420 fprintf (dump_file, "Access Strides for DRs:\n");
1421 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
1422 {
1423 fprintf (dump_file, " ");
1424 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
1425 fprintf (dump_file, ":\t\t<");
1426
1427 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1428 unsigned num_loops = stride->length ();
1429 for (unsigned j = 0; j < num_loops; ++j)
1430 {
1431 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
1432 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
1433 }
1434 }
1435 }
1436
1437 /* Return true if it's profitable to interchange two loops whose index
1438 in whole loop nest vector are I_IDX/O_IDX respectively. The function
1439 computes and compares three types information from all DATAREFS:
1440 1) Access stride for loop I_IDX and O_IDX.
1441 2) Number of invariant memory references with respect to I_IDX before
1442 and after loop interchange.
1443 3) Flags indicating if all memory references access sequential memory
1444 in ILOOP, before and after loop interchange.
1445 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
1446 innermost loops in loop nest. This function also dumps information if
1447 DUMP_INFO_P is true. */
1448
1449 static bool
1450 should_interchange_loops (unsigned i_idx, unsigned o_idx,
1451 vec<data_reference_p> datarefs,
1452 unsigned i_stmt_cost, unsigned o_stmt_cost,
1453 bool innermost_loops_p, bool dump_info_p = true)
1454 {
1455 unsigned HOST_WIDE_INT ratio;
1456 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
1457 struct data_reference *dr;
1458 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
1459 widest_int iloop_strides = 0, oloop_strides = 0;
1460 unsigned num_unresolved_drs = 0;
1461 unsigned num_resolved_ok_drs = 0;
1462 unsigned num_resolved_not_ok_drs = 0;
1463
1464 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1465 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
1466
1467 for (i = 0; datarefs.iterate (i, &dr); ++i)
1468 {
1469 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
1470 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
1471
1472 bool subloop_stride_p = false;
1473 /* Data ref can't be invariant or sequential access at current loop if
1474 its address changes with respect to any subloops. */
1475 for (j = i_idx + 1; j < stride->length (); ++j)
1476 if (!integer_zerop ((*stride)[j]))
1477 {
1478 subloop_stride_p = true;
1479 break;
1480 }
1481
1482 if (integer_zerop (iloop_stride))
1483 {
1484 if (!subloop_stride_p)
1485 num_old_inv_drs++;
1486 }
1487 if (integer_zerop (oloop_stride))
1488 {
1489 if (!subloop_stride_p)
1490 num_new_inv_drs++;
1491 }
1492
1493 if (TREE_CODE (iloop_stride) == INTEGER_CST
1494 && TREE_CODE (oloop_stride) == INTEGER_CST)
1495 {
1496 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
1497 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
1498 }
1499 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1500 iloop_stride, oloop_stride))
1501 num_resolved_ok_drs++;
1502 else if (multiple_of_p (TREE_TYPE (iloop_stride),
1503 oloop_stride, iloop_stride))
1504 num_resolved_not_ok_drs++;
1505 else
1506 num_unresolved_drs++;
1507
1508 /* Data ref can't be sequential access if its address changes in sub
1509 loop. */
1510 if (subloop_stride_p)
1511 {
1512 all_seq_dr_before_p = false;
1513 all_seq_dr_after_p = false;
1514 continue;
1515 }
1516 /* Track if all data references are sequential accesses before/after loop
1517 interchange. Note invariant is considered sequential here. */
1518 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
1519 if (all_seq_dr_before_p
1520 && ! (integer_zerop (iloop_stride)
1521 || operand_equal_p (access_size, iloop_stride, 0)))
1522 all_seq_dr_before_p = false;
1523 if (all_seq_dr_after_p
1524 && ! (integer_zerop (oloop_stride)
1525 || operand_equal_p (access_size, oloop_stride, 0)))
1526 all_seq_dr_after_p = false;
1527 }
1528
1529 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
1530 {
1531 fprintf (dump_file, "\toverall:\t\t");
1532 print_decu (iloop_strides, dump_file);
1533 fprintf (dump_file, "\t");
1534 print_decu (oloop_strides, dump_file);
1535 fprintf (dump_file, "\n");
1536
1537 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
1538 num_old_inv_drs, num_new_inv_drs);
1539 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
1540 all_seq_dr_before_p ? "true" : "false",
1541 all_seq_dr_after_p ? "true" : "false");
1542 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
1543 num_resolved_ok_drs);
1544 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
1545 num_resolved_not_ok_drs);
1546 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
1547 num_unresolved_drs);
1548 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
1549 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
1550 }
1551
1552 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
1553 return false;
1554
1555 /* Stmts of outer loop will be moved to inner loop. If there are two many
1556 such stmts, it could make inner loop costly. Here we compare stmt cost
1557 between outer and inner loops. */
1558 if (i_stmt_cost && o_stmt_cost
1559 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
1560 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
1561 return false;
1562
1563 /* We use different stride comparison ratio for interchanging innermost
1564 two loops or not. The idea is to be conservative in interchange for
1565 the innermost loops. */
1566 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
1567 /* Do interchange if it gives better data locality behavior. */
1568 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
1569 return true;
1570 if (wi::gtu_p (iloop_strides, oloop_strides))
1571 {
1572 /* Or it creates more invariant memory references. */
1573 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
1574 && num_new_inv_drs > num_old_inv_drs)
1575 return true;
1576 /* Or it makes all memory references sequential. */
1577 if (num_new_inv_drs >= num_old_inv_drs
1578 && !all_seq_dr_before_p && all_seq_dr_after_p)
1579 return true;
1580 }
1581
1582 return false;
1583 }
1584
1585 /* Try to interchange inner loop of a loop nest to outer level. */
1586
1587 bool
1588 tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
1589 vec<ddr_p> ddrs)
1590 {
1591 dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
1592 bool changed_p = false;
1593 /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
1594 The overall effect is to push inner loop to outermost level in whole
1595 loop nest. */
1596 for (unsigned i = m_loop_nest.length (); i > 1; --i)
1597 {
1598 unsigned i_idx = i - 1, o_idx = i - 2;
1599
1600 /* Check validity for loop interchange. */
1601 if (!valid_data_dependences (i_idx, o_idx, ddrs))
1602 break;
1603
1604 loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
1605 loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
1606
1607 /* Check if we can do transformation for loop interchange. */
1608 if (!iloop.analyze_carried_vars (NULL)
1609 || !iloop.analyze_lcssa_phis ()
1610 || !oloop.analyze_carried_vars (&iloop)
1611 || !oloop.analyze_lcssa_phis ()
1612 || !iloop.can_interchange_p (NULL)
1613 || !oloop.can_interchange_p (&iloop))
1614 break;
1615
1616 /* Outer loop's stmts will be moved to inner loop during interchange.
1617 If there are many of them, it may make inner loop very costly. We
1618 need to check number of outer loop's stmts in profit cost model of
1619 interchange. */
1620 int stmt_cost = oloop.m_num_stmts;
1621 /* Count out the exit checking stmt of outer loop. */
1622 stmt_cost --;
1623 /* Count out IV's increasing stmt, IVOPTs takes care if it. */
1624 stmt_cost -= oloop.m_inductions.length ();
1625 /* Count in the additional load and cond_expr stmts caused by inner
1626 loop's constant initialized reduction. */
1627 stmt_cost += iloop.m_const_init_reduc * 2;
1628 if (stmt_cost < 0)
1629 stmt_cost = 0;
1630
1631 /* Check profitability for loop interchange. */
1632 if (should_interchange_loops (i_idx, o_idx, datarefs,
1633 (unsigned) iloop.m_num_stmts,
1634 (unsigned) stmt_cost,
1635 iloop.m_loop->inner == NULL))
1636 {
1637 if (dump_file && (dump_flags & TDF_DETAILS))
1638 fprintf (dump_file,
1639 "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
1640 oloop.m_loop->num, iloop.m_loop->num);
1641
1642 changed_p = true;
1643 interchange_loops (iloop, oloop);
1644 /* No need to update if there is no further loop interchange. */
1645 if (o_idx > 0)
1646 update_data_info (i_idx, o_idx, datarefs, ddrs);
1647 }
1648 else
1649 {
1650 if (dump_file && (dump_flags & TDF_DETAILS))
1651 fprintf (dump_file,
1652 "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
1653 oloop.m_loop->num, iloop.m_loop->num);
1654 }
1655 }
1656 simple_dce_from_worklist (m_dce_seeds);
1657
1658 if (changed_p && dump_enabled_p ())
1659 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
1660 "loops interchanged in loop nest\n");
1661
1662 return changed_p;
1663 }
1664
1665
1666 /* Loop interchange pass. */
1667
1668 namespace {
1669
1670 const pass_data pass_data_linterchange =
1671 {
1672 GIMPLE_PASS, /* type */
1673 "linterchange", /* name */
1674 OPTGROUP_LOOP, /* optinfo_flags */
1675 TV_LINTERCHANGE, /* tv_id */
1676 PROP_cfg, /* properties_required */
1677 0, /* properties_provided */
1678 0, /* properties_destroyed */
1679 0, /* todo_flags_start */
1680 0, /* todo_flags_finish */
1681 };
1682
1683 class pass_linterchange : public gimple_opt_pass
1684 {
1685 public:
1686 pass_linterchange (gcc::context *ctxt)
1687 : gimple_opt_pass (pass_data_linterchange, ctxt)
1688 {}
1689
1690 /* opt_pass methods: */
1691 opt_pass * clone () { return new pass_linterchange (m_ctxt); }
1692 virtual bool gate (function *) { return flag_loop_interchange; }
1693 virtual unsigned int execute (function *);
1694
1695 }; // class pass_linterchange
1696
1697
1698 /* Return true if LOOP has proper form for interchange. We check three
1699 conditions in the function:
1700 1) In general, a loop can be interchanged only if it doesn't have
1701 basic blocks other than header, exit and latch besides possible
1702 inner loop nest. This basically restricts loop interchange to
1703 below form loop nests:
1704
1705 header<---+
1706 | |
1707 v |
1708 INNER_LOOP |
1709 | |
1710 v |
1711 exit--->latch
1712
1713 2) Data reference in basic block that executes in different times
1714 than loop head/exit is not allowed.
1715 3) Record the innermost outer loop that doesn't form rectangle loop
1716 nest with LOOP. */
1717
1718 static bool
1719 proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
1720 {
1721 edge e0, e1, exit;
1722
1723 /* Don't interchange if loop has unsupported information for the moment. */
1724 if (loop->safelen > 0
1725 || loop->constraints != 0
1726 || loop->can_be_parallel
1727 || loop->dont_vectorize
1728 || loop->force_vectorize
1729 || loop->in_oacc_kernels_region
1730 || loop->orig_loop_num != 0
1731 || loop->simduid != NULL_TREE)
1732 return false;
1733
1734 /* Don't interchange if outer loop has basic block other than header, exit
1735 and latch. */
1736 if (loop->inner != NULL
1737 && loop->num_nodes != loop->inner->num_nodes + 3)
1738 return false;
1739
1740 if ((exit = single_dom_exit (loop)) == NULL)
1741 return false;
1742
1743 /* Check control flow on loop header/exit blocks. */
1744 if (loop->header == exit->src
1745 && (EDGE_COUNT (loop->header->preds) != 2
1746 || EDGE_COUNT (loop->header->succs) != 2))
1747 return false;
1748 else if (loop->header != exit->src
1749 && (EDGE_COUNT (loop->header->preds) != 2
1750 || !single_succ_p (loop->header)
1751 || unsupported_edge (single_succ_edge (loop->header))
1752 || EDGE_COUNT (exit->src->succs) != 2
1753 || !single_pred_p (exit->src)
1754 || unsupported_edge (single_pred_edge (exit->src))))
1755 return false;
1756
1757 e0 = EDGE_PRED (loop->header, 0);
1758 e1 = EDGE_PRED (loop->header, 1);
1759 if (unsupported_edge (e0) || unsupported_edge (e1)
1760 || (e0->src != loop->latch && e1->src != loop->latch)
1761 || (e0->src->loop_father == loop && e1->src->loop_father == loop))
1762 return false;
1763
1764 e0 = EDGE_SUCC (exit->src, 0);
1765 e1 = EDGE_SUCC (exit->src, 1);
1766 if (unsupported_edge (e0) || unsupported_edge (e1)
1767 || (e0->dest != loop->latch && e1->dest != loop->latch)
1768 || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
1769 return false;
1770
1771 /* Don't interchange if any reference is in basic block that doesn't
1772 dominate exit block. */
1773 basic_block *bbs = get_loop_body (loop);
1774 for (unsigned i = 0; i < loop->num_nodes; i++)
1775 {
1776 basic_block bb = bbs[i];
1777
1778 if (bb->loop_father != loop
1779 || bb == loop->header || bb == exit->src
1780 || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
1781 continue;
1782
1783 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
1784 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
1785 if (gimple_vuse (gsi_stmt (gsi)))
1786 {
1787 free (bbs);
1788 return false;
1789 }
1790 }
1791 free (bbs);
1792
1793 tree niters = number_of_latch_executions (loop);
1794 niters = analyze_scalar_evolution (loop_outer (loop), niters);
1795 if (!niters || chrec_contains_undetermined (niters))
1796 return false;
1797
1798 /* Record the innermost outer loop that doesn't form rectangle loop nest. */
1799 for (loop_p loop2 = loop_outer (loop);
1800 loop2 && flow_loop_nested_p (*min_outer, loop2);
1801 loop2 = loop_outer (loop2))
1802 {
1803 niters = instantiate_scev (loop_preheader_edge (loop2),
1804 loop_outer (loop), niters);
1805 if (!evolution_function_is_invariant_p (niters, loop2->num))
1806 {
1807 *min_outer = loop2;
1808 break;
1809 }
1810 }
1811 return true;
1812 }
1813
1814 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1815 should be interchanged by looking into all DATAREFS. */
1816
1817 static bool
1818 should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
1819 vec<data_reference_p> datarefs)
1820 {
1821 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1822 gcc_assert (idx > 0);
1823
1824 /* Check if any two adjacent loops should be interchanged. */
1825 for (class loop *loop = innermost;
1826 loop != loop_nest; loop = loop_outer (loop), idx--)
1827 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
1828 loop == innermost, false))
1829 return true;
1830
1831 return false;
1832 }
1833
1834 /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
1835 dependences for loop interchange and store it in DDRS. Note we compute
1836 dependences directly rather than call generic interface so that we can
1837 return on unknown dependence instantly. */
1838
1839 static bool
1840 tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
1841 vec<data_reference_p> datarefs,
1842 vec<ddr_p> *ddrs)
1843 {
1844 struct data_reference *a, *b;
1845 class loop *innermost = loop_nest.last ();
1846
1847 for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
1848 {
1849 bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
1850 for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
1851 if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
1852 {
1853 bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
1854 /* Don't support multiple write references in outer loop. */
1855 if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
1856 return false;
1857
1858 ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
1859 ddrs->safe_push (ddr);
1860 compute_affine_dependence (ddr, loop_nest[0]);
1861
1862 /* Give up if ddr is unknown dependence or classic direct vector
1863 is not available. */
1864 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1865 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
1866 && DDR_NUM_DIR_VECTS (ddr) == 0))
1867 return false;
1868
1869 /* If either data references is in outer loop of nest, we require
1870 no dependence here because the data reference need to be moved
1871 into inner loop during interchange. */
1872 if (a_outer_p && b_outer_p
1873 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
1874 continue;
1875 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1876 && (a_outer_p || b_outer_p))
1877 return false;
1878 }
1879 }
1880
1881 return true;
1882 }
1883
1884 /* Prune DATAREFS by removing any data reference not inside of LOOP. */
1885
1886 static inline void
1887 prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
1888 {
1889 unsigned i, j;
1890 struct data_reference *dr;
1891
1892 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1893 {
1894 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1895 datarefs[j++] = dr;
1896 else
1897 {
1898 if (dr->aux)
1899 {
1900 DR_ACCESS_STRIDE (dr)->release ();
1901 delete (vec<tree> *) dr->aux;
1902 }
1903 free_data_ref (dr);
1904 }
1905 }
1906 datarefs.truncate (j);
1907 }
1908
1909 /* Find and store data references in DATAREFS for LOOP nest. If there's
1910 difficult data reference in a basic block, we shrink the loop nest to
1911 inner loop of that basic block's father loop. On success, return the
1912 outer loop of the result loop nest. */
1913
1914 static class loop *
1915 prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
1916 {
1917 class loop *loop_nest = loop;
1918 vec<data_reference_p> *bb_refs;
1919 basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
1920
1921 for (unsigned i = 0; i < loop->num_nodes; i++)
1922 bbs[i]->aux = NULL;
1923
1924 /* Find data references for all basic blocks. Shrink loop nest on difficult
1925 data reference. */
1926 for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
1927 {
1928 bb = bbs[i];
1929 if (!flow_bb_inside_loop_p (loop_nest, bb))
1930 continue;
1931
1932 bb_refs = new vec<data_reference_p> ();
1933 if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
1934 {
1935 loop_nest = bb->loop_father->inner;
1936 if (loop_nest && !loop_nest->inner)
1937 loop_nest = NULL;
1938
1939 free_data_refs (*bb_refs);
1940 delete bb_refs;
1941 }
1942 else if (bb_refs->is_empty ())
1943 delete bb_refs;
1944 else
1945 bb->aux = bb_refs;
1946 }
1947
1948 /* Collect all data references in loop nest. */
1949 for (unsigned i = 0; i < loop->num_nodes; i++)
1950 {
1951 bb = bbs[i];
1952 if (!bb->aux)
1953 continue;
1954
1955 bb_refs = (vec<data_reference_p> *) bb->aux;
1956 if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
1957 datarefs->safe_splice (*bb_refs);
1958 else
1959 free_data_refs (*bb_refs);
1960
1961 delete bb_refs;
1962 bb->aux = NULL;
1963 }
1964 free (bbs);
1965
1966 return loop_nest;
1967 }
1968
1969 /* Given innermost LOOP, return true if perfect loop nest can be found and
1970 data dependences can be computed. If succeed, record the perfect loop
1971 nest in LOOP_NEST; record all data references in DATAREFS and record all
1972 data dependence relations in DDRS.
1973
1974 We do support a restricted form of imperfect loop nest, i.e, loop nest
1975 with load/store in outer loop initializing/finalizing simple reduction
1976 of the innermost loop. For such outer loop reference, we require that
1977 it has no dependence with others sinve it will be moved to inner loop
1978 in interchange. */
1979
1980 static bool
1981 prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
1982 vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
1983 {
1984 class loop *start_loop = NULL, *innermost = loop;
1985 class loop *outermost = loops_for_fn (cfun)->tree_root;
1986
1987 /* Find loop nest from the innermost loop. The outermost is the innermost
1988 outer*/
1989 while (loop->num != 0 && loop->inner == start_loop
1990 && flow_loop_nested_p (outermost, loop))
1991 {
1992 if (!proper_loop_form_for_interchange (loop, &outermost))
1993 break;
1994
1995 start_loop = loop;
1996 /* If this loop has sibling loop, the father loop won't be in perfect
1997 loop nest. */
1998 if (loop->next != NULL)
1999 break;
2000
2001 loop = loop_outer (loop);
2002 }
2003 if (!start_loop || !start_loop->inner)
2004 return false;
2005
2006 /* Prepare the data reference vector for the loop nest, pruning outer
2007 loops we cannot handle. */
2008 start_loop = prepare_data_references (start_loop, datarefs);
2009 if (!start_loop
2010 /* Check if there is no data reference. */
2011 || datarefs->is_empty ()
2012 /* Check if there are too many of data references. */
2013 || (int) datarefs->length () > MAX_DATAREFS)
2014 return false;
2015
2016 /* Compute access strides for all data references, pruning outer
2017 loops we cannot analyze refs in. */
2018 start_loop = compute_access_strides (start_loop, innermost, *datarefs);
2019 if (!start_loop)
2020 return false;
2021
2022 /* Check if any interchange is profitable in the loop nest. */
2023 if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
2024 return false;
2025
2026 /* Check if data dependences can be computed for loop nest starting from
2027 start_loop. */
2028 loop = start_loop;
2029 do {
2030 loop_nest->truncate (0);
2031
2032 if (loop != start_loop)
2033 prune_datarefs_not_in_loop (start_loop, *datarefs);
2034
2035 if (find_loop_nest (start_loop, loop_nest)
2036 && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
2037 {
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2039 fprintf (dump_file,
2040 "\nConsider loop interchange for loop_nest<%d - %d>\n",
2041 start_loop->num, innermost->num);
2042
2043 if (loop != start_loop)
2044 prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
2045
2046 if (dump_file && (dump_flags & TDF_DETAILS))
2047 dump_access_strides (*datarefs);
2048
2049 return true;
2050 }
2051
2052 free_dependence_relations (*ddrs);
2053 *ddrs = vNULL;
2054 /* Try to compute data dependences with the outermost loop stripped. */
2055 loop = start_loop;
2056 start_loop = start_loop->inner;
2057 } while (start_loop && start_loop->inner);
2058
2059 return false;
2060 }
2061
2062 /* Main entry for loop interchange pass. */
2063
2064 unsigned int
2065 pass_linterchange::execute (function *fun)
2066 {
2067 if (number_of_loops (fun) <= 2)
2068 return 0;
2069
2070 bool changed_p = false;
2071 class loop *loop;
2072 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2073 {
2074 vec<loop_p> loop_nest = vNULL;
2075 vec<data_reference_p> datarefs = vNULL;
2076 vec<ddr_p> ddrs = vNULL;
2077 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2078 {
2079 tree_loop_interchange loop_interchange (loop_nest);
2080 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2081 }
2082 free_dependence_relations (ddrs);
2083 free_data_refs_with_aux (datarefs);
2084 loop_nest.release ();
2085 }
2086
2087 if (changed_p)
2088 {
2089 unsigned todo = TODO_update_ssa_only_virtuals;
2090 todo |= loop_invariant_motion_in_fun (cfun, false);
2091 scev_reset ();
2092 return todo;
2093 }
2094 return 0;
2095 }
2096
2097 } // anon namespace
2098
2099 gimple_opt_pass *
2100 make_pass_linterchange (gcc::context *ctxt)
2101 {
2102 return new pass_linterchange (ctxt);
2103 }