/* Loop interchange.
   Copyright (C) 2017-2019 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "is-a.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "cfgloop.h"
#include "params.h"
#include "tree-ssa.h"
#include "tree-scalar-evolution.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-dce.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"

/* This pass performs loop interchange: for example, the loop nest

     for (int j = 0; j < N; j++)
       for (int k = 0; k < N; k++)
         for (int i = 0; i < N; i++)
           c[i][j] = c[i][j] + a[i][k]*b[k][j];

   is transformed to

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
         for (int k = 0; k < N; k++)
           c[i][j] = c[i][j] + a[i][k]*b[k][j];

   This pass implements loop interchange in the following steps:

     1) Find perfect loop nest for each innermost loop and compute data
        dependence relations for it.  For above example, loop nest is
        <loop_j, loop_k, loop_i>.
     2) From innermost to outermost loop, this pass tries to interchange
        each loop pair.  For above case, it firstly tries to interchange
        <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
        Then it tries to interchange <loop_j, loop_i> and loop nest becomes
        <loop_i, loop_j, loop_k>.  The overall effect is to move innermost
        loop to the outermost position.  For loop pair <loop_i, loop_j>
        to be interchanged, we:
     3) Check if data dependence relations are valid for loop interchange.
     4) Check if both loops can be interchanged in terms of transformation.
     5) Check if interchanging the two loops is profitable.
     6) Interchange the two loops by mapping induction variables.

   This pass also handles reductions in loop nest.  So far we only support
   simple reduction of inner loop and double reduction of the loop nest.  */

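/* For instance, a nest like

     for (int i = 0; i < N; i++)
       {
         int sum = 0;
         for (int j = 0; j < N; j++)
           sum += a[j][i];
         s[i] = sum;
       }

   is still a candidate: SUM is a simple reduction of the inner loop, and
   the stmts initializing and storing it are handled when interchanging
   (see classify_simple_reduction and undo_simple_reduction below).  */
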
/* Maximum number of stmts in each loop that should be interchanged.  */
#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
/* Maximum number of data references in loop nest.  */
#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))

/* Comparison ratio of access stride between inner/outer loops to be
   interchanged.  This is the minimum stride ratio for loop interchange
   to be profitable.  */
#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
/* The same as above, but we require higher ratio for interchanging the
   innermost two loops.  */
#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)

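/* For example, if the ratio is 2, a summed inner-loop stride of 400 bytes
   against a summed outer-loop stride of 4 bytes clears the threshold
   (400 > 2 * 4), while 8 against 4 does not (8 > 2 * 4 is false).  */
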
/* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
   be interchanged if outer loop has too many stmts.  */
#define STMT_COST_RATIO (3)

/* Vector of strides that DR accesses in each level loop of a loop nest.  */
#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)

/* Structure recording loop induction variable.  */
typedef struct induction
{
  /* IV itself.  */
  tree var;
  /* IV's initializing value, which is the init arg of the IV PHI node.  */
  tree init_val;
  /* IV's initializing expr, which is (the expanded result of) init_val.  */
  tree init_expr;
  /* IV's step.  */
  tree step;
} *induction_p;

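/* For a loop like "for (i = 0; i < N; i++)", VAR is the PHI result of i,
   INIT_VAL and INIT_EXPR are 0, and STEP is 1, so dump_induction below
   would print "i = {0, 1}_n" for loop number n.  */
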
/* Enum type for loop reduction variable.  */
enum reduction_type
{
  UNKNOWN_RTYPE = 0,
  SIMPLE_RTYPE,
  DOUBLE_RTYPE
};

/* Structure recording loop reduction variable.  */
typedef struct reduction
{
  /* Reduction itself.  */
  tree var;
  /* PHI node defining reduction variable.  */
  gphi *phi;
  /* Init and next variables of the reduction.  */
  tree init;
  tree next;
  /* Lcssa PHI node if reduction is used outside of its definition loop.  */
  gphi *lcssa_phi;
  /* Stmts defining init and next.  */
  gimple *producer;
  gimple *consumer;
  /* If init is loaded from memory, this is the loading memory reference.  */
  tree init_ref;
  /* If reduction is finally stored to memory, this is the stored memory
     reference.  */
  tree fini_ref;
  enum reduction_type type;
} *reduction_p;

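/* For the store-motion pattern handled by classify_simple_reduction below,
   "init = a[i]; loop { next = var op ...; } a[i] = reduc_sum;", PRODUCER is
   the load stmt, CONSUMER is the store stmt, and INIT_REF/FINI_REF are the
   two (equal) memory references a[i].  */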

/* Dump reduction RE.  */

static void
dump_reduction (reduction_p re)
{
  if (re->type == SIMPLE_RTYPE)
    fprintf (dump_file, "  Simple reduction:  ");
  else if (re->type == DOUBLE_RTYPE)
    fprintf (dump_file, "  Double reduction:  ");
  else
    fprintf (dump_file, "  Unknown reduction:  ");

  print_gimple_stmt (dump_file, re->phi, 0);
}

/* Dump LOOP's induction IV.  */
static void
dump_induction (class loop *loop, induction_p iv)
{
  fprintf (dump_file, "  Induction:  ");
  print_generic_expr (dump_file, iv->var, TDF_SLIM);
  fprintf (dump_file, " = {");
  print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
  fprintf (dump_file, ", ");
  print_generic_expr (dump_file, iv->step, TDF_SLIM);
  fprintf (dump_file, "}_%d\n", loop->num);
}

/* Loop candidate for interchange.  */

class loop_cand
{
public:
  loop_cand (class loop *, class loop *);
  ~loop_cand ();

  reduction_p find_reduction_by_stmt (gimple *);
  void classify_simple_reduction (reduction_p);
  bool analyze_iloop_reduction_var (tree);
  bool analyze_oloop_reduction_var (loop_cand *, tree);
  bool analyze_induction_var (tree, tree);
  bool analyze_carried_vars (loop_cand *);
  bool analyze_lcssa_phis (void);
  bool can_interchange_p (loop_cand *);
  void undo_simple_reduction (reduction_p, bitmap);

  /* The loop itself.  */
  class loop *m_loop;
  /* The outer loop for interchange.  It equals m_loop if this loop_cand
     itself represents the outer loop.  */
  class loop *m_outer;
  /* Vector of induction variables in loop.  */
  vec<induction_p> m_inductions;
  /* Vector of reduction variables in loop.  */
  vec<reduction_p> m_reductions;
  /* Lcssa PHI nodes of this loop.  */
  vec<gphi *> m_lcssa_nodes;
  /* Single exit edge of this loop.  */
  edge m_exit;
  /* Basic blocks of this loop.  */
  basic_block *m_bbs;
  /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
  int m_num_stmts;
  /* Number of constant-initialized simple reductions.  */
  int m_const_init_reduc;
};

/* Constructor.  */

loop_cand::loop_cand (class loop *loop, class loop *outer)
  : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
    m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
{
  m_inductions.create (3);
  m_reductions.create (3);
  m_lcssa_nodes.create (3);
}

/* Destructor.  */

loop_cand::~loop_cand ()
{
  induction_p iv;
  for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
    free (iv);

  reduction_p re;
  for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    free (re);

  m_inductions.release ();
  m_reductions.release ();
  m_lcssa_nodes.release ();
  free (m_bbs);
}

/* Return single use stmt of VAR in LOOP, otherwise return NULL.  */

static gimple *
single_use_in_loop (tree var, class loop *loop)
{
  gimple *stmt, *res = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
        continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
        continue;

      if (res)
        return NULL;

      res = stmt;
    }
  return res;
}

/* Return true if E is unsupported in loop interchange, i.e., E is a complex
   edge or part of an irreducible loop.  */

static inline bool
unsupported_edge (edge e)
{
  return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
}

/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
   stmts.  */

reduction_p
loop_cand::find_reduction_by_stmt (gimple *stmt)
{
  gphi *phi = dyn_cast <gphi *> (stmt);
  reduction_p re;

  for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    if ((phi != NULL && phi == re->lcssa_phi)
        || (stmt == re->producer || stmt == re->consumer))
      return re;

  return NULL;
}

/* Return true if current loop_cand can be interchanged.  ILOOP is not NULL
   if current loop_cand is the outer loop in the loop nest.  */

bool
loop_cand::can_interchange_p (loop_cand *iloop)
{
  /* For now we only support at most one reduction.  */
  unsigned allowed_reduction_num = 1;

  /* Only support reduction if the loop nest to be interchanged is the
     innermost two loops.  */
  if ((iloop == NULL && m_loop->inner != NULL)
      || (iloop != NULL && iloop->m_loop->inner != NULL))
    allowed_reduction_num = 0;

  if (m_reductions.length () > allowed_reduction_num
      || (m_reductions.length () == 1
          && m_reductions[0]->type == UNKNOWN_RTYPE))
    return false;

  /* Only support lcssa PHI nodes which are for reductions.  */
  if (m_lcssa_nodes.length () > allowed_reduction_num)
    return false;

  /* Check if basic block has any unsupported operation.  Note basic blocks
     of inner loops are not checked here.  */
  for (unsigned i = 0; i < m_loop->num_nodes; i++)
    {
      basic_block bb = m_bbs[i];
      gphi_iterator psi;
      gimple_stmt_iterator gsi;

      /* Skip basic blocks of inner loops.  */
      if (bb->loop_father != m_loop)
        continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;

          if (gimple_has_side_effects (stmt))
            return false;

          m_num_stmts++;
          if (gcall *call = dyn_cast <gcall *> (stmt))
            {
              /* In basic block of outer loop, the call should be cheap since
                 it will be moved to inner loop.  */
              if (iloop != NULL
                  && !gimple_inexpensive_call_p (call))
                return false;
              continue;
            }

          if (!iloop || !gimple_vuse (stmt))
            continue;

          /* Support stmt accessing memory in outer loop only if it is for
             inner loop's reduction.  */
          if (iloop->find_reduction_by_stmt (stmt))
            continue;

          tree lhs;
          /* Support loop invariant memory reference if it's only used once by
             inner loop.  */
          /* ??? How's this checking for invariantness?  */
          if (gimple_assign_single_p (stmt)
              && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
              && TREE_CODE (lhs) == SSA_NAME
              && single_use_in_loop (lhs, iloop->m_loop))
            continue;

          return false;
        }
      /* Check if loop has too many stmts.  */
      if (m_num_stmts > MAX_NUM_STMT)
        return false;

      /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
         loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
      if (!iloop || bb == m_loop->header
          || bb == iloop->m_exit->dest)
        continue;

      /* Don't allow any other PHI nodes.  */
      for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
        if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
          return false;
    }

  return true;
}

/* Programmers and optimizers (like loop store motion) may optimize code:

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
         a[i] += b[j][i] * c[j][i];

   into reduction:

     for (int i = 0; i < N; i++)
       {
         // producer.  Note sum can be initialized to a constant.
         int sum = a[i];
         for (int j = 0; j < N; j++)
           {
             sum += b[j][i] * c[j][i];
           }
         // consumer.
         a[i] = sum;
       }

   The resulting code can't be interchanged without undoing the optimization.
   This function classifies this kind of reduction and records information so
   that we can undo the store motion during interchange.  */

void
loop_cand::classify_simple_reduction (reduction_p re)
{
  gimple *producer, *consumer;

  /* Check init variable of reduction and how it is initialized.  */
  if (TREE_CODE (re->init) == SSA_NAME)
    {
      producer = SSA_NAME_DEF_STMT (re->init);
      re->producer = producer;
      basic_block bb = gimple_bb (producer);
      if (!bb || bb->loop_father != m_outer)
        return;

      if (!gimple_assign_load_p (producer))
        return;

      re->init_ref = gimple_assign_rhs1 (producer);
    }
  else if (CONSTANT_CLASS_P (re->init))
    m_const_init_reduc++;
  else
    return;

  /* Check how reduction variable is used.  */
  consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
  if (!consumer
      || !gimple_store_p (consumer))
    return;

  re->fini_ref = gimple_get_lhs (consumer);
  re->consumer = consumer;

  /* Simple reduction with constant initializer.  */
  if (!re->init_ref)
    {
      gcc_assert (CONSTANT_CLASS_P (re->init));
      re->init_ref = unshare_expr (re->fini_ref);
    }

  /* Require that memory references in producer and consumer are the same,
     so that we can undo the reduction during interchange.  */
  if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    return;

  re->type = SIMPLE_RTYPE;
}

/* Analyze reduction variable VAR for inner loop of the loop nest to be
   interchanged.  Return true if analysis succeeds.  */

bool
loop_cand::analyze_iloop_reduction_var (tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def, *single_use = NULL;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* In restricted reduction, the var is (and must be) used in defining
     the updated var.  The process can be depicted as below:

       var ;; = PHI<init, next>
        |
        |
        v
      +---------------------+
      | reduction operators | <-- other operands
      +---------------------+
        |
        |
        v
       next

     In terms of loop interchange, we don't change how NEXT is computed based
     on VAR and OTHER OPERANDS.  In case of double reduction in the loop nest
     to be interchanged, we don't change it at all.  In the case of simple
     reduction in the inner loop, we only change how VAR/NEXT is loaded or
     stored.  With these conditions, we can relax the restrictions on
     reductions so that the reduction operation is seen as a black box.  In
     general, we can ignore reassociation of the reduction operator; we can
     handle fake reductions in which VAR is not even used to compute NEXT.  */
  if (! single_imm_use (var, &use_p, &single_use)
      || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    return false;

  /* Check the reduction operation.  We require a left-associative operation.
     For FP math we also need to be allowed to associate operations.  */
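  /* For instance, "next = var - x" is accepted (VAR is the first operand of
     the MINUS_EXPR), while "next = x - var" is rejected, since minus is not
     associative in its second operand.  */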
  if (gassign *ass = dyn_cast <gassign *> (single_use))
    {
      enum tree_code code = gimple_assign_rhs_code (ass);
      if (! (associative_tree_code (code)
             || (code == MINUS_EXPR
                 && use_p->use == gimple_assign_rhs1_ptr (ass)))
          || (FLOAT_TYPE_P (TREE_TYPE (var))
              && ! flag_associative_math))
        return false;
    }
  else
    return false;

  /* Handle and verify a series of stmts feeding the reduction op.  */
  if (single_use != next_def
      && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
                                gimple_assign_rhs_code (single_use)))
    return false;

  /* Only support cases in which INIT is used in inner loop.  */
  if (TREE_CODE (init) == SSA_NAME)
    FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
      {
        stmt = USE_STMT (use_p);
        if (is_gimple_debug (stmt))
          continue;

        if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
          return false;
      }

  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
        continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
        continue;

      if (use_phi != NULL
          && lcssa_phi == NULL
          && gimple_bb (stmt) == m_exit->dest
          && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
        lcssa_phi = use_phi;
      else
        return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;

  classify_simple_reduction (re);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}

/* Analyze reduction variable VAR for outer loop of the loop nest to be
   interchanged.  ILOOP is not NULL and points to inner loop.  For the
   moment, we only support double reduction for outer loop, like:

     for (int i = 0; i < n; i++)
       {
         int sum = 0;

         for (int j = 0; j < n; j++)    // outer loop
           for (int k = 0; k < n; k++)  // inner loop
             sum += a[i][k]*b[k][j];

         s[i] = sum;
       }

   Note the innermost two loops are the loop nest to be interchanged.
   Return true if analysis succeeds.  */

bool
loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  gphi *lcssa_phi = NULL, *use_phi;
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
  tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
  reduction_p re;
  gimple *stmt, *next_def;
  use_operand_p use_p;
  imm_use_iterator iterator;

  if (TREE_CODE (next) != SSA_NAME)
    return false;

  next_def = SSA_NAME_DEF_STMT (next);
  basic_block bb = gimple_bb (next_def);
  if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    return false;

  /* Find inner loop's simple reduction that uses var as initializer.  */
  reduction_p inner_re = NULL;
  for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
      break;

  if (inner_re == NULL
      || inner_re->type != UNKNOWN_RTYPE
      || inner_re->producer != phi)
    return false;

  /* In case of double reduction, outer loop's reduction should be updated
     by inner loop's simple reduction.  */
  if (next_def != inner_re->lcssa_phi)
    return false;

  /* Outer loop's reduction should only be used to initialize inner loop's
     simple reduction.  */
  if (! single_imm_use (var, &use_p, &stmt)
      || stmt != inner_re->phi)
    return false;

  /* Check this reduction is correctly used outside of the loop via lcssa
     phi.  */
  FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    {
      stmt = USE_STMT (use_p);
      if (is_gimple_debug (stmt))
        continue;

      /* Or else it's used in PHI itself.  */
      use_phi = dyn_cast <gphi *> (stmt);
      if (use_phi == phi)
        continue;

      if (lcssa_phi == NULL
          && use_phi != NULL
          && gimple_bb (stmt) == m_exit->dest
          && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
        lcssa_phi = use_phi;
      else
        return false;
    }
  if (!lcssa_phi)
    return false;

  re = XCNEW (struct reduction);
  re->var = var;
  re->init = init;
  re->next = next;
  re->phi = phi;
  re->lcssa_phi = lcssa_phi;
  re->type = DOUBLE_RTYPE;
  inner_re->type = DOUBLE_RTYPE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_reduction (re);

  m_reductions.safe_push (re);
  return true;
}

/* Return true if VAR is an induction variable of current loop whose scev is
   specified by CHREC.  */

bool
loop_cand::analyze_induction_var (tree var, tree chrec)
{
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
  tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));

  /* Var is loop invariant, though it's unlikely to happen.  */
  if (tree_does_not_contain_chrecs (chrec))
    {
      /* Punt on floating point invariants if honoring signed zeros,
         representing that as + 0.0 would change the result if init
         is -0.0.  Similarly for SNaNs it can raise an exception.  */
      if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
        return false;
      struct induction *iv = XCNEW (struct induction);
      iv->var = var;
      iv->init_val = init;
      iv->init_expr = chrec;
      iv->step = build_zero_cst (TREE_TYPE (chrec));
      m_inductions.safe_push (iv);
      return true;
    }

  if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
      || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
      || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
      || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    return false;

  struct induction *iv = XCNEW (struct induction);
  iv->var = var;
  iv->init_val = init;
  iv->init_expr = CHREC_LEFT (chrec);
  iv->step = CHREC_RIGHT (chrec);

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_induction (m_loop, iv);

  m_inductions.safe_push (iv);
  return true;
}

/* Return true if all loop carried variables defined in loop header can
   be successfully analyzed.  */

bool
loop_cand::analyze_carried_vars (loop_cand *iloop)
{
  edge e = loop_preheader_edge (m_outer);
  gphi_iterator gsi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);

  for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      tree var = PHI_RESULT (phi);
      if (virtual_operand_p (var))
        continue;

      tree chrec = analyze_scalar_evolution (m_loop, var);
      chrec = instantiate_scev (e, m_loop, chrec);

      /* Analyze var as reduction variable.  */
      if (chrec_contains_undetermined (chrec)
          || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
        {
          if (iloop && !analyze_oloop_reduction_var (iloop, var))
            return false;
          if (!iloop && !analyze_iloop_reduction_var (var))
            return false;
        }
      /* Analyze var as induction variable.  */
      else if (!analyze_induction_var (var, chrec))
        return false;
    }

  return true;
}

/* Return TRUE if loop closed PHI nodes can be analyzed successfully.  */

bool
loop_cand::analyze_lcssa_phis (void)
{
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();

      if (virtual_operand_p (PHI_RESULT (phi)))
        continue;

      /* TODO: We only support lcssa phi for reduction for now.  */
      if (!find_reduction_by_stmt (phi))
        return false;
    }

  return true;
}

/* CONSUMER is a stmt in BB storing reduction result into memory object.
   When the reduction is initialized from a constant value, we need to add
   a stmt loading from the memory object to target basic block in inner
   loop during undoing the reduction.  Problem is that memory reference
   may use ssa variables not dominating the target basic block.  This
   function finds all stmts on which CONSUMER depends in basic block BB,
   records and returns them via STMTS.  */

static void
find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
{
  auto_vec<gimple *, 4> worklist;
  use_operand_p use_p;
  ssa_op_iter iter;
  gimple *stmt, *def_stmt;
  gimple_stmt_iterator gsi;

  /* First clear flag for stmts in bb.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);

  /* DFS all dependent stmts in bb and mark the flag for these stmts.  */
  worklist.safe_push (consumer);
  while (!worklist.is_empty ())
    {
      stmt = worklist.pop ();
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
        {
          def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));

          if (is_a <gphi *> (def_stmt)
              || gimple_bb (def_stmt) != bb
              || gimple_plf (def_stmt, GF_PLF_1))
            continue;

          worklist.safe_push (def_stmt);
        }
      gimple_set_plf (stmt, GF_PLF_1, true);
    }
  for (gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    {
      /* Move dep stmts to sequence STMTS.  */
      if (gimple_plf (stmt, GF_PLF_1))
        {
          gsi_remove (&gsi, false);
          gimple_seq_add_stmt_without_update (stmts, stmt);
        }
      else
        gsi_next_nondebug (&gsi);
    }
}

/* Users can write, and optimizers can generate, simple reduction RE for
   the inner loop.  In order to make interchange valid, we have to undo
   the reduction by moving producer and consumer stmts into the inner
   loop.  For example, below code:

     init = MEM_REF[idx];		//producer
     loop:
       var = phi<init, next>
       next = var op ...
     reduc_sum = phi<next>
     MEM_REF[idx] = reduc_sum		//consumer

   is transformed into:

     loop:
       new_var = MEM_REF[idx];		//producer after moving
       next = new_var op ...
       MEM_REF[idx] = next;		//consumer after moving

   Note if the reduction variable is initialized to constant, like:

     var = phi<0.0, next>

   we compute new_var as below:

     loop:
       tmp = MEM_REF[idx];
       new_var = !first_iteration ? tmp : 0.0;

   so that the initial const is used in the first iteration of the loop.
   Also record ssa variables for dead code elimination in DCE_SEEDS.  */

void
loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
{
  gimple *stmt;
  gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
  gimple_seq stmts = NULL;
  tree new_var;

  /* Prepare the initialization stmts and insert them into the inner
     loop.  */
  if (re->producer != NULL)
    {
      gimple_set_vuse (re->producer, NULL_TREE);
      from = gsi_for_stmt (re->producer);
      gsi_remove (&from, false);
      gimple_seq_add_stmt_without_update (&stmts, re->producer);
      new_var = re->init;
    }
  else
    {
      /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
      find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
      /* Generate a new stmt loading from the MEM_REF to TMP.  */
      tree cond, tmp = copy_ssa_name (re->var);
      stmt = gimple_build_assign (tmp, re->init_ref);
      gimple_seq_add_stmt_without_update (&stmts, stmt);

      /* Init new_var to MEM_REF or CONST depending on whether it is the
         first iteration.  */
      induction_p iv = m_inductions[0];
      cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
      new_var = copy_ssa_name (re->var);
      stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
      gimple_seq_add_stmt_without_update (&stmts, stmt);
    }
  gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);

  /* Replace all uses of reduction var with new variable.  */
  use_operand_p use_p;
  imm_use_iterator iterator;
  FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    {
      FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
        SET_USE (use_p, new_var);

      update_stmt (stmt);
    }

  /* Move consumer stmt into inner loop, just after reduction next's def.  */
  unlink_stmt_vdef (re->consumer);
  release_ssa_name (gimple_vdef (re->consumer));
  gimple_set_vdef (re->consumer, NULL_TREE);
  gimple_set_vuse (re->consumer, NULL_TREE);
  gimple_assign_set_rhs1 (re->consumer, re->next);
  from = gsi_for_stmt (re->consumer);
  to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
  gsi_move_after (&from, &to);

  /* Mark the reduction variables for DCE.  */
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
  bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
}

/* Free DATAREFS and its auxiliary memory.  */

static void
free_data_refs_with_aux (vec<data_reference_p> datarefs)
{
  data_reference_p dr;
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    if (dr->aux != NULL)
      {
        DR_ACCESS_STRIDE (dr)->release ();
        delete (vec<tree> *) dr->aux;
      }

  free_data_refs (datarefs);
}

/* Class for loop interchange transformation.  */

class tree_loop_interchange
{
public:
  tree_loop_interchange (vec<class loop *> loop_nest)
    : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
      m_dce_seeds (BITMAP_ALLOC (NULL)) { }
  ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
  bool interchange (vec<data_reference_p>, vec<ddr_p>);

private:
  void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
  bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
  void interchange_loops (loop_cand &, loop_cand &);
  void map_inductions_to_loop (loop_cand &, loop_cand &);
  void move_code_to_inner_loop (class loop *, class loop *, basic_block *);

  /* The whole loop nest in which interchange is ongoing.  */
  vec<class loop *> m_loop_nest;
  /* We create a new IV which is only used in the loop's exit condition
     check.  In case of 3-level loop nest interchange, when we interchange
     the innermost two loops, the new IV created in the middle level loop
     does not need to be preserved in interchanging the outermost two loops
     later.  We record the IV so that it can be skipped.  */
  tree m_niters_iv_var;
  /* Bitmap of seed variables for dead code elimination after interchange.  */
  bitmap m_dce_seeds;
};

/* Update data refs' access stride and dependence information after loop
   interchange.  I_IDX/O_IDX gives indices of interchanged loops in loop
   nest.  DATAREFS are data references.  DDRS are data dependences.  */

void
tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
					 vec<data_reference_p> datarefs,
					 vec<ddr_p> ddrs)
{
  struct data_reference *dr;
  struct data_dependence_relation *ddr;

  /* Update strides of data references.  */
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    {
      vec<tree> *stride = DR_ACCESS_STRIDE (dr);
      gcc_assert (stride->length () > i_idx);
      std::swap ((*stride)[i_idx], (*stride)[o_idx]);
    }
  /* Update data dependences.  */
  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
      {
        for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
          {
            lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
            std::swap (dist_vect[i_idx], dist_vect[o_idx]);
          }
      }
}

/* Check data dependence relations, return TRUE if it's valid to interchange
   two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
   loops is valid only if the dist vector, after interchanging, doesn't have
   '>' as the leftmost non-'=' direction.  In practice, this function is
   conservative and doesn't check some valid cases.  */

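/* For instance, in

     for (int i = 0; i < N - 1; i++)      // o_idx
       for (int j = 1; j < N; j++)        // i_idx
         a[i + 1][j - 1] = a[i][j];       // dist vector (1, -1)

   interchanging the loops would turn the vector into (-1, 1), whose
   leftmost non-'=' direction is '>', so interchange must be rejected.
   The check below conservatively requires both entries at I_IDX and
   O_IDX to be non-negative.  */
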
bool
tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
					       vec<ddr_p> ddrs)
{
  struct data_dependence_relation *ddr;

  for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
    {
      /* Skip no-dependence case.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
        continue;

      for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
        {
          lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
          unsigned level = dependence_level (dist_vect, m_loop_nest.length ());

          /* If there is no carried dependence.  */
          if (level == 0)
            continue;

          level --;

          /* If dependence is not carried by any loop in between the two
             loops [oloop, iloop] to interchange.  */
          if (level < o_idx || level > i_idx)
            continue;

          /* Be conservative, skip case if either direction at i_idx/o_idx
             levels is not '=' or '<'.  */
          if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
            return false;
        }
    }

  return true;
}

/* Interchange two loops specified by ILOOP and OLOOP.  */

void
tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
{
  reduction_p re;
  gimple_stmt_iterator gsi;
  tree i_niters, o_niters, var_after;

  /* Undo inner loop's simple reduction.  */
  for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
    if (re->type != DOUBLE_RTYPE)
      {
        if (re->producer)
          reset_debug_uses (re->producer);

        iloop.undo_simple_reduction (re, m_dce_seeds);
      }

  /* Only need to reset debug uses for double reduction.  */
  for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
    {
      gcc_assert (re->type == DOUBLE_RTYPE);
      reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
      reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
    }

  /* Prepare niters for both loops.  */
  class loop *loop_nest = m_loop_nest[0];
  edge instantiate_below = loop_preheader_edge (loop_nest);
  gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
  i_niters = number_of_latch_executions (iloop.m_loop);
  i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
  i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
			       i_niters);
  i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);
  o_niters = number_of_latch_executions (oloop.m_loop);
  if (oloop.m_loop != loop_nest)
    {
      o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
      o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
				   o_niters);
    }
  o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
				       NULL_TREE, false, GSI_CONTINUE_LINKING);

  /* Move src's code to tgt loop.  This is necessary when src is the outer
     loop and tgt is the inner loop.  */
  move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);

  /* Map outer loop's IV to inner loop, and vice versa.  */
  map_inductions_to_loop (oloop, iloop);
  map_inductions_to_loop (iloop, oloop);

  /* Create canonical IV for both loops.  Note canonical IV for outer/inner
     loop is actually from inner/outer loop.  Also we record the new IV
     created for the outer loop so that it can be skipped in later loop
     interchange.  */
  create_canonical_iv (oloop.m_loop, oloop.m_exit,
		       i_niters, &m_niters_iv_var, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
  create_canonical_iv (iloop.m_loop, iloop.m_exit,
		       o_niters, NULL, &var_after);
  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

  /* Scrap niters estimation of interchanged loops.  */
  iloop.m_loop->any_upper_bound = false;
  iloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (iloop.m_loop);
  oloop.m_loop->any_upper_bound = false;
  oloop.m_loop->any_likely_upper_bound = false;
  free_numbers_of_iterations_estimates (oloop.m_loop);

  /* Clear all cached scev information.  This is expensive but shouldn't be
     a problem given that we interchange only a very limited number of
     times.  */
  scev_reset_htab ();

  /* ??? The association between the loop data structure and the
     CFG changed, so what was loop N at the source level is now
     loop M.  We should think of retaining the association or breaking
     it fully by creating a new loop instead of re-using the "wrong" one.  */
}

/* Map induction variables of SRC loop to TGT loop.  The function first
   creates the same IV of the SRC loop in the TGT loop, then deletes the
   original IV and re-initializes it using the newly created IV.  For
   example, the loop nest:

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         {
           //use of i;
           //use of j;
         }

   will be transformed into:

     for (jj = 0; jj < M; jj++)
       for (ii = 0; ii < N; ii++)
         {
           //use of ii;
           //use of jj;
         }

   after loop interchange.  */

void
tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
{
  induction_p iv;
  edge e = tgt.m_exit;
  gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;

  /* Map source loop's IV to target loop.  */
  for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
    {
      gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
      gcc_assert (is_a <gphi *> (stmt));

      use_operand_p use_p;
      /* Only map original IV to target loop.  */
      if (m_niters_iv_var != iv->var)
        {
          /* Map the IV by creating the same one in target loop.  */
          tree var_before, var_after;
          tree base = unshare_expr (iv->init_expr);
          tree step = unshare_expr (iv->step);
          create_iv (base, step, SSA_NAME_VAR (iv->var),
                     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
          bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
          bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));

          /* Replace uses of the original IV var with newly created IV var.  */
          imm_use_iterator imm_iter;
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
            {
              FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                SET_USE (use_p, var_before);

              update_stmt (use_stmt);
            }
        }

      /* Mark all uses for DCE.  */
      ssa_op_iter op_iter;
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
        {
          tree use = USE_FROM_PTR (use_p);
          if (TREE_CODE (use) == SSA_NAME
              && ! SSA_NAME_IS_DEFAULT_DEF (use))
            bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
        }

      /* Delete definition of the original IV in the source loop.  */
      gsi = gsi_for_stmt (stmt);
      remove_phi_node (&gsi, true);
    }
}

/* Move stmts of outer loop to inner loop.  */

void
tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
						class loop *inner,
						basic_block *outer_bbs)
{
  basic_block oloop_exit_bb = single_exit (outer)->src;
  gimple_stmt_iterator gsi, to;

  for (unsigned i = 0; i < outer->num_nodes; i++)
    {
      basic_block bb = outer_bbs[i];

      /* Skip basic blocks of inner loop.  */
      if (flow_bb_inside_loop_p (inner, bb))
        continue;

      /* Move code from header/latch to header/latch.  */
      if (bb == outer->header)
        to = gsi_after_labels (inner->header);
      else if (bb == outer->latch)
        to = gsi_after_labels (inner->latch);
      else
        /* Otherwise, simply move to exit->src.  */
        to = gsi_last_bb (single_exit (inner)->src);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
        {
          gimple *stmt = gsi_stmt (gsi);

          if (oloop_exit_bb == bb
              && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
            {
              gsi_next (&gsi);
              continue;
            }

          if (gimple_vuse (stmt))
            gimple_set_vuse (stmt, NULL_TREE);
          if (gimple_vdef (stmt))
            {
              unlink_stmt_vdef (stmt);
              release_ssa_name (gimple_vdef (stmt));
              gimple_set_vdef (stmt, NULL_TREE);
            }

          reset_debug_uses (stmt);
          gsi_move_before (&gsi, &to);
        }
    }
}

/* Given data reference DR in LOOP_NEST, the function computes DR's access
   stride at each loop level, from innermost LOOP outwards.  On success,
   it saves the access stride at each loop level in a vector pointed to
   by DR->aux.  For example:

     int arr[100][100][100];
     for (i = 0; i < 100; i++)        ;(DR->aux)strides[0] = 40000
       for (j = 100; j > 0; j--)      ;(DR->aux)strides[1] = 400
         for (k = 0; k < 100; k++)    ;(DR->aux)strides[2] = 4
           arr[i][j - 1][k] = 0;  */

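/* Note the DR need not sit in the innermost loop.  For instance, had the
   store above been "arr[i][j - 1][0] = 0" placed in loop j, the innermost
   level would simply record a zero stride: strides[2] = 0.  */
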
static void
compute_access_stride (class loop *loop_nest, class loop *loop,
		       data_reference_p dr)
{
  vec<tree> *strides = new vec<tree> ();
  basic_block bb = gimple_bb (DR_STMT (dr));

  while (!flow_bb_inside_loop_p (loop, bb))
    {
      strides->safe_push (build_int_cst (sizetype, 0));
      loop = loop_outer (loop);
    }
  gcc_assert (loop == bb->loop_father);

  tree ref = DR_REF (dr);
  if (TREE_CODE (ref) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
    {
      /* We can't take address of bitfields.  If the bitfield is at constant
         offset from the start of the struct, just use address of the
         struct, for analysis of the strides that shouldn't matter.  */
      if (!TREE_OPERAND (ref, 2)
          || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
        ref = TREE_OPERAND (ref, 0);
      /* Otherwise, if we have a bit field representative, use that.  */
      else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
               != NULL_TREE)
        {
          tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
          ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
                        repr, TREE_OPERAND (ref, 2));
        }
      /* Otherwise punt.  */
      else
        {
          dr->aux = strides;
          return;
        }
    }
  tree scev_base = build_fold_addr_expr (ref);
  tree scev = analyze_scalar_evolution (loop, scev_base);
  scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
  if (! chrec_contains_undetermined (scev))
    {
      tree sl = scev;
      class loop *expected = loop;
      while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
        {
          class loop *sl_loop = get_chrec_loop (sl);
          while (sl_loop != expected)
            {
              strides->safe_push (size_int (0));
              expected = loop_outer (expected);
            }
          strides->safe_push (CHREC_RIGHT (sl));
          sl = CHREC_LEFT (sl);
          expected = loop_outer (expected);
        }
      if (! tree_contains_chrecs (sl, NULL))
        while (expected != loop_outer (loop_nest))
          {
            strides->safe_push (size_int (0));
            expected = loop_outer (expected);
          }
    }

  dr->aux = strides;
}

/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
   access strides with respect to each level loop for all data refs in
   DATAREFS from inner loop to outer loop.  On success, it returns the
   outermost loop that access strides can be computed successfully for
   all data references.  If access strides cannot be computed at least
   for two levels of loop for any data reference, it returns NULL.  */

static class loop *
compute_access_strides (class loop *loop_nest, class loop *loop,
			vec<data_reference_p> datarefs)
{
  unsigned i, j, num_loops = (unsigned) -1;
  data_reference_p dr;
  vec<tree> *stride;

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      compute_access_stride (loop_nest, loop, dr);
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () < num_loops)
        {
          num_loops = stride->length ();
          if (num_loops < 2)
            return NULL;
        }
    }

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () > num_loops)
        stride->truncate (num_loops);

      for (j = 0; j < (num_loops >> 1); ++j)
        std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
    }

  loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
  gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
  return loop;
}

/* Prune access strides for data references in DATAREFS by removing strides
   of loops that aren't in the current LOOP_NEST.  */

static void
prune_access_strides_not_in_loop (class loop *loop_nest,
				  class loop *innermost,
				  vec<data_reference_p> datarefs)
{
  data_reference_p dr;
  unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
  gcc_assert (num_loops > 1);

  /* Block-remove strides of loops that are not in the current loop nest.  */
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    {
      vec<tree> *stride = DR_ACCESS_STRIDE (dr);
      if (stride->length () > num_loops)
        stride->block_remove (0, stride->length () - num_loops);
    }
}

/* Dump access strides for all DATAREFS.  */

static void
dump_access_strides (vec<data_reference_p> datarefs)
{
  data_reference_p dr;
  fprintf (dump_file, "Access Strides for DRs:\n");
  for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    {
      fprintf (dump_file, "  ");
      print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
      fprintf (dump_file, ":\t\t<");

      vec<tree> *stride = DR_ACCESS_STRIDE (dr);
      unsigned num_loops = stride->length ();
      for (unsigned j = 0; j < num_loops; ++j)
        {
          print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
          fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
        }
    }
}

/* Return true if it's profitable to interchange two loops whose index
   in whole loop nest vector are I_IDX/O_IDX respectively.  The function
   computes and compares three types of information from all DATAREFS:
     1) Access stride for loop I_IDX and O_IDX.
     2) Number of invariant memory references with respect to I_IDX before
	and after loop interchange.
     3) Flags indicating if all memory references access sequential memory
	in ILOOP, before and after loop interchange.
   If INNERMOST_LOOPS_P is true, the two loops for interchanging are the
   two innermost loops in the loop nest.  This function also dumps
   information if DUMP_INFO_P is true.  */
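
/* As an illustration of the stride comparison, for a single data ref
   a[j][i] with "int a[100][100]", the inner loop iterating over j and the
   outer one over i, the stride sums are 400 (inner) vs. 4 (outer); the
   inner sum easily exceeds the outer sum times the ratio, and after
   interchange all accesses become sequential, so interchange pays off.  */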
1444
static bool
should_interchange_loops (unsigned i_idx, unsigned o_idx,
			  vec<data_reference_p> datarefs,
			  unsigned i_stmt_cost, unsigned o_stmt_cost,
			  bool innermost_loops_p, bool dump_info_p = true)
{
  unsigned HOST_WIDE_INT ratio;
  unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
  struct data_reference *dr;
  bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
  widest_int iloop_strides = 0, oloop_strides = 0;
  unsigned num_unresolved_drs = 0;
  unsigned num_resolved_ok_drs = 0;
  unsigned num_resolved_not_ok_drs = 0;

  if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");

  for (i = 0; datarefs.iterate (i, &dr); ++i)
    {
      vec<tree> *stride = DR_ACCESS_STRIDE (dr);
      tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];

      bool subloop_stride_p = false;
      /* Data ref can't be invariant or sequential access at current loop if
         its address changes with respect to any subloops.  */
      for (j = i_idx + 1; j < stride->length (); ++j)
        if (!integer_zerop ((*stride)[j]))
          {
            subloop_stride_p = true;
            break;
          }

      if (integer_zerop (iloop_stride))
        {
          if (!subloop_stride_p)
            num_old_inv_drs++;
        }
      if (integer_zerop (oloop_stride))
        {
          if (!subloop_stride_p)
            num_new_inv_drs++;
        }

      if (TREE_CODE (iloop_stride) == INTEGER_CST
          && TREE_CODE (oloop_stride) == INTEGER_CST)
        {
          iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
          oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
        }
      else if (multiple_of_p (TREE_TYPE (iloop_stride),
                              iloop_stride, oloop_stride))
        num_resolved_ok_drs++;
      else if (multiple_of_p (TREE_TYPE (iloop_stride),
                              oloop_stride, iloop_stride))
        num_resolved_not_ok_drs++;
      else
        num_unresolved_drs++;

      /* Data ref can't be sequential access if its address changes in sub
         loop.  */
      if (subloop_stride_p)
        {
          all_seq_dr_before_p = false;
          all_seq_dr_after_p = false;
          continue;
        }
      /* Track if all data references are sequential accesses before/after loop
         interchange.  Note invariant is considered sequential here.  */
      tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
      if (all_seq_dr_before_p
          && ! (integer_zerop (iloop_stride)
                || operand_equal_p (access_size, iloop_stride, 0)))
        all_seq_dr_before_p = false;
      if (all_seq_dr_after_p
          && ! (integer_zerop (oloop_stride)
                || operand_equal_p (access_size, oloop_stride, 0)))
        all_seq_dr_after_p = false;
    }

  if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\toverall:\t\t");
      print_decu (iloop_strides, dump_file);
      fprintf (dump_file, "\t");
      print_decu (oloop_strides, dump_file);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
	       num_old_inv_drs, num_new_inv_drs);
      fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
	       all_seq_dr_before_p ? "true" : "false",
	       all_seq_dr_after_p ? "true" : "false");
      fprintf (dump_file, "OK to interchange with variable strides: %d\n",
	       num_resolved_ok_drs);
      fprintf (dump_file, "Not OK to interchange with variable strides: %d\n",
	       num_resolved_not_ok_drs);
      fprintf (dump_file, "Variable strides we cannot decide: %d\n",
	       num_unresolved_drs);
      fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
      fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
    }

  if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
    return false;

  /* Stmts of outer loop will be moved to inner loop.  If there are too many
     such stmts, it could make the inner loop costly.  Here we compare stmt
     cost between outer and inner loops.  */
  if (i_stmt_cost && o_stmt_cost
      && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
      && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
    return false;

  /* We use a different stride comparison ratio depending on whether we are
     interchanging the innermost two loops or not.  The idea is to be
     conservative in interchange for the innermost loops.  */
  ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
  /* Do interchange if it gives better data locality behavior.  */
  if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
    return true;
  if (wi::gtu_p (iloop_strides, oloop_strides))
    {
      /* Or it creates more invariant memory references.  */
      if ((!all_seq_dr_before_p || all_seq_dr_after_p)
          && num_new_inv_drs > num_old_inv_drs)
        return true;
      /* Or it makes all memory references sequential.  */
      if (num_new_inv_drs >= num_old_inv_drs
          && !all_seq_dr_before_p && all_seq_dr_after_p)
        return true;
    }

  return false;
}

/* Try to interchange inner loop of a loop nest to outer level.  */

bool
tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
				    vec<ddr_p> ddrs)
{
  dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
  bool changed_p = false;
  /* In each iteration we try to interchange the I-th loop with the (I+1)-th
     loop.  The overall effect is to push the innermost loop to the outermost
     level in the whole loop nest.  */
  for (unsigned i = m_loop_nest.length (); i > 1; --i)
    {
      unsigned i_idx = i - 1, o_idx = i - 2;

      /* Check validity for loop interchange.  */
      if (!valid_data_dependences (i_idx, o_idx, ddrs))
        break;

      loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
      loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);

      /* Check if we can do transformation for loop interchange.  */
      if (!iloop.analyze_carried_vars (NULL)
          || !iloop.analyze_lcssa_phis ()
          || !oloop.analyze_carried_vars (&iloop)
          || !oloop.analyze_lcssa_phis ()
          || !iloop.can_interchange_p (NULL)
          || !oloop.can_interchange_p (&iloop))
        break;

      /* Outer loop's stmts will be moved to inner loop during interchange.
         If there are many of them, it may make the inner loop very costly.
         We need to check the number of outer loop's stmts in the profit
         cost model of interchange.  */
      int stmt_cost = oloop.m_num_stmts;
      /* Count out the exit checking stmt of outer loop.  */
      stmt_cost --;
      /* Count out IV's increasing stmt, IVOPTs takes care of it.  */
      stmt_cost -= oloop.m_inductions.length ();
      /* Count in the additional load and cond_expr stmts caused by inner
         loop's constant initialized reduction.  */
      stmt_cost += iloop.m_const_init_reduc * 2;
      if (stmt_cost < 0)
        stmt_cost = 0;

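      /* For instance, an outer loop with 5 stmts, one IV and the exit check
         contributes cost 5 - 1 - 1 = 3; one constant-initialized inner
         reduction would then raise it to 5.  */
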
      /* Check profitability for loop interchange.  */
      if (should_interchange_loops (i_idx, o_idx, datarefs,
                                    (unsigned) iloop.m_num_stmts,
                                    (unsigned) stmt_cost,
                                    iloop.m_loop->inner == NULL))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
                     oloop.m_loop->num, iloop.m_loop->num);

          changed_p = true;
          interchange_loops (iloop, oloop);
          /* No need to update if there is no further loop interchange.  */
          if (o_idx > 0)
            update_data_info (i_idx, o_idx, datarefs, ddrs);
        }
      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
                     oloop.m_loop->num, iloop.m_loop->num);
        }
    }
  simple_dce_from_worklist (m_dce_seeds);

  if (changed_p && dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		     "loops interchanged in loop nest\n");

  return changed_p;
}


/* Loop interchange pass.  */

namespace {

const pass_data pass_data_linterchange =
{
  GIMPLE_PASS, /* type */
  "linterchange", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_LINTERCHANGE, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_linterchange : public gimple_opt_pass
{
public:
  pass_linterchange (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_linterchange, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_linterchange (m_ctxt); }
  virtual bool gate (function *) { return flag_loop_interchange; }
  virtual unsigned int execute (function *);

}; // class pass_linterchange


/* Return true if LOOP has proper form for interchange.  We check three
   conditions in the function:
     1) In general, a loop can be interchanged only if it doesn't have
	basic blocks other than header, exit and latch besides possible
	inner loop nest.  This basically restricts loop interchange to
	loop nests of the below form:

	  header<---+
	    |       |
	    v       |
	INNER_LOOP  |
	    |       |
	    v       |
	  exit--->latch

     2) Data references in basic blocks that execute a different number
	of times than the loop header/exit are not allowed.
     3) Record the innermost outer loop that doesn't form a rectangular
	loop nest with LOOP.  */

1714 static bool
1715 proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
1716 {
1717 edge e0, e1, exit;
1718
1719 /* Don't interchange if loop has unsupported information for the moment. */
1720 if (loop->safelen > 0
1721 || loop->constraints != 0
1722 || loop->can_be_parallel
1723 || loop->dont_vectorize
1724 || loop->force_vectorize
1725 || loop->in_oacc_kernels_region
1726 || loop->orig_loop_num != 0
1727 || loop->simduid != NULL_TREE)
1728 return false;
1729
1730 /* Don't interchange if the outer loop has basic blocks other than the
1731 header, exit and latch. */
1732 if (loop->inner != NULL
1733 && loop->num_nodes != loop->inner->num_nodes + 3)
1734 return false;
1735
1736 if ((exit = single_dom_exit (loop)) == NULL)
1737 return false;
1738
1739 /* Check control flow on loop header/exit blocks. */
1740 if (loop->header == exit->src
1741 && (EDGE_COUNT (loop->header->preds) != 2
1742 || EDGE_COUNT (loop->header->succs) != 2))
1743 return false;
1744 else if (loop->header != exit->src
1745 && (EDGE_COUNT (loop->header->preds) != 2
1746 || !single_succ_p (loop->header)
1747 || unsupported_edge (single_succ_edge (loop->header))
1748 || EDGE_COUNT (exit->src->succs) != 2
1749 || !single_pred_p (exit->src)
1750 || unsupported_edge (single_pred_edge (exit->src))))
1751 return false;
1752
1753 e0 = EDGE_PRED (loop->header, 0);
1754 e1 = EDGE_PRED (loop->header, 1);
1755 if (unsupported_edge (e0) || unsupported_edge (e1)
1756 || (e0->src != loop->latch && e1->src != loop->latch)
1757 || (e0->src->loop_father == loop && e1->src->loop_father == loop))
1758 return false;
1759
1760 e0 = EDGE_SUCC (exit->src, 0);
1761 e1 = EDGE_SUCC (exit->src, 1);
1762 if (unsupported_edge (e0) || unsupported_edge (e1)
1763 || (e0->dest != loop->latch && e1->dest != loop->latch)
1764 || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
1765 return false;
1766
1767 /* Don't interchange if any memory reference is in a basic block that
1768 doesn't dominate the exit block. */
1769 basic_block *bbs = get_loop_body (loop);
1770 for (unsigned i = 0; i < loop->num_nodes; i++)
1771 {
1772 basic_block bb = bbs[i];
1773
1774 if (bb->loop_father != loop
1775 || bb == loop->header || bb == exit->src
1776 || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
1777 continue;
1778
1779 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
1780 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
1781 if (gimple_vuse (gsi_stmt (gsi)))
1782 {
1783 free (bbs);
1784 return false;
1785 }
1786 }
1787 free (bbs);
1788
1789 tree niters = number_of_latch_executions (loop);
1790 niters = analyze_scalar_evolution (loop_outer (loop), niters);
1791 if (!niters || chrec_contains_undetermined (niters))
1792 return false;
1793
1794 /* Record the innermost outer loop that doesn't form a rectangular loop nest. */
1795 for (loop_p loop2 = loop_outer (loop);
1796 loop2 && flow_loop_nested_p (*min_outer, loop2);
1797 loop2 = loop_outer (loop2))
1798 {
1799 niters = instantiate_scev (loop_preheader_edge (loop2),
1800 loop_outer (loop), niters);
1801 if (!evolution_function_is_invariant_p (niters, loop2->num))
1802 {
1803 *min_outer = loop2;
1804 break;
1805 }
1806 }
1807 return true;
1808 }
1809
1810 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
1811 should be interchanged by examining all DATAREFS. */
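
/* For instance (stride-ratio parameters aside), in

     for (int i = 0; i < N; i++)
       for (int j = 0; j < N; j++)
         sum = sum + a[j][i];

   the inner loop accesses A with a stride of N elements while the outer
   loop's stride is 1, so interchanging the pair is likely judged
   profitable.  */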
1812
1813 static bool
1814 should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
1815 vec<data_reference_p> datarefs)
1816 {
1817 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
1818 gcc_assert (idx > 0);
1819
1820 /* Check if any two adjacent loops should be interchanged. */
1821 for (class loop *loop = innermost;
1822 loop != loop_nest; loop = loop_outer (loop), idx--)
1823 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
1824 loop == innermost, false))
1825 return true;
1826
1827 return false;
1828 }
1829
1830 /* Given loop nest LOOP_NEST and data references DATAREFS, compute the data
1831 dependences for loop interchange and store them in DDRS. Note we compute
1832 dependences directly rather than calling the generic interface, so that
1833 we can return instantly on an unknown dependence. */
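
/* For example (illustrative only), a nest with two distinct outer-loop
   stores

     for (int i = 0; i < N; i++)
       {
         p[i] = 0;
         for (int j = 0; j < N; j++)
           p[i] = p[i] + a[j][i];
         q[i] = p[i];
       }

   is rejected below, because multiple write references in the outer loop
   are not supported.  */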
1834
1835 static bool
1836 tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
1837 vec<data_reference_p> datarefs,
1838 vec<ddr_p> *ddrs)
1839 {
1840 struct data_reference *a, *b;
1841 class loop *innermost = loop_nest.last ();
1842
1843 for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
1844 {
1845 bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
1846 for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
1847 if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
1848 {
1849 bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
1850 /* We don't support multiple write references in the outer loop. */
1851 if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
1852 return false;
1853
1854 ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
1855 ddrs->safe_push (ddr);
1856 compute_affine_dependence (ddr, loop_nest[0]);
1857
1858 /* Give up if the ddr is an unknown dependence or no classic direction
1859 vector is available. */
1860 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
1861 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
1862 && DDR_NUM_DIR_VECTS (ddr) == 0))
1863 return false;
1864
1865 /* If either data reference is in the outer loop of the nest, we require
1866 no dependence here because the reference needs to be moved into the
1867 inner loop during interchange. */
1868 if (a_outer_p && b_outer_p
1869 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
1870 continue;
1871 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1872 && (a_outer_p || b_outer_p))
1873 return false;
1874 }
1875 }
1876
1877 return true;
1878 }
1879
1880 /* Prune DATAREFS by removing any data reference not inside LOOP. */
1881
1882 static inline void
1883 prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
1884 {
1885 unsigned i, j;
1886 struct data_reference *dr;
1887
1888 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
1889 {
1890 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
1891 datarefs[j++] = dr;
1892 else
1893 {
1894 if (dr->aux)
1895 {
1896 DR_ACCESS_STRIDE (dr)->release ();
1897 delete (vec<tree> *) dr->aux;
1898 }
1899 free_data_ref (dr);
1900 }
1901 }
1902 datarefs.truncate (j);
1903 }
1904
1905 /* Find and store data references in DATAREFS for the LOOP nest. If there's
1906 a difficult data reference in a basic block, we shrink the loop nest to
1907 the inner loop of that basic block's father loop. On success, return the
1908 outer loop of the resulting loop nest. */
1909
1910 static class loop *
1911 prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
1912 {
1913 class loop *loop_nest = loop;
1914 vec<data_reference_p> *bb_refs;
1915 basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
1916
1917 for (unsigned i = 0; i < loop->num_nodes; i++)
1918 bbs[i]->aux = NULL;
1919
1920 /* Find data references for all basic blocks. Shrink the loop nest on a
1921 difficult data reference. */
1922 for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
1923 {
1924 bb = bbs[i];
1925 if (!flow_bb_inside_loop_p (loop_nest, bb))
1926 continue;
1927
1928 bb_refs = new vec<data_reference_p> ();
1929 if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
1930 {
1931 loop_nest = bb->loop_father->inner;
1932 if (loop_nest && !loop_nest->inner)
1933 loop_nest = NULL;
1934
1935 free_data_refs (*bb_refs);
1936 delete bb_refs;
1937 }
1938 else if (bb_refs->is_empty ())
1939 delete bb_refs;
1940 else
1941 bb->aux = bb_refs;
1942 }
1943
1944 /* Collect all data references in loop nest. */
1945 for (unsigned i = 0; i < loop->num_nodes; i++)
1946 {
1947 bb = bbs[i];
1948 if (!bb->aux)
1949 continue;
1950
1951 bb_refs = (vec<data_reference_p> *) bb->aux;
1952 if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
1953 datarefs->safe_splice (*bb_refs);
1954 else
1955 free_data_refs (*bb_refs);
1956
1957 delete bb_refs;
1958 bb->aux = NULL;
1959 }
1960 free (bbs);
1961
1962 return loop_nest;
1963 }
1964
1965 /* Given innermost LOOP, return true if a perfect loop nest can be found and
1966 data dependences can be computed. On success, record the perfect loop
1967 nest in LOOP_NEST; record all data references in DATAREFS and record all
1968 data dependence relations in DDRS.
1969
1970 We do support a restricted form of imperfect loop nest, i.e., a loop nest
1971 with a load/store in the outer loop initializing/finalizing a simple
1972 reduction of the innermost loop. For such an outer-loop reference, we
1973 require that it has no dependence with others since it will be moved
1974 into the inner loop during interchange. */
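
/* For example, the supported imperfect form looks like

     for (int i = 0; i < N; i++)
       {
         s = r[i];                <- load initializing inner reduction
         for (int j = 0; j < N; j++)
           s = s + a[j][i];
         r[i] = s;                <- store finalizing inner reduction
       }

   where the r[i] load/store pair must have no dependence with any other
   reference in the nest.  */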
1975
1976 static bool
1977 prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
1978 vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
1979 {
1980 class loop *start_loop = NULL, *innermost = loop;
1981 class loop *outermost = loops_for_fn (cfun)->tree_root;
1982
1983 /* Find the loop nest from the innermost loop outwards. OUTERMOST is the
1984 innermost outer loop that doesn't form a rectangular nest with LOOP. */
1985 while (loop->num != 0 && loop->inner == start_loop
1986 && flow_loop_nested_p (outermost, loop))
1987 {
1988 if (!proper_loop_form_for_interchange (loop, &outermost))
1989 break;
1990
1991 start_loop = loop;
1992 /* If this loop has a sibling loop, the father loop won't be in a
1993 perfect loop nest. */
1994 if (loop->next != NULL)
1995 break;
1996
1997 loop = loop_outer (loop);
1998 }
1999 if (!start_loop || !start_loop->inner)
2000 return false;
2001
2002 /* Prepare the data reference vector for the loop nest, pruning outer
2003 loops we cannot handle. */
2004 start_loop = prepare_data_references (start_loop, datarefs);
2005 if (!start_loop
2006 /* Check if there is no data reference. */
2007 || datarefs->is_empty ()
2008 /* Check if there are too many data references. */
2009 || (int) datarefs->length () > MAX_DATAREFS)
2010 return false;
2011
2012 /* Compute access strides for all data references, pruning outer
2013 loops we cannot analyze refs in. */
2014 start_loop = compute_access_strides (start_loop, innermost, *datarefs);
2015 if (!start_loop)
2016 return false;
2017
2018 /* Check if any interchange is profitable in the loop nest. */
2019 if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
2020 return false;
2021
2022 /* Check if data dependences can be computed for loop nest starting from
2023 start_loop. */
2024 loop = start_loop;
2025 do {
2026 loop_nest->truncate (0);
2027
2028 if (loop != start_loop)
2029 prune_datarefs_not_in_loop (start_loop, *datarefs);
2030
2031 if (find_loop_nest (start_loop, loop_nest)
2032 && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
2033 {
2034 if (dump_file && (dump_flags & TDF_DETAILS))
2035 fprintf (dump_file,
2036 "\nConsider loop interchange for loop_nest<%d - %d>\n",
2037 start_loop->num, innermost->num);
2038
2039 if (loop != start_loop)
2040 prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
2041
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2043 dump_access_strides (*datarefs);
2044
2045 return true;
2046 }
2047
2048 free_dependence_relations (*ddrs);
2049 *ddrs = vNULL;
2050 /* Try to compute data dependences with the outermost loop stripped. */
2051 loop = start_loop;
2052 start_loop = start_loop->inner;
2053 } while (start_loop && start_loop->inner);
2054
2055 return false;
2056 }
2057
2058 /* Main entry for loop interchange pass. */
2059
2060 unsigned int
2061 pass_linterchange::execute (function *fun)
2062 {
2063 if (number_of_loops (fun) <= 2)
2064 return 0;
2065
2066 bool changed_p = false;
2067 class loop *loop;
2068 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2069 {
2070 vec<loop_p> loop_nest = vNULL;
2071 vec<data_reference_p> datarefs = vNULL;
2072 vec<ddr_p> ddrs = vNULL;
2073 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
2074 {
2075 tree_loop_interchange loop_interchange (loop_nest);
2076 changed_p |= loop_interchange.interchange (datarefs, ddrs);
2077 }
2078 free_dependence_relations (ddrs);
2079 free_data_refs_with_aux (datarefs);
2080 loop_nest.release ();
2081 }
2082
2083 return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
2084 }
2085
2086 } // anon namespace
2087
2088 gimple_opt_pass *
2089 make_pass_linterchange (gcc::context *ctxt)
2090 {
2091 return new pass_linterchange (ctxt);
2092 }