]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lambda-code.c
vec.h: Update API to separate allocation mechanism from type.
[thirdparty/gcc.git] / gcc / lambda-code.c
CommitLineData
36d59cf7 1/* Loop transformation code generation
6a6305e4 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
36d59cf7
DB
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "errors.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "rtl.h"
31#include "basic-block.h"
32#include "diagnostic.h"
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "timevar.h"
36#include "cfgloop.h"
37#include "expr.h"
38#include "optabs.h"
39#include "tree-chrec.h"
40#include "tree-data-ref.h"
41#include "tree-pass.h"
42#include "tree-scalar-evolution.h"
43#include "vec.h"
44#include "lambda.h"
45
46/* This loop nest code generation is based on non-singular matrix
47 math.
48
49 A little terminology and a general sketch of the algorithm. See "A singular
6cb38cd4 50 loop transformation framework based on non-singular matrices" by Wei Li and
36d59cf7
DB
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
53
464f49d8 54 A loop iteration space represents the points traversed by the loop. A point in the
36d59cf7 55 iteration space can be represented by a vector of size <loop depth>. You can
1f838355 56 therefore represent the iteration space as an integral combinations of a set
36d59cf7
DB
57 of basis vectors.
58
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
62
63 for i = 1 to 3, step 1 is a dense iteration space.
64
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
67
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
70
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
74
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
6cb38cd4
KH
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
36d59cf7
DB
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
83
6cb38cd4 84 Given the auxiliary space, we use the lower triangular part to compute the
36d59cf7
DB
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
88
89 Sparse source spaces require another step, because you can't directly compute
6cb38cd4 90 the exact bounds of the auxiliary and target space from the sparse space.
36d59cf7
DB
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
95
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
105
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
114
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
117
d4e6fecb
NS
118/* FIXME: I'm sure the vectors used here could be heap allocated.
119 There certainly should be explicit VEC_frees, either way. (nathan
120 2005/04/14) */
f67d92e9 121
d4e6fecb
NS
122DEF_VEC_P(int);
123DEF_VEC_ALLOC_P(int,gc);
f67d92e9
DB
124
125static bool perfect_nestify (struct loops *,
d4e6fecb
NS
126 struct loop *, VEC(tree,gc) *,
127 VEC(tree,gc) *, VEC(int,gc) *, VEC(tree,gc) *);
36d59cf7
DB
128/* Lattice stuff that is internal to the code generation algorithm. */
129
130typedef struct
131{
132 /* Lattice base matrix. */
133 lambda_matrix base;
134 /* Lattice dimension. */
135 int dimension;
136 /* Origin vector for the coefficients. */
137 lambda_vector origin;
138 /* Origin matrix for the invariants. */
139 lambda_matrix origin_invariants;
140 /* Number of invariants. */
141 int invariants;
142} *lambda_lattice;
143
144#define LATTICE_BASE(T) ((T)->base)
145#define LATTICE_DIMENSION(T) ((T)->dimension)
146#define LATTICE_ORIGIN(T) ((T)->origin)
147#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
148#define LATTICE_INVARIANTS(T) ((T)->invariants)
149
150static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
151 int, int);
152static lambda_lattice lambda_lattice_new (int, int);
153static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
154
155static tree find_induction_var_from_exit_cond (struct loop *);
156
157/* Create a new lambda body vector. */
158
159lambda_body_vector
160lambda_body_vector_new (int size)
161{
162 lambda_body_vector ret;
163
164 ret = ggc_alloc (sizeof (*ret));
165 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
166 LBV_SIZE (ret) = size;
167 LBV_DENOMINATOR (ret) = 1;
168 return ret;
169}
170
171/* Compute the new coefficients for the vector based on the
172 *inverse* of the transformation matrix. */
173
174lambda_body_vector
175lambda_body_vector_compute_new (lambda_trans_matrix transform,
176 lambda_body_vector vect)
177{
178 lambda_body_vector temp;
179 int depth;
180
181 /* Make sure the matrix is square. */
599eabdb 182 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
36d59cf7
DB
183
184 depth = LTM_ROWSIZE (transform);
185
186 temp = lambda_body_vector_new (depth);
187 LBV_DENOMINATOR (temp) =
188 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
189 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
190 LTM_MATRIX (transform), depth,
191 LBV_COEFFICIENTS (temp));
192 LBV_SIZE (temp) = LBV_SIZE (vect);
193 return temp;
194}
195
196/* Print out a lambda body vector. */
197
198void
199print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
200{
201 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
202}
203
204/* Return TRUE if two linear expressions are equal. */
205
206static bool
207lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
208 int depth, int invariants)
209{
210 int i;
211
212 if (lle1 == NULL || lle2 == NULL)
213 return false;
214 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
215 return false;
216 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
217 return false;
218 for (i = 0; i < depth; i++)
219 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
220 return false;
221 for (i = 0; i < invariants; i++)
222 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
223 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
224 return false;
225 return true;
226}
227
228/* Create a new linear expression with dimension DIM, and total number
229 of invariants INVARIANTS. */
230
231lambda_linear_expression
232lambda_linear_expression_new (int dim, int invariants)
233{
234 lambda_linear_expression ret;
235
236 ret = ggc_alloc_cleared (sizeof (*ret));
237
238 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
239 LLE_CONSTANT (ret) = 0;
240 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
241 LLE_DENOMINATOR (ret) = 1;
242 LLE_NEXT (ret) = NULL;
243
244 return ret;
245}
246
247/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
248 The starting letter used for variable names is START. */
249
250static void
251print_linear_expression (FILE * outfile, lambda_vector expr, int size,
252 char start)
253{
254 int i;
255 bool first = true;
256 for (i = 0; i < size; i++)
257 {
258 if (expr[i] != 0)
259 {
260 if (first)
261 {
262 if (expr[i] < 0)
263 fprintf (outfile, "-");
264 first = false;
265 }
266 else if (expr[i] > 0)
267 fprintf (outfile, " + ");
268 else
269 fprintf (outfile, " - ");
270 if (abs (expr[i]) == 1)
271 fprintf (outfile, "%c", start + i);
272 else
273 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
274 }
275 }
276}
277
278/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
279 depth/number of coefficients is given by DEPTH, the number of invariants is
280 given by INVARIANTS, and the character to start variable names with is given
281 by START. */
282
283void
284print_lambda_linear_expression (FILE * outfile,
285 lambda_linear_expression expr,
286 int depth, int invariants, char start)
287{
288 fprintf (outfile, "\tLinear expression: ");
289 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
290 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
291 fprintf (outfile, " invariants: ");
292 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
293 invariants, 'A');
294 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
295}
296
297/* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
298 coefficients is given by DEPTH, the number of invariants is
299 given by INVARIANTS, and the character to start variable names with is given
8c27b7d4 300 by START. */
36d59cf7
DB
301
302void
303print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
304 int invariants, char start)
305{
306 int step;
307 lambda_linear_expression expr;
308
599eabdb 309 gcc_assert (loop);
36d59cf7
DB
310
311 expr = LL_LINEAR_OFFSET (loop);
312 step = LL_STEP (loop);
313 fprintf (outfile, " step size = %d \n", step);
314
315 if (expr)
316 {
317 fprintf (outfile, " linear offset: \n");
318 print_lambda_linear_expression (outfile, expr, depth, invariants,
319 start);
320 }
321
322 fprintf (outfile, " lower bound: \n");
323 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
324 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
325 fprintf (outfile, " upper bound: \n");
326 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
327 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
328}
329
330/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
331 number of invariants. */
332
333lambda_loopnest
334lambda_loopnest_new (int depth, int invariants)
335{
336 lambda_loopnest ret;
337 ret = ggc_alloc (sizeof (*ret));
338
339 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
340 LN_DEPTH (ret) = depth;
341 LN_INVARIANTS (ret) = invariants;
342
343 return ret;
344}
345
346/* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
347 character to use for loop names is given by START. */
348
349void
350print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
351{
352 int i;
353 for (i = 0; i < LN_DEPTH (nest); i++)
354 {
355 fprintf (outfile, "Loop %c\n", start + i);
356 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
357 LN_INVARIANTS (nest), 'i');
358 fprintf (outfile, "\n");
359 }
360}
361
362/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
471854f8 363 of invariants. */
36d59cf7
DB
364
365static lambda_lattice
366lambda_lattice_new (int depth, int invariants)
367{
368 lambda_lattice ret;
369 ret = ggc_alloc (sizeof (*ret));
370 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
371 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
372 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
373 LATTICE_DIMENSION (ret) = depth;
374 LATTICE_INVARIANTS (ret) = invariants;
375 return ret;
376}
377
378/* Compute the lattice base for NEST. The lattice base is essentially a
379 non-singular transform from a dense base space to a sparse iteration space.
380 We use it so that we don't have to specially handle the case of a sparse
381 iteration space in other parts of the algorithm. As a result, this routine
382 only does something interesting (IE produce a matrix that isn't the
383 identity matrix) if NEST is a sparse space. */
384
385static lambda_lattice
386lambda_lattice_compute_base (lambda_loopnest nest)
387{
388 lambda_lattice ret;
389 int depth, invariants;
390 lambda_matrix base;
391
392 int i, j, step;
393 lambda_loop loop;
394 lambda_linear_expression expression;
395
396 depth = LN_DEPTH (nest);
397 invariants = LN_INVARIANTS (nest);
398
399 ret = lambda_lattice_new (depth, invariants);
400 base = LATTICE_BASE (ret);
401 for (i = 0; i < depth; i++)
402 {
403 loop = LN_LOOPS (nest)[i];
599eabdb 404 gcc_assert (loop);
36d59cf7
DB
405 step = LL_STEP (loop);
406 /* If we have a step of 1, then the base is one, and the
407 origin and invariant coefficients are 0. */
408 if (step == 1)
409 {
410 for (j = 0; j < depth; j++)
411 base[i][j] = 0;
412 base[i][i] = 1;
413 LATTICE_ORIGIN (ret)[i] = 0;
414 for (j = 0; j < invariants; j++)
415 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
416 }
417 else
418 {
419 /* Otherwise, we need the lower bound expression (which must
420 be an affine function) to determine the base. */
421 expression = LL_LOWER_BOUND (loop);
464f49d8 422 gcc_assert (expression && !LLE_NEXT (expression)
599eabdb 423 && LLE_DENOMINATOR (expression) == 1);
36d59cf7
DB
424
425 /* The lower triangular portion of the base is going to be the
426 coefficient times the step */
427 for (j = 0; j < i; j++)
428 base[i][j] = LLE_COEFFICIENTS (expression)[j]
429 * LL_STEP (LN_LOOPS (nest)[j]);
430 base[i][i] = step;
431 for (j = i + 1; j < depth; j++)
432 base[i][j] = 0;
433
434 /* Origin for this loop is the constant of the lower bound
435 expression. */
436 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
437
438 /* Coefficient for the invariants are equal to the invariant
439 coefficients in the expression. */
440 for (j = 0; j < invariants; j++)
441 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
442 LLE_INVARIANT_COEFFICIENTS (expression)[j];
443 }
444 }
445 return ret;
446}
447
448/* Compute the greatest common denominator of two numbers (A and B) using
449 Euclid's algorithm. */
450
451static int
452gcd (int a, int b)
453{
454
455 int x, y, z;
456
457 x = abs (a);
458 y = abs (b);
459
460 while (x > 0)
461 {
462 z = y % x;
463 y = x;
464 x = z;
465 }
466
467 return (y);
468}
469
470/* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
471
472static int
473gcd_vector (lambda_vector vector, int size)
474{
475 int i;
476 int gcd1 = 0;
477
478 if (size > 0)
479 {
480 gcd1 = vector[0];
481 for (i = 1; i < size; i++)
482 gcd1 = gcd (gcd1, vector[i]);
483 }
484 return gcd1;
485}
486
487/* Compute the least common multiple of two numbers A and B . */
488
489static int
490lcm (int a, int b)
491{
492 return (abs (a) * abs (b) / gcd (a, b));
493}
494
feb075f4 495/* Perform Fourier-Motzkin elimination to calculate the bounds of the
aabcd309 496 auxiliary nest.
464f49d8 497 Fourier-Motzkin is a way of reducing systems of linear inequalities so that
feb075f4
DB
498 it is easy to calculate the answer and bounds.
499 A sketch of how it works:
500 Given a system of linear inequalities, ai * xj >= bk, you can always
501 rewrite the constraints so they are all of the form
502 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
503 in b1 ... bk, and some a in a1...ai)
504 You can then eliminate this x from the non-constant inequalities by
505 rewriting these as a <= b, x >= constant, and delete the x variable.
506 You can then repeat this for any remaining x variables, and then we have
507 an easy to use variable <= constant (or no variables at all) form that we
508 can construct our bounds from.
509
510 In our case, each time we eliminate, we construct part of the bound from
511 the ith variable, then delete the ith variable.
512
513 Remember the constant are in our vector a, our coefficient matrix is A,
514 and our invariant coefficient matrix is B.
515
516 SIZE is the size of the matrices being passed.
517 DEPTH is the loop nest depth.
518 INVARIANTS is the number of loop invariants.
519 A, B, and a are the coefficient matrix, invariant coefficient, and a
520 vector of constants, respectively. */
521
522static lambda_loopnest
523compute_nest_using_fourier_motzkin (int size,
524 int depth,
525 int invariants,
526 lambda_matrix A,
527 lambda_matrix B,
528 lambda_vector a)
529{
530
531 int multiple, f1, f2;
532 int i, j, k;
533 lambda_linear_expression expression;
534 lambda_loop loop;
535 lambda_loopnest auxillary_nest;
536 lambda_matrix swapmatrix, A1, B1;
537 lambda_vector swapvector, a1;
538 int newsize;
539
540 A1 = lambda_matrix_new (128, depth);
541 B1 = lambda_matrix_new (128, invariants);
542 a1 = lambda_vector_new (128);
543
544 auxillary_nest = lambda_loopnest_new (depth, invariants);
545
546 for (i = depth - 1; i >= 0; i--)
547 {
548 loop = lambda_loop_new ();
549 LN_LOOPS (auxillary_nest)[i] = loop;
550 LL_STEP (loop) = 1;
551
552 for (j = 0; j < size; j++)
553 {
554 if (A[j][i] < 0)
555 {
556 /* Any linear expression in the matrix with a coefficient less
557 than 0 becomes part of the new lower bound. */
558 expression = lambda_linear_expression_new (depth, invariants);
559
560 for (k = 0; k < i; k++)
561 LLE_COEFFICIENTS (expression)[k] = A[j][k];
562
563 for (k = 0; k < invariants; k++)
564 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
565
566 LLE_DENOMINATOR (expression) = -1 * A[j][i];
567 LLE_CONSTANT (expression) = -1 * a[j];
568
569 /* Ignore if identical to the existing lower bound. */
570 if (!lle_equal (LL_LOWER_BOUND (loop),
571 expression, depth, invariants))
572 {
573 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
574 LL_LOWER_BOUND (loop) = expression;
575 }
576
577 }
578 else if (A[j][i] > 0)
579 {
580 /* Any linear expression with a coefficient greater than 0
471854f8 581 becomes part of the new upper bound. */
feb075f4
DB
582 expression = lambda_linear_expression_new (depth, invariants);
583 for (k = 0; k < i; k++)
584 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
585
586 for (k = 0; k < invariants; k++)
587 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
588
589 LLE_DENOMINATOR (expression) = A[j][i];
590 LLE_CONSTANT (expression) = a[j];
591
592 /* Ignore if identical to the existing upper bound. */
593 if (!lle_equal (LL_UPPER_BOUND (loop),
594 expression, depth, invariants))
595 {
596 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
597 LL_UPPER_BOUND (loop) = expression;
598 }
599
600 }
601 }
602
603 /* This portion creates a new system of linear inequalities by deleting
604 the i'th variable, reducing the system by one variable. */
605 newsize = 0;
606 for (j = 0; j < size; j++)
607 {
608 /* If the coefficient for the i'th variable is 0, then we can just
609 eliminate the variable straightaway. Otherwise, we have to
610 multiply through by the coefficients we are eliminating. */
611 if (A[j][i] == 0)
612 {
613 lambda_vector_copy (A[j], A1[newsize], depth);
614 lambda_vector_copy (B[j], B1[newsize], invariants);
615 a1[newsize] = a[j];
616 newsize++;
617 }
618 else if (A[j][i] > 0)
619 {
620 for (k = 0; k < size; k++)
621 {
622 if (A[k][i] < 0)
623 {
624 multiple = lcm (A[j][i], A[k][i]);
625 f1 = multiple / A[j][i];
626 f2 = -1 * multiple / A[k][i];
627
628 lambda_vector_add_mc (A[j], f1, A[k], f2,
629 A1[newsize], depth);
630 lambda_vector_add_mc (B[j], f1, B[k], f2,
631 B1[newsize], invariants);
632 a1[newsize] = f1 * a[j] + f2 * a[k];
633 newsize++;
634 }
635 }
636 }
637 }
638
639 swapmatrix = A;
640 A = A1;
641 A1 = swapmatrix;
642
643 swapmatrix = B;
644 B = B1;
645 B1 = swapmatrix;
646
647 swapvector = a;
648 a = a1;
649 a1 = swapvector;
650
651 size = newsize;
652 }
653
654 return auxillary_nest;
655}
656
36d59cf7 657/* Compute the loop bounds for the auxiliary space NEST.
c4bda9f0
DB
658 Input system used is Ax <= b. TRANS is the unimodular transformation.
659 Given the original nest, this function will
660 1. Convert the nest into matrix form, which consists of a matrix for the
661 coefficients, a matrix for the
662 invariant coefficients, and a vector for the constants.
663 2. Use the matrix form to calculate the lattice base for the nest (which is
664 a dense space)
665 3. Compose the dense space transform with the user specified transform, to
666 get a transform we can easily calculate transformed bounds for.
667 4. Multiply the composed transformation matrix times the matrix form of the
668 loop.
669 5. Transform the newly created matrix (from step 4) back into a loop nest
670 using fourier motzkin elimination to figure out the bounds. */
36d59cf7
DB
671
672static lambda_loopnest
673lambda_compute_auxillary_space (lambda_loopnest nest,
674 lambda_trans_matrix trans)
675{
feb075f4
DB
676 lambda_matrix A, B, A1, B1;
677 lambda_vector a, a1;
36d59cf7 678 lambda_matrix invertedtrans;
30a6aaed 679 int depth, invariants, size;
feb075f4 680 int i, j;
36d59cf7
DB
681 lambda_loop loop;
682 lambda_linear_expression expression;
683 lambda_lattice lattice;
684
36d59cf7
DB
685 depth = LN_DEPTH (nest);
686 invariants = LN_INVARIANTS (nest);
687
688 /* Unfortunately, we can't know the number of constraints we'll have
689 ahead of time, but this should be enough even in ridiculous loop nest
690 cases. We abort if we go over this limit. */
691 A = lambda_matrix_new (128, depth);
692 B = lambda_matrix_new (128, invariants);
693 a = lambda_vector_new (128);
694
695 A1 = lambda_matrix_new (128, depth);
696 B1 = lambda_matrix_new (128, invariants);
697 a1 = lambda_vector_new (128);
698
699 /* Store the bounds in the equation matrix A, constant vector a, and
700 invariant matrix B, so that we have Ax <= a + B.
701 This requires a little equation rearranging so that everything is on the
702 correct side of the inequality. */
703 size = 0;
704 for (i = 0; i < depth; i++)
705 {
706 loop = LN_LOOPS (nest)[i];
707
708 /* First we do the lower bound. */
709 if (LL_STEP (loop) > 0)
710 expression = LL_LOWER_BOUND (loop);
711 else
712 expression = LL_UPPER_BOUND (loop);
713
714 for (; expression != NULL; expression = LLE_NEXT (expression))
715 {
716 /* Fill in the coefficient. */
717 for (j = 0; j < i; j++)
718 A[size][j] = LLE_COEFFICIENTS (expression)[j];
719
720 /* And the invariant coefficient. */
721 for (j = 0; j < invariants; j++)
722 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
723
724 /* And the constant. */
725 a[size] = LLE_CONSTANT (expression);
726
727 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
728 constants and single variables on */
729 A[size][i] = -1 * LLE_DENOMINATOR (expression);
730 a[size] *= -1;
731 for (j = 0; j < invariants; j++)
732 B[size][j] *= -1;
733
734 size++;
735 /* Need to increase matrix sizes above. */
599eabdb
DB
736 gcc_assert (size <= 127);
737
36d59cf7
DB
738 }
739
740 /* Then do the exact same thing for the upper bounds. */
741 if (LL_STEP (loop) > 0)
742 expression = LL_UPPER_BOUND (loop);
743 else
744 expression = LL_LOWER_BOUND (loop);
745
746 for (; expression != NULL; expression = LLE_NEXT (expression))
747 {
748 /* Fill in the coefficient. */
749 for (j = 0; j < i; j++)
750 A[size][j] = LLE_COEFFICIENTS (expression)[j];
751
752 /* And the invariant coefficient. */
753 for (j = 0; j < invariants; j++)
754 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
755
756 /* And the constant. */
757 a[size] = LLE_CONSTANT (expression);
758
759 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
760 for (j = 0; j < i; j++)
761 A[size][j] *= -1;
762 A[size][i] = LLE_DENOMINATOR (expression);
763 size++;
764 /* Need to increase matrix sizes above. */
599eabdb
DB
765 gcc_assert (size <= 127);
766
36d59cf7
DB
767 }
768 }
769
770 /* Compute the lattice base x = base * y + origin, where y is the
771 base space. */
772 lattice = lambda_lattice_compute_base (nest);
773
774 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
775
776 /* A1 = A * L */
777 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
778
779 /* a1 = a - A * origin constant. */
780 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
781 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
782
783 /* B1 = B - A * origin invariant. */
784 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
785 invariants);
786 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
787
788 /* Now compute the auxiliary space bounds by first inverting U, multiplying
789 it by A1, then performing fourier motzkin. */
790
791 invertedtrans = lambda_matrix_new (depth, depth);
792
793 /* Compute the inverse of U. */
30a6aaed
KH
794 lambda_matrix_inverse (LTM_MATRIX (trans),
795 invertedtrans, depth);
36d59cf7
DB
796
797 /* A = A1 inv(U). */
798 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
799
feb075f4
DB
800 return compute_nest_using_fourier_motzkin (size, depth, invariants,
801 A, B1, a1);
36d59cf7
DB
802}
803
804/* Compute the loop bounds for the target space, using the bounds of
c4bda9f0
DB
805 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
806 The target space loop bounds are computed by multiplying the triangular
aabcd309 807 matrix H by the auxiliary nest, to get the new loop bounds. The sign of
c4bda9f0
DB
808 the loop steps (positive or negative) is then used to swap the bounds if
809 the loop counts downwards.
36d59cf7
DB
810 Return the target loopnest. */
811
812static lambda_loopnest
813lambda_compute_target_space (lambda_loopnest auxillary_nest,
814 lambda_trans_matrix H, lambda_vector stepsigns)
815{
816 lambda_matrix inverse, H1;
817 int determinant, i, j;
818 int gcd1, gcd2;
819 int factor;
820
821 lambda_loopnest target_nest;
822 int depth, invariants;
823 lambda_matrix target;
824
825 lambda_loop auxillary_loop, target_loop;
826 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
827
828 depth = LN_DEPTH (auxillary_nest);
829 invariants = LN_INVARIANTS (auxillary_nest);
830
831 inverse = lambda_matrix_new (depth, depth);
832 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
833
834 /* H1 is H excluding its diagonal. */
835 H1 = lambda_matrix_new (depth, depth);
836 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
837
838 for (i = 0; i < depth; i++)
839 H1[i][i] = 0;
840
841 /* Computes the linear offsets of the loop bounds. */
842 target = lambda_matrix_new (depth, depth);
843 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
844
845 target_nest = lambda_loopnest_new (depth, invariants);
846
847 for (i = 0; i < depth; i++)
848 {
849
850 /* Get a new loop structure. */
851 target_loop = lambda_loop_new ();
852 LN_LOOPS (target_nest)[i] = target_loop;
853
854 /* Computes the gcd of the coefficients of the linear part. */
855 gcd1 = gcd_vector (target[i], i);
856
ea4b7848 857 /* Include the denominator in the GCD. */
36d59cf7
DB
858 gcd1 = gcd (gcd1, determinant);
859
ea4b7848 860 /* Now divide through by the gcd. */
36d59cf7
DB
861 for (j = 0; j < i; j++)
862 target[i][j] = target[i][j] / gcd1;
863
864 expression = lambda_linear_expression_new (depth, invariants);
865 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
866 LLE_DENOMINATOR (expression) = determinant / gcd1;
867 LLE_CONSTANT (expression) = 0;
868 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
869 invariants);
870 LL_LINEAR_OFFSET (target_loop) = expression;
871 }
872
ea4b7848 873 /* For each loop, compute the new bounds from H. */
36d59cf7
DB
874 for (i = 0; i < depth; i++)
875 {
876 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
877 target_loop = LN_LOOPS (target_nest)[i];
878 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
879 factor = LTM_MATRIX (H)[i][i];
880
881 /* First we do the lower bound. */
882 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
883
884 for (; auxillary_expr != NULL;
885 auxillary_expr = LLE_NEXT (auxillary_expr))
886 {
887 target_expr = lambda_linear_expression_new (depth, invariants);
888 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
889 depth, inverse, depth,
890 LLE_COEFFICIENTS (target_expr));
891 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
892 LLE_COEFFICIENTS (target_expr), depth,
893 factor);
894
895 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
896 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
897 LLE_INVARIANT_COEFFICIENTS (target_expr),
898 invariants);
899 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
900 LLE_INVARIANT_COEFFICIENTS (target_expr),
901 invariants, factor);
902 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
903
904 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
905 {
906 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
907 * determinant;
908 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
909 (target_expr),
910 LLE_INVARIANT_COEFFICIENTS
911 (target_expr), invariants,
912 determinant);
913 LLE_DENOMINATOR (target_expr) =
914 LLE_DENOMINATOR (target_expr) * determinant;
915 }
916 /* Find the gcd and divide by it here, rather than doing it
917 at the tree level. */
918 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
919 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
920 invariants);
921 gcd1 = gcd (gcd1, gcd2);
922 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
923 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
924 for (j = 0; j < depth; j++)
925 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
926 for (j = 0; j < invariants; j++)
927 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
928 LLE_CONSTANT (target_expr) /= gcd1;
929 LLE_DENOMINATOR (target_expr) /= gcd1;
930 /* Ignore if identical to existing bound. */
931 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
932 invariants))
933 {
934 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
935 LL_LOWER_BOUND (target_loop) = target_expr;
936 }
937 }
938 /* Now do the upper bound. */
939 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
940
941 for (; auxillary_expr != NULL;
942 auxillary_expr = LLE_NEXT (auxillary_expr))
943 {
944 target_expr = lambda_linear_expression_new (depth, invariants);
945 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
946 depth, inverse, depth,
947 LLE_COEFFICIENTS (target_expr));
948 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
949 LLE_COEFFICIENTS (target_expr), depth,
950 factor);
951 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
952 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
953 LLE_INVARIANT_COEFFICIENTS (target_expr),
954 invariants);
955 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
956 LLE_INVARIANT_COEFFICIENTS (target_expr),
957 invariants, factor);
958 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
959
960 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
961 {
962 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
963 * determinant;
964 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
965 (target_expr),
966 LLE_INVARIANT_COEFFICIENTS
967 (target_expr), invariants,
968 determinant);
969 LLE_DENOMINATOR (target_expr) =
970 LLE_DENOMINATOR (target_expr) * determinant;
971 }
972 /* Find the gcd and divide by it here, instead of at the
973 tree level. */
974 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
975 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
976 invariants);
977 gcd1 = gcd (gcd1, gcd2);
978 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
979 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
980 for (j = 0; j < depth; j++)
981 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
982 for (j = 0; j < invariants; j++)
983 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
984 LLE_CONSTANT (target_expr) /= gcd1;
985 LLE_DENOMINATOR (target_expr) /= gcd1;
986 /* Ignore if equal to existing bound. */
987 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
988 invariants))
989 {
990 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
991 LL_UPPER_BOUND (target_loop) = target_expr;
992 }
993 }
994 }
995 for (i = 0; i < depth; i++)
996 {
997 target_loop = LN_LOOPS (target_nest)[i];
998 /* If necessary, exchange the upper and lower bounds and negate
999 the step size. */
1000 if (stepsigns[i] < 0)
1001 {
1002 LL_STEP (target_loop) *= -1;
1003 tmp_expr = LL_LOWER_BOUND (target_loop);
1004 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1005 LL_UPPER_BOUND (target_loop) = tmp_expr;
1006 }
1007 }
1008 return target_nest;
1009}
1010
1011/* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
1012 result. */
1013
1014static lambda_vector
1015lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1016{
1017 lambda_matrix matrix, H;
1018 int size;
1019 lambda_vector newsteps;
1020 int i, j, factor, minimum_column;
1021 int temp;
1022
1023 matrix = LTM_MATRIX (trans);
1024 size = LTM_ROWSIZE (trans);
1025 H = lambda_matrix_new (size, size);
1026
1027 newsteps = lambda_vector_new (size);
1028 lambda_vector_copy (stepsigns, newsteps, size);
1029
1030 lambda_matrix_copy (matrix, H, size, size);
1031
1032 for (j = 0; j < size; j++)
1033 {
1034 lambda_vector row;
1035 row = H[j];
1036 for (i = j; i < size; i++)
1037 if (row[i] < 0)
1038 lambda_matrix_col_negate (H, size, i);
1039 while (lambda_vector_first_nz (row, size, j + 1) < size)
1040 {
1041 minimum_column = lambda_vector_min_nz (row, size, j);
1042 lambda_matrix_col_exchange (H, size, j, minimum_column);
1043
1044 temp = newsteps[j];
1045 newsteps[j] = newsteps[minimum_column];
1046 newsteps[minimum_column] = temp;
1047
1048 for (i = j + 1; i < size; i++)
1049 {
1050 factor = row[i] / row[j];
1051 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1052 }
1053 }
1054 }
1055 return newsteps;
1056}
1057
1058/* Transform NEST according to TRANS, and return the new loopnest.
1059 This involves
1060 1. Computing a lattice base for the transformation
1061 2. Composing the dense base with the specified transformation (TRANS)
1062 3. Decomposing the combined transformation into a lower triangular portion,
1063 and a unimodular portion.
aabcd309
KH
1064 4. Computing the auxiliary nest using the unimodular portion.
1065 5. Computing the target nest using the auxiliary nest and the lower
36d59cf7
DB
1066 triangular portion. */
1067
1068lambda_loopnest
1069lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1070{
1071 lambda_loopnest auxillary_nest, target_nest;
1072
1073 int depth, invariants;
1074 int i, j;
1075 lambda_lattice lattice;
1076 lambda_trans_matrix trans1, H, U;
1077 lambda_loop loop;
1078 lambda_linear_expression expression;
1079 lambda_vector origin;
1080 lambda_matrix origin_invariants;
1081 lambda_vector stepsigns;
1082 int f;
1083
1084 depth = LN_DEPTH (nest);
1085 invariants = LN_INVARIANTS (nest);
1086
1087 /* Keep track of the signs of the loop steps. */
1088 stepsigns = lambda_vector_new (depth);
1089 for (i = 0; i < depth; i++)
1090 {
1091 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1092 stepsigns[i] = 1;
1093 else
1094 stepsigns[i] = -1;
1095 }
1096
1097 /* Compute the lattice base. */
1098 lattice = lambda_lattice_compute_base (nest);
1099 trans1 = lambda_trans_matrix_new (depth, depth);
1100
1101 /* Multiply the transformation matrix by the lattice base. */
1102
1103 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1104 LTM_MATRIX (trans1), depth, depth, depth);
1105
1106 /* Compute the Hermite normal form for the new transformation matrix. */
1107 H = lambda_trans_matrix_new (depth, depth);
1108 U = lambda_trans_matrix_new (depth, depth);
1109 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1110 LTM_MATRIX (U));
1111
1112 /* Compute the auxiliary loop nest's space from the unimodular
1113 portion. */
1114 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1115
1116 /* Compute the loop step signs from the old step signs and the
1117 transformation matrix. */
1118 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1119
1120 /* Compute the target loop nest space from the auxiliary nest and
1121 the lower triangular matrix H. */
1122 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1123 origin = lambda_vector_new (depth);
1124 origin_invariants = lambda_matrix_new (depth, invariants);
1125 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1126 LATTICE_ORIGIN (lattice), origin);
1127 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1128 origin_invariants, depth, depth, invariants);
1129
1130 for (i = 0; i < depth; i++)
1131 {
1132 loop = LN_LOOPS (target_nest)[i];
1133 expression = LL_LINEAR_OFFSET (loop);
1134 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1135 f = 1;
1136 else
1137 f = LLE_DENOMINATOR (expression);
1138
1139 LLE_CONSTANT (expression) += f * origin[i];
1140
1141 for (j = 0; j < invariants; j++)
1142 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1143 f * origin_invariants[i][j];
1144 }
1145
1146 return target_nest;
1147
1148}
1149
1150/* Convert a gcc tree expression EXPR to a lambda linear expression, and
1151 return the new expression. DEPTH is the depth of the loopnest.
1152 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1153 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1154 is the amount we have to add/subtract from the expression because of the
1155 type of comparison it is used in. */
1156
1157static lambda_linear_expression
1158gcc_tree_to_linear_expression (int depth, tree expr,
d4e6fecb
NS
1159 VEC(tree,gc) *outerinductionvars,
1160 VEC(tree,gc) *invariants, int extra)
36d59cf7
DB
1161{
1162 lambda_linear_expression lle = NULL;
1163 switch (TREE_CODE (expr))
1164 {
1165 case INTEGER_CST:
1166 {
1167 lle = lambda_linear_expression_new (depth, 2 * depth);
1168 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1169 if (extra != 0)
464f49d8 1170 LLE_CONSTANT (lle) += extra;
36d59cf7
DB
1171
1172 LLE_DENOMINATOR (lle) = 1;
1173 }
1174 break;
1175 case SSA_NAME:
1176 {
1177 tree iv, invar;
1178 size_t i;
1179 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1180 if (iv != NULL)
1181 {
1182 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1183 {
1184 lle = lambda_linear_expression_new (depth, 2 * depth);
1185 LLE_COEFFICIENTS (lle)[i] = 1;
1186 if (extra != 0)
1187 LLE_CONSTANT (lle) = extra;
1188
1189 LLE_DENOMINATOR (lle) = 1;
1190 }
1191 }
1192 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1193 if (invar != NULL)
1194 {
1195 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1196 {
1197 lle = lambda_linear_expression_new (depth, 2 * depth);
1198 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1199 if (extra != 0)
1200 LLE_CONSTANT (lle) = extra;
1201 LLE_DENOMINATOR (lle) = 1;
1202 }
1203 }
1204 }
1205 break;
1206 default:
1207 return NULL;
1208 }
1209
1210 return lle;
1211}
1212
464f49d8
DB
1213/* Return the depth of the loopnest NEST */
1214
1215static int
1216depth_of_nest (struct loop *nest)
1217{
1218 size_t depth = 0;
1219 while (nest)
1220 {
1221 depth++;
1222 nest = nest->inner;
1223 }
1224 return depth;
1225}
1226
1227
36d59cf7
DB
1228/* Return true if OP is invariant in LOOP and all outer loops. */
1229
1230static bool
feb075f4 1231invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
36d59cf7 1232{
f67d92e9
DB
1233 if (is_gimple_min_invariant (op))
1234 return true;
36d59cf7
DB
1235 if (loop->depth == 0)
1236 return true;
feb075f4
DB
1237 if (!expr_invariant_in_loop_p (loop, op))
1238 return false;
1239 if (loop->outer
1240 && !invariant_in_loop_and_outer_loops (loop->outer, op))
1241 return false;
1242 return true;
36d59cf7
DB
1243}
1244
1245/* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1246 or NULL if it could not be converted.
1247 DEPTH is the depth of the loop.
1248 INVARIANTS is a pointer to the array of loop invariants.
1249 The induction variable for this loop should be stored in the parameter
1250 OURINDUCTIONVAR.
1251 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1252
1253static lambda_loop
1254gcc_loop_to_lambda_loop (struct loop *loop, int depth,
d4e6fecb 1255 VEC(tree,gc) ** invariants,
36d59cf7 1256 tree * ourinductionvar,
d4e6fecb
NS
1257 VEC(tree,gc) * outerinductionvars,
1258 VEC(tree,gc) ** lboundvars,
1259 VEC(tree,gc) ** uboundvars,
1260 VEC(int,gc) ** steps)
36d59cf7
DB
1261{
1262 tree phi;
1263 tree exit_cond;
1264 tree access_fn, inductionvar;
1265 tree step;
1266 lambda_loop lloop = NULL;
1267 lambda_linear_expression lbound, ubound;
1268 tree test;
1269 int stepint;
1270 int extra = 0;
464f49d8 1271 tree lboundvar, uboundvar, uboundresult;
36d59cf7
DB
1272 use_optype uses;
1273
f67d92e9 1274 /* Find out induction var and exit condition. */
36d59cf7 1275 inductionvar = find_induction_var_from_exit_cond (loop);
36d59cf7
DB
1276 exit_cond = get_loop_exit_condition (loop);
1277
1278 if (inductionvar == NULL || exit_cond == NULL)
1279 {
1280 if (dump_file && (dump_flags & TDF_DETAILS))
1281 fprintf (dump_file,
1282 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1283 return NULL;
1284 }
1285
1286 test = TREE_OPERAND (exit_cond, 0);
36d59cf7 1287
36d59cf7
DB
1288 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1289 {
1290
1291 if (dump_file && (dump_flags & TDF_DETAILS))
1292 fprintf (dump_file,
1293 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1294
1295 return NULL;
1296 }
1297
1298 phi = SSA_NAME_DEF_STMT (inductionvar);
1299 if (TREE_CODE (phi) != PHI_NODE)
1300 {
36d59cf7
DB
1301 uses = STMT_USE_OPS (phi);
1302
1303 if (!uses)
1304 {
1305
1306 if (dump_file && (dump_flags & TDF_DETAILS))
1307 fprintf (dump_file,
1308 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1309
1310 return NULL;
1311 }
1312
1313 phi = USE_OP (uses, 0);
1314 phi = SSA_NAME_DEF_STMT (phi);
1315 if (TREE_CODE (phi) != PHI_NODE)
1316 {
1317
1318 if (dump_file && (dump_flags & TDF_DETAILS))
1319 fprintf (dump_file,
1320 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1321 return NULL;
1322 }
1323
1324 }
464f49d8 1325
f67d92e9
DB
1326 /* The induction variable name/version we want to put in the array is the
1327 result of the induction variable phi node. */
1328 *ourinductionvar = PHI_RESULT (phi);
36d59cf7
DB
1329 access_fn = instantiate_parameters
1330 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
464f49d8 1331 if (access_fn == chrec_dont_know)
36d59cf7
DB
1332 {
1333 if (dump_file && (dump_flags & TDF_DETAILS))
1334 fprintf (dump_file,
464f49d8 1335 "Unable to convert loop: Access function for induction variable phi is unknown\n");
36d59cf7
DB
1336
1337 return NULL;
1338 }
1339
1340 step = evolution_part_in_loop_num (access_fn, loop->num);
1341 if (!step || step == chrec_dont_know)
1342 {
1343 if (dump_file && (dump_flags & TDF_DETAILS))
1344 fprintf (dump_file,
1345 "Unable to convert loop: Cannot determine step of loop.\n");
1346
1347 return NULL;
1348 }
1349 if (TREE_CODE (step) != INTEGER_CST)
1350 {
1351
1352 if (dump_file && (dump_flags & TDF_DETAILS))
1353 fprintf (dump_file,
1354 "Unable to convert loop: Step of loop is not integer.\n");
1355 return NULL;
1356 }
1357
1358 stepint = TREE_INT_CST_LOW (step);
1359
1360 /* Only want phis for induction vars, which will have two
1361 arguments. */
1362 if (PHI_NUM_ARGS (phi) != 2)
1363 {
1364 if (dump_file && (dump_flags & TDF_DETAILS))
1365 fprintf (dump_file,
1366 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1367 return NULL;
1368 }
1369
1370 /* Another induction variable check. One argument's source should be
1371 in the loop, one outside the loop. */
1372 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1373 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1374 {
1375
1376 if (dump_file && (dump_flags & TDF_DETAILS))
1377 fprintf (dump_file,
1378 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1379
1380 return NULL;
1381 }
1382
1383 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
f67d92e9
DB
1384 {
1385 lboundvar = PHI_ARG_DEF (phi, 1);
1386 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1387 outerinductionvars, *invariants,
1388 0);
1389 }
36d59cf7 1390 else
f67d92e9
DB
1391 {
1392 lboundvar = PHI_ARG_DEF (phi, 0);
1393 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1394 outerinductionvars, *invariants,
1395 0);
1396 }
1397
36d59cf7
DB
1398 if (!lbound)
1399 {
1400
1401 if (dump_file && (dump_flags & TDF_DETAILS))
1402 fprintf (dump_file,
1403 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1404
1405 return NULL;
1406 }
599eabdb
DB
1407 /* One part of the test may be a loop invariant tree. */
1408 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
feb075f4 1409 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
d4e6fecb 1410 VEC_safe_push (tree, gc, *invariants, TREE_OPERAND (test, 1));
599eabdb 1411 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
feb075f4 1412 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
d4e6fecb 1413 VEC_safe_push (tree, gc, *invariants, TREE_OPERAND (test, 0));
599eabdb
DB
1414
1415 /* The non-induction variable part of the test is the upper bound variable.
1416 */
1417 if (TREE_OPERAND (test, 0) == inductionvar)
1418 uboundvar = TREE_OPERAND (test, 1);
1419 else
1420 uboundvar = TREE_OPERAND (test, 0);
1421
36d59cf7
DB
1422
1423 /* We only size the vectors assuming we have, at max, 2 times as many
1424 invariants as we do loops (one for each bound).
1425 This is just an arbitrary number, but it has to be matched against the
1426 code below. */
599eabdb
DB
1427 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1428
36d59cf7 1429
8c27b7d4 1430 /* We might have some leftover. */
36d59cf7
DB
1431 if (TREE_CODE (test) == LT_EXPR)
1432 extra = -1 * stepint;
1433 else if (TREE_CODE (test) == NE_EXPR)
1434 extra = -1 * stepint;
599eabdb
DB
1435 else if (TREE_CODE (test) == GT_EXPR)
1436 extra = -1 * stepint;
464f49d8
DB
1437 else if (TREE_CODE (test) == EQ_EXPR)
1438 extra = 1 * stepint;
1439
1440 ubound = gcc_tree_to_linear_expression (depth, uboundvar,
36d59cf7
DB
1441 outerinductionvars,
1442 *invariants, extra);
464f49d8
DB
1443 uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1444 build_int_cst (TREE_TYPE (uboundvar), extra));
d4e6fecb
NS
1445 VEC_safe_push (tree, gc, *uboundvars, uboundresult);
1446 VEC_safe_push (tree, gc, *lboundvars, lboundvar);
1447 VEC_safe_push (int, gc, *steps, stepint);
36d59cf7
DB
1448 if (!ubound)
1449 {
36d59cf7
DB
1450 if (dump_file && (dump_flags & TDF_DETAILS))
1451 fprintf (dump_file,
1452 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1453 return NULL;
1454 }
1455
1456 lloop = lambda_loop_new ();
1457 LL_STEP (lloop) = stepint;
1458 LL_LOWER_BOUND (lloop) = lbound;
1459 LL_UPPER_BOUND (lloop) = ubound;
1460 return lloop;
1461}
1462
1463/* Given a LOOP, find the induction variable it is testing against in the exit
1464 condition. Return the induction variable if found, NULL otherwise. */
1465
1466static tree
1467find_induction_var_from_exit_cond (struct loop *loop)
1468{
1469 tree expr = get_loop_exit_condition (loop);
599eabdb 1470 tree ivarop;
36d59cf7
DB
1471 tree test;
1472 if (expr == NULL_TREE)
1473 return NULL_TREE;
1474 if (TREE_CODE (expr) != COND_EXPR)
1475 return NULL_TREE;
1476 test = TREE_OPERAND (expr, 0);
6615c446 1477 if (!COMPARISON_CLASS_P (test))
36d59cf7 1478 return NULL_TREE;
464f49d8
DB
1479
1480 /* Find the side that is invariant in this loop. The ivar must be the other
1481 side. */
1482
1483 if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
599eabdb 1484 ivarop = TREE_OPERAND (test, 1);
464f49d8
DB
1485 else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1486 ivarop = TREE_OPERAND (test, 0);
1487 else
1488 return NULL_TREE;
1489
599eabdb 1490 if (TREE_CODE (ivarop) != SSA_NAME)
36d59cf7 1491 return NULL_TREE;
599eabdb 1492 return ivarop;
36d59cf7
DB
1493}
1494
d4e6fecb
NS
1495DEF_VEC_P(lambda_loop);
1496DEF_VEC_ALLOC_P(lambda_loop,gc);
1497
36d59cf7
DB
1498/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1499 Return the new loop nest.
1500 INDUCTIONVARS is a pointer to an array of induction variables for the
1501 loopnest that will be filled in during this process.
1502 INVARIANTS is a pointer to an array of invariants that will be filled in
1503 during this process. */
1504
1505lambda_loopnest
f67d92e9
DB
1506gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1507 struct loop * loop_nest,
d4e6fecb
NS
1508 VEC(tree,gc) **inductionvars,
1509 VEC(tree,gc) **invariants,
f67d92e9 1510 bool need_perfect_nest)
36d59cf7
DB
1511{
1512 lambda_loopnest ret;
1513 struct loop *temp;
1514 int depth = 0;
1515 size_t i;
d4e6fecb
NS
1516 VEC(lambda_loop,gc) *loops = NULL;
1517 VEC(tree,gc) *uboundvars = NULL;
1518 VEC(tree,gc) *lboundvars = NULL;
1519 VEC(int,gc) *steps = NULL;
36d59cf7
DB
1520 lambda_loop newloop;
1521 tree inductionvar = NULL;
464f49d8
DB
1522
1523 depth = depth_of_nest (loop_nest);
36d59cf7
DB
1524 temp = loop_nest;
1525 while (temp)
1526 {
1527 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
f67d92e9
DB
1528 &inductionvar, *inductionvars,
1529 &lboundvars, &uboundvars,
1530 &steps);
36d59cf7
DB
1531 if (!newloop)
1532 return NULL;
d4e6fecb
NS
1533 VEC_safe_push (tree, gc, *inductionvars, inductionvar);
1534 VEC_safe_push (lambda_loop, gc, loops, newloop);
36d59cf7
DB
1535 temp = temp->inner;
1536 }
464f49d8 1537 if (need_perfect_nest)
f67d92e9 1538 {
464f49d8
DB
1539 if (!perfect_nestify (currloops, loop_nest,
1540 lboundvars, uboundvars, steps, *inductionvars))
1541 {
1542 if (dump_file)
1543 fprintf (dump_file, "Not a perfect loop nest and couldn't convert to one.\n");
1544 return NULL;
1545 }
1546 else if (dump_file)
1547 fprintf (dump_file, "Successfully converted loop nest to perfect loop nest.\n");
1548
1549
f67d92e9 1550 }
36d59cf7
DB
1551 ret = lambda_loopnest_new (depth, 2 * depth);
1552 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1553 LN_LOOPS (ret)[i] = newloop;
1554
1555 return ret;
1556
1557}
1558
464f49d8 1559
36d59cf7
DB
1560/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1561 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1562 inserted for us are stored. INDUCTION_VARS is the array of induction
464f49d8
DB
1563 variables for the loop this LBV is from. TYPE is the tree type to use for
1564 the variables and trees involved. */
36d59cf7
DB
1565
1566static tree
464f49d8 1567lbv_to_gcc_expression (lambda_body_vector lbv,
d4e6fecb 1568 tree type, VEC(tree,gc) *induction_vars,
464f49d8 1569 tree * stmts_to_insert)
36d59cf7
DB
1570{
1571 tree stmts, stmt, resvar, name;
464f49d8 1572 tree iv;
36d59cf7
DB
1573 size_t i;
1574 tree_stmt_iterator tsi;
1575
1576 /* Create a statement list and a linear expression temporary. */
1577 stmts = alloc_stmt_list ();
464f49d8 1578 resvar = create_tmp_var (type, "lbvtmp");
36d59cf7
DB
1579 add_referenced_tmp_var (resvar);
1580
1581 /* Start at 0. */
1582 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1583 name = make_ssa_name (resvar, stmt);
1584 TREE_OPERAND (stmt, 0) = name;
1585 tsi = tsi_last (stmts);
1586 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1587
464f49d8 1588 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
36d59cf7
DB
1589 {
1590 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1591 {
1592 tree newname;
464f49d8
DB
1593 tree coeffmult;
1594
36d59cf7 1595 /* newname = coefficient * induction_variable */
464f49d8 1596 coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
36d59cf7 1597 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8
DB
1598 fold (build (MULT_EXPR, type, iv, coeffmult)));
1599
36d59cf7
DB
1600 newname = make_ssa_name (resvar, stmt);
1601 TREE_OPERAND (stmt, 0) = newname;
464f49d8 1602 fold_stmt (&stmt);
36d59cf7
DB
1603 tsi = tsi_last (stmts);
1604 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
464f49d8 1605
36d59cf7
DB
1606 /* name = name + newname */
1607 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8 1608 build (PLUS_EXPR, type, name, newname));
36d59cf7
DB
1609 name = make_ssa_name (resvar, stmt);
1610 TREE_OPERAND (stmt, 0) = name;
464f49d8 1611 fold_stmt (&stmt);
36d59cf7
DB
1612 tsi = tsi_last (stmts);
1613 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
464f49d8 1614
36d59cf7
DB
1615 }
1616 }
1617
1618 /* Handle any denominator that occurs. */
1619 if (LBV_DENOMINATOR (lbv) != 1)
1620 {
464f49d8 1621 tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
36d59cf7 1622 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8 1623 build (CEIL_DIV_EXPR, type, name, denominator));
36d59cf7
DB
1624 name = make_ssa_name (resvar, stmt);
1625 TREE_OPERAND (stmt, 0) = name;
464f49d8 1626 fold_stmt (&stmt);
36d59cf7
DB
1627 tsi = tsi_last (stmts);
1628 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1629 }
1630 *stmts_to_insert = stmts;
1631 return name;
1632}
1633
1634/* Convert a linear expression from coefficient and constant form to a
1635 gcc tree.
1636 Return the tree that represents the final value of the expression.
1637 LLE is the linear expression to convert.
1638 OFFSET is the linear offset to apply to the expression.
464f49d8 1639 TYPE is the tree type to use for the variables and math.
36d59cf7
DB
1640 INDUCTION_VARS is a vector of induction variables for the loops.
1641 INVARIANTS is a vector of the loop nest invariants.
1642 WRAP specifies what tree code to wrap the results in, if there is more than
1643 one (it is either MAX_EXPR, or MIN_EXPR).
1644 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1645 statements that need to be inserted for the linear expression. */
1646
1647static tree
1648lle_to_gcc_expression (lambda_linear_expression lle,
1649 lambda_linear_expression offset,
464f49d8 1650 tree type,
d4e6fecb
NS
1651 VEC(tree,gc) *induction_vars,
1652 VEC(tree,gc) *invariants,
36d59cf7
DB
1653 enum tree_code wrap, tree * stmts_to_insert)
1654{
1655 tree stmts, stmt, resvar, name;
1656 size_t i;
1657 tree_stmt_iterator tsi;
464f49d8 1658 tree iv, invar;
d4e6fecb 1659 VEC(tree,gc) *results = NULL;
36d59cf7
DB
1660
1661 name = NULL_TREE;
1662 /* Create a statement list and a linear expression temporary. */
1663 stmts = alloc_stmt_list ();
464f49d8 1664 resvar = create_tmp_var (type, "lletmp");
36d59cf7 1665 add_referenced_tmp_var (resvar);
36d59cf7
DB
1666
1667 /* Build up the linear expressions, and put the variable representing the
1668 result in the results array. */
1669 for (; lle != NULL; lle = LLE_NEXT (lle))
1670 {
1671 /* Start at name = 0. */
1672 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1673 name = make_ssa_name (resvar, stmt);
1674 TREE_OPERAND (stmt, 0) = name;
464f49d8 1675 fold_stmt (&stmt);
36d59cf7
DB
1676 tsi = tsi_last (stmts);
1677 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1678
1679 /* First do the induction variables.
1680 at the end, name = name + all the induction variables added
1681 together. */
464f49d8 1682 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
36d59cf7
DB
1683 {
1684 if (LLE_COEFFICIENTS (lle)[i] != 0)
1685 {
1686 tree newname;
1687 tree mult;
1688 tree coeff;
1689
1690 /* mult = induction variable * coefficient. */
1691 if (LLE_COEFFICIENTS (lle)[i] == 1)
1692 {
1693 mult = VEC_index (tree, induction_vars, i);
1694 }
1695 else
1696 {
464f49d8 1697 coeff = build_int_cst (type,
36d59cf7 1698 LLE_COEFFICIENTS (lle)[i]);
464f49d8 1699 mult = fold (build (MULT_EXPR, type, iv, coeff));
36d59cf7
DB
1700 }
1701
1702 /* newname = mult */
1703 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1704 newname = make_ssa_name (resvar, stmt);
1705 TREE_OPERAND (stmt, 0) = newname;
464f49d8 1706 fold_stmt (&stmt);
36d59cf7
DB
1707 tsi = tsi_last (stmts);
1708 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1709
1710 /* name = name + newname */
1711 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8 1712 build (PLUS_EXPR, type, name, newname));
36d59cf7
DB
1713 name = make_ssa_name (resvar, stmt);
1714 TREE_OPERAND (stmt, 0) = name;
464f49d8 1715 fold_stmt (&stmt);
36d59cf7
DB
1716 tsi = tsi_last (stmts);
1717 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1718 }
1719 }
1720
1721 /* Handle our invariants.
1722 At the end, we have name = name + result of adding all multiplied
1723 invariants. */
464f49d8 1724 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
36d59cf7
DB
1725 {
1726 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1727 {
1728 tree newname;
1729 tree mult;
1730 tree coeff;
464f49d8 1731 int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
36d59cf7 1732 /* mult = invariant * coefficient */
464f49d8 1733 if (invcoeff == 1)
36d59cf7 1734 {
464f49d8 1735 mult = invar;
36d59cf7
DB
1736 }
1737 else
1738 {
464f49d8
DB
1739 coeff = build_int_cst (type, invcoeff);
1740 mult = fold (build (MULT_EXPR, type, invar, coeff));
36d59cf7
DB
1741 }
1742
1743 /* newname = mult */
1744 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1745 newname = make_ssa_name (resvar, stmt);
1746 TREE_OPERAND (stmt, 0) = newname;
464f49d8 1747 fold_stmt (&stmt);
36d59cf7
DB
1748 tsi = tsi_last (stmts);
1749 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1750
1751 /* name = name + newname */
1752 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8 1753 build (PLUS_EXPR, type, name, newname));
36d59cf7
DB
1754 name = make_ssa_name (resvar, stmt);
1755 TREE_OPERAND (stmt, 0) = name;
464f49d8 1756 fold_stmt (&stmt);
36d59cf7
DB
1757 tsi = tsi_last (stmts);
1758 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1759 }
1760 }
1761
1762 /* Now handle the constant.
1763 name = name + constant. */
1764 if (LLE_CONSTANT (lle) != 0)
1765 {
1766 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8
DB
1767 build (PLUS_EXPR, type, name,
1768 build_int_cst (type, LLE_CONSTANT (lle))));
36d59cf7
DB
1769 name = make_ssa_name (resvar, stmt);
1770 TREE_OPERAND (stmt, 0) = name;
464f49d8 1771 fold_stmt (&stmt);
36d59cf7
DB
1772 tsi = tsi_last (stmts);
1773 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1774 }
1775
1776 /* Now handle the offset.
1777 name = name + linear offset. */
1778 if (LLE_CONSTANT (offset) != 0)
1779 {
1780 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8
DB
1781 build (PLUS_EXPR, type, name,
1782 build_int_cst (type, LLE_CONSTANT (offset))));
36d59cf7
DB
1783 name = make_ssa_name (resvar, stmt);
1784 TREE_OPERAND (stmt, 0) = name;
464f49d8 1785 fold_stmt (&stmt);
36d59cf7
DB
1786 tsi = tsi_last (stmts);
1787 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1788 }
1789
1790 /* Handle any denominator that occurs. */
1791 if (LLE_DENOMINATOR (lle) != 1)
1792 {
1793 if (wrap == MAX_EXPR)
1794 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8
DB
1795 build (CEIL_DIV_EXPR, type, name,
1796 build_int_cst (type, LLE_DENOMINATOR (lle))));
36d59cf7
DB
1797 else if (wrap == MIN_EXPR)
1798 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8
DB
1799 build (FLOOR_DIV_EXPR, type, name,
1800 build_int_cst (type, LLE_DENOMINATOR (lle))));
36d59cf7 1801 else
599eabdb 1802 gcc_unreachable();
36d59cf7
DB
1803
1804 /* name = {ceil, floor}(name/denominator) */
1805 name = make_ssa_name (resvar, stmt);
1806 TREE_OPERAND (stmt, 0) = name;
1807 tsi = tsi_last (stmts);
1808 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1809 }
d4e6fecb 1810 VEC_safe_push (tree, gc, results, name);
36d59cf7
DB
1811 }
1812
1813 /* Again, out of laziness, we don't handle this case yet. It's not
1814 hard, it just hasn't occurred. */
599eabdb
DB
1815 gcc_assert (VEC_length (tree, results) <= 2);
1816
36d59cf7
DB
1817 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1818 if (VEC_length (tree, results) > 1)
1819 {
1820 tree op1 = VEC_index (tree, results, 0);
1821 tree op2 = VEC_index (tree, results, 1);
1822 stmt = build (MODIFY_EXPR, void_type_node, resvar,
464f49d8 1823 build (wrap, type, op1, op2));
36d59cf7
DB
1824 name = make_ssa_name (resvar, stmt);
1825 TREE_OPERAND (stmt, 0) = name;
1826 tsi = tsi_last (stmts);
1827 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1828 }
1829
1830 *stmts_to_insert = stmts;
1831 return name;
1832}
1833
1834/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1835 it, back into gcc code. This changes the
1836 loops, their induction variables, and their bodies, so that they
1837 match the transformed loopnest.
1838 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1839 loopnest.
1840 OLD_IVS is a vector of induction variables from the old loopnest.
1841 INVARIANTS is a vector of loop invariants from the old loopnest.
1842 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1843 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1844 NEW_LOOPNEST. */
464f49d8 1845
36d59cf7
DB
1846void
1847lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
d4e6fecb
NS
1848 VEC(tree,gc) *old_ivs,
1849 VEC(tree,gc) *invariants,
36d59cf7
DB
1850 lambda_loopnest new_loopnest,
1851 lambda_trans_matrix transform)
1852{
1853
1854 struct loop *temp;
1855 size_t i = 0;
1856 size_t depth = 0;
d4e6fecb 1857 VEC(tree,gc) *new_ivs = NULL;
464f49d8
DB
1858 tree oldiv;
1859
36d59cf7 1860 block_stmt_iterator bsi;
36d59cf7
DB
1861
1862 if (dump_file)
1863 {
1864 transform = lambda_trans_matrix_inverse (transform);
1865 fprintf (dump_file, "Inverse of transformation matrix:\n");
1866 print_lambda_trans_matrix (dump_file, transform);
1867 }
464f49d8 1868 depth = depth_of_nest (old_loopnest);
36d59cf7
DB
1869 temp = old_loopnest;
1870
1871 while (temp)
1872 {
1873 lambda_loop newloop;
1874 basic_block bb;
13cf6837 1875 edge exit;
36d59cf7
DB
1876 tree ivvar, ivvarinced, exitcond, stmts;
1877 enum tree_code testtype;
1878 tree newupperbound, newlowerbound;
1879 lambda_linear_expression offset;
464f49d8 1880 tree type;
92d2b330 1881 bool insert_after;
e5e656a4 1882 tree inc_stmt;
464f49d8
DB
1883
1884 oldiv = VEC_index (tree, old_ivs, i);
1885 type = TREE_TYPE (oldiv);
1886
36d59cf7
DB
1887 /* First, build the new induction variable temporary */
1888
464f49d8 1889 ivvar = create_tmp_var (type, "lnivtmp");
36d59cf7
DB
1890 add_referenced_tmp_var (ivvar);
1891
d4e6fecb 1892 VEC_safe_push (tree, gc, new_ivs, ivvar);
36d59cf7
DB
1893
1894 newloop = LN_LOOPS (new_loopnest)[i];
1895
1896 /* Linear offset is a bit tricky to handle. Punt on the unhandled
8c27b7d4 1897 cases for now. */
36d59cf7 1898 offset = LL_LINEAR_OFFSET (newloop);
464f49d8 1899
599eabdb
DB
1900 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1901 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
464f49d8 1902
36d59cf7 1903 /* Now build the new lower bounds, and insert the statements
8c27b7d4 1904 necessary to generate it on the loop preheader. */
36d59cf7
DB
1905 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1906 LL_LINEAR_OFFSET (newloop),
464f49d8 1907 type,
36d59cf7
DB
1908 new_ivs,
1909 invariants, MAX_EXPR, &stmts);
1910 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
8e731e4e 1911 bsi_commit_edge_inserts ();
36d59cf7
DB
1912 /* Build the new upper bound and insert its statements in the
1913 basic block of the exit condition */
1914 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1915 LL_LINEAR_OFFSET (newloop),
464f49d8 1916 type,
36d59cf7
DB
1917 new_ivs,
1918 invariants, MIN_EXPR, &stmts);
13cf6837 1919 exit = temp->single_exit;
36d59cf7
DB
1920 exitcond = get_loop_exit_condition (temp);
1921 bb = bb_for_stmt (exitcond);
1922 bsi = bsi_start (bb);
1923 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1924
92d2b330 1925 /* Create the new iv. */
36d59cf7 1926
92d2b330 1927 standard_iv_increment_position (temp, &bsi, &insert_after);
36d59cf7 1928 create_iv (newlowerbound,
464f49d8 1929 build_int_cst (type, LL_STEP (newloop)),
92d2b330 1930 ivvar, temp, &bsi, insert_after, &ivvar,
e5e656a4
DB
1931 NULL);
1932
1933 /* Unfortunately, the incremented ivvar that create_iv inserted may not
1934 dominate the block containing the exit condition.
1935 So we simply create our own incremented iv to use in the new exit
1936 test, and let redundancy elimination sort it out. */
1937 inc_stmt = build (PLUS_EXPR, type,
1938 ivvar, build_int_cst (type, LL_STEP (newloop)));
1939 inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1940 inc_stmt);
1941 ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1942 TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1943 bsi = bsi_for_stmt (exitcond);
1944 bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
36d59cf7
DB
1945
1946 /* Replace the exit condition with the new upper bound
1947 comparison. */
464f49d8 1948
36d59cf7 1949 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
464f49d8 1950
13cf6837
DB
1951 /* We want to build a conditional where true means exit the loop, and
1952 false means continue the loop.
1953 So swap the testtype if this isn't the way things are.*/
1954
1955 if (exit->flags & EDGE_FALSE_VALUE)
464f49d8 1956 testtype = swap_tree_comparison (testtype);
13cf6837 1957
36d59cf7
DB
1958 COND_EXPR_COND (exitcond) = build (testtype,
1959 boolean_type_node,
464f49d8 1960 newupperbound, ivvarinced);
f430bae8 1961 update_stmt (exitcond);
36d59cf7
DB
1962 VEC_replace (tree, new_ivs, i, ivvar);
1963
1964 i++;
1965 temp = temp->inner;
1966 }
464f49d8 1967
f67d92e9
DB
1968 /* Rewrite uses of the old ivs so that they are now specified in terms of
1969 the new ivs. */
464f49d8
DB
1970
1971 for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
f67d92e9 1972 {
f430bae8
AM
1973 imm_use_iterator imm_iter;
1974 use_operand_p imm_use;
1975 tree oldiv_def;
1976 tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1977
1978 gcc_assert (TREE_CODE (oldiv_stmt) == PHI_NODE
1979 || NUM_DEFS (STMT_DEF_OPS (oldiv_stmt)) == 1);
1980 if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1981 oldiv_def = PHI_RESULT (oldiv_stmt);
1982 else
1983 oldiv_def = DEF_OP (STMT_DEF_OPS (oldiv_stmt), 0);
1984
1985 FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
f67d92e9 1986 {
f430bae8 1987 tree stmt = USE_STMT (imm_use);
feb075f4
DB
1988 use_operand_p use_p;
1989 ssa_op_iter iter;
464f49d8 1990 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
feb075f4 1991 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
f67d92e9 1992 {
feb075f4 1993 if (USE_FROM_PTR (use_p) == oldiv)
f67d92e9
DB
1994 {
1995 tree newiv, stmts;
464f49d8 1996 lambda_body_vector lbv, newlbv;
f67d92e9
DB
1997 /* Compute the new expression for the induction
1998 variable. */
1999 depth = VEC_length (tree, new_ivs);
2000 lbv = lambda_body_vector_new (depth);
2001 LBV_COEFFICIENTS (lbv)[i] = 1;
464f49d8
DB
2002
2003 newlbv = lambda_body_vector_compute_new (transform, lbv);
2004
2005 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2006 new_ivs, &stmts);
1a1804c2 2007 bsi = bsi_for_stmt (stmt);
f67d92e9
DB
2008 /* Insert the statements to build that
2009 expression. */
2010 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
feb075f4 2011 propagate_value (use_p, newiv);
f430bae8 2012 update_stmt (stmt);
f67d92e9
DB
2013
2014 }
2015 }
2016 }
2017 }
36d59cf7
DB
2018}
2019
f67d92e9 2020
36d59cf7 2021/* Returns true when the vector V is lexicographically positive, in
b01d837f 2022 other words, when the first nonzero element is positive. */
36d59cf7
DB
2023
2024static bool
f67d92e9
DB
2025lambda_vector_lexico_pos (lambda_vector v,
2026 unsigned n)
36d59cf7
DB
2027{
2028 unsigned i;
2029 for (i = 0; i < n; i++)
2030 {
2031 if (v[i] == 0)
2032 continue;
2033 if (v[i] < 0)
2034 return false;
2035 if (v[i] > 0)
2036 return true;
2037 }
2038 return true;
2039}
2040
f67d92e9
DB
2041
2042/* Return TRUE if this is not interesting statement from the perspective of
2043 determining if we have a perfect loop nest. */
2044
2045static bool
2046not_interesting_stmt (tree stmt)
2047{
2048 /* Note that COND_EXPR's aren't interesting because if they were exiting the
2049 loop, we would have already failed the number of exits tests. */
2050 if (TREE_CODE (stmt) == LABEL_EXPR
2051 || TREE_CODE (stmt) == GOTO_EXPR
2052 || TREE_CODE (stmt) == COND_EXPR)
2053 return true;
2054 return false;
2055}
2056
2057/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
2058
2059static bool
2060phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2061{
2062 int i;
2063 for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2064 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2065 if (PHI_ARG_DEF (phi, i) == def)
2066 return true;
2067 return false;
2068}
2069
2070/* Return TRUE if STMT is a use of PHI_RESULT. */
2071
2072static bool
2073stmt_uses_phi_result (tree stmt, tree phi_result)
2074{
2075 use_optype uses = STMT_USE_OPS (stmt);
2076
2077 /* This is conservatively true, because we only want SIMPLE bumpers
471854f8 2078 of the form x +- constant for our pass. */
f67d92e9
DB
2079 if (NUM_USES (uses) != 1)
2080 return false;
2081 if (USE_OP (uses, 0) == phi_result)
2082 return true;
2083
2084 return false;
2085}
2086
2087/* STMT is a bumper stmt for LOOP if the version it defines is used in the
2088 in-loop-edge in a phi node, and the operand it uses is the result of that
2089 phi node.
2090 I.E. i_29 = i_3 + 1
2091 i_3 = PHI (0, i_29); */
2092
2093static bool
2094stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2095{
2096 tree use;
2097 tree def;
2098 def_optype defs = STMT_DEF_OPS (stmt);
f430bae8
AM
2099 imm_use_iterator iter;
2100 use_operand_p use_p;
f67d92e9
DB
2101
2102 if (NUM_DEFS (defs) != 1)
2103 return false;
2104 def = DEF_OP (defs, 0);
f430bae8 2105 FOR_EACH_IMM_USE_FAST (use_p, iter, def)
f67d92e9 2106 {
f430bae8 2107 use = USE_STMT (use_p);
f67d92e9
DB
2108 if (TREE_CODE (use) == PHI_NODE)
2109 {
2110 if (phi_loop_edge_uses_def (loop, use, def))
2111 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2112 return true;
2113 }
2114 }
2115 return false;
2116}
464f49d8
DB
2117
2118
f67d92e9
DB
2119/* Return true if LOOP is a perfect loop nest.
2120 Perfect loop nests are those loop nests where all code occurs in the
2121 innermost loop body.
2122 If S is a program statement, then
2123
454ff5cb 2124 i.e.
f67d92e9
DB
2125 DO I = 1, 20
2126 S1
2127 DO J = 1, 20
2128 ...
2129 END DO
2130 END DO
2131 is not a perfect loop nest because of S1.
2132
2133 DO I = 1, 20
2134 DO J = 1, 20
2135 S1
2136 ...
2137 END DO
2138 END DO
2139 is a perfect loop nest.
2140
2141 Since we don't have high level loops anymore, we basically have to walk our
2142 statements and ignore those that are there because the loop needs them (IE
2143 the induction variable increment, and jump back to the top of the loop). */
2144
2145bool
2146perfect_nest_p (struct loop *loop)
2147{
2148 basic_block *bbs;
2149 size_t i;
2150 tree exit_cond;
2151
2152 if (!loop->inner)
2153 return true;
2154 bbs = get_loop_body (loop);
2155 exit_cond = get_loop_exit_condition (loop);
2156 for (i = 0; i < loop->num_nodes; i++)
2157 {
2158 if (bbs[i]->loop_father == loop)
2159 {
2160 block_stmt_iterator bsi;
2161 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2162 {
2163 tree stmt = bsi_stmt (bsi);
2164 if (stmt == exit_cond
2165 || not_interesting_stmt (stmt)
2166 || stmt_is_bumper_for_loop (loop, stmt))
2167 continue;
2168 free (bbs);
2169 return false;
2170 }
2171 }
2172 }
2173 free (bbs);
2174 /* See if the inner loops are perfectly nested as well. */
2175 if (loop->inner)
2176 return perfect_nest_p (loop->inner);
2177 return true;
2178}
2179
f67d92e9
DB
2180/* Replace the USES of tree X in STMT with tree Y */
2181
2182static void
2183replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2184{
2185 use_optype uses = STMT_USE_OPS (stmt);
2186 size_t i;
2187 for (i = 0; i < NUM_USES (uses); i++)
2188 {
2189 if (USE_OP (uses, i) == x)
2190 SET_USE_OP (uses, i, y);
2191 }
2192}
2193
471854f8 2194/* Return TRUE if STMT uses tree OP in it's uses. */
f67d92e9
DB
2195
2196static bool
2197stmt_uses_op (tree stmt, tree op)
2198{
2199 use_optype uses = STMT_USE_OPS (stmt);
2200 size_t i;
2201 for (i = 0; i < NUM_USES (uses); i++)
2202 {
2203 if (USE_OP (uses, i) == op)
2204 return true;
2205 }
2206 return false;
2207}
2208
2209/* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2210 one. LOOPIVS is a vector of induction variables, one per loop.
2211 ATM, we only handle imperfect nests of depth 2, where all of the statements
2212 occur after the inner loop. */
2213
2214static bool
2215can_convert_to_perfect_nest (struct loop *loop,
d4e6fecb 2216 VEC(tree,gc) *loopivs)
f67d92e9
DB
2217{
2218 basic_block *bbs;
903a33c9 2219 tree exit_condition, phi;
f67d92e9
DB
2220 size_t i;
2221 block_stmt_iterator bsi;
903a33c9 2222 basic_block exitdest;
f67d92e9
DB
2223
2224 /* Can't handle triply nested+ loops yet. */
2225 if (!loop->inner || loop->inner->inner)
2226 return false;
2227
2228 /* We only handle moving the after-inner-body statements right now, so make
2229 sure all the statements we need to move are located in that position. */
2230 bbs = get_loop_body (loop);
2231 exit_condition = get_loop_exit_condition (loop);
2232 for (i = 0; i < loop->num_nodes; i++)
2233 {
2234 if (bbs[i]->loop_father == loop)
2235 {
2236 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2237 {
2238 size_t j;
2239 tree stmt = bsi_stmt (bsi);
2240 if (stmt == exit_condition
2241 || not_interesting_stmt (stmt)
2242 || stmt_is_bumper_for_loop (loop, stmt))
2243 continue;
2244 /* If the statement uses inner loop ivs, we == screwed. */
2245 for (j = 1; j < VEC_length (tree, loopivs); j++)
2246 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2247 {
2248 free (bbs);
2249 return false;
2250 }
2251
2252 /* If the bb of a statement we care about isn't dominated by
471854f8 2253 the header of the inner loop, then we are also screwed. */
f67d92e9
DB
2254 if (!dominated_by_p (CDI_DOMINATORS,
2255 bb_for_stmt (stmt),
2256 loop->inner->header))
2257 {
2258 free (bbs);
2259 return false;
2260 }
2261 }
2262 }
2263 }
903a33c9
DB
2264
2265 /* We also need to make sure the loop exit only has simple copy phis in it,
2266 otherwise we don't know how to transform it into a perfect nest right
2267 now. */
2268 exitdest = loop->single_exit->dest;
2269
2270 for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2271 if (PHI_NUM_ARGS (phi) != 1)
2272 return false;
2273
f67d92e9
DB
2274 return true;
2275}
2276
2277/* Transform the loop nest into a perfect nest, if possible.
2278 LOOPS is the current struct loops *
2279 LOOP is the loop nest to transform into a perfect nest
2280 LBOUNDS are the lower bounds for the loops to transform
2281 UBOUNDS are the upper bounds for the loops to transform
2282 STEPS is the STEPS for the loops to transform.
2283 LOOPIVS is the induction variables for the loops to transform.
2284
2285 Basically, for the case of
2286
2287 FOR (i = 0; i < 50; i++)
2288 {
2289 FOR (j =0; j < 50; j++)
2290 {
2291 <whatever>
2292 }
2293 <some code>
2294 }
2295
2296 This function will transform it into a perfect loop nest by splitting the
2297 outer loop into two loops, like so:
2298
2299 FOR (i = 0; i < 50; i++)
2300 {
2301 FOR (j = 0; j < 50; j++)
2302 {
2303 <whatever>
2304 }
2305 }
2306
2307 FOR (i = 0; i < 50; i ++)
2308 {
2309 <some code>
2310 }
2311
2312 Return FALSE if we can't make this loop into a perfect nest. */
2313static bool
2314perfect_nestify (struct loops *loops,
2315 struct loop *loop,
d4e6fecb
NS
2316 VEC(tree,gc) *lbounds,
2317 VEC(tree,gc) *ubounds,
2318 VEC(int,gc) *steps,
2319 VEC(tree,gc) *loopivs)
f67d92e9
DB
2320{
2321 basic_block *bbs;
2322 tree exit_condition;
2323 tree then_label, else_label, cond_stmt;
2324 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2325 size_t i;
2326 block_stmt_iterator bsi;
92d2b330 2327 bool insert_after;
f67d92e9
DB
2328 edge e;
2329 struct loop *newloop;
2330 tree phi;
2331 tree uboundvar;
2332 tree stmt;
464f49d8 2333 tree oldivvar, ivvar, ivvarinced;
d4e6fecb 2334 VEC(tree,gc) *phis = NULL;
f67d92e9
DB
2335
2336 if (!can_convert_to_perfect_nest (loop, loopivs))
2337 return false;
2338
f67d92e9
DB
2339 /* Create the new loop */
2340
2341 olddest = loop->single_exit->dest;
2342 preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2343 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2344
eae600b9 2345 /* Push the exit phi nodes that we are moving. */
f67d92e9
DB
2346 for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2347 {
d4e6fecb
NS
2348 VEC_reserve (tree, gc, phis, 2);
2349 VEC_quick_push (tree, phis, PHI_RESULT (phi));
2350 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
f67d92e9 2351 }
c5cbcccf 2352 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
464f49d8 2353
eae600b9
DB
2354 /* Remove the exit phis from the old basic block. Make sure to set
2355 PHI_RESULT to null so it doesn't get released. */
464f49d8 2356 while (phi_nodes (olddest) != NULL)
eae600b9
DB
2357 {
2358 SET_PHI_RESULT (phi_nodes (olddest), NULL);
d19e3ef6 2359 remove_phi_node (phi_nodes (olddest), NULL);
eae600b9 2360 }
464f49d8 2361
eae600b9 2362 /* and add them back to the new basic block. */
f67d92e9
DB
2363 while (VEC_length (tree, phis) != 0)
2364 {
2365 tree def;
2366 tree phiname;
2367 def = VEC_pop (tree, phis);
464f49d8 2368 phiname = VEC_pop (tree, phis);
f67d92e9 2369 phi = create_phi_node (phiname, preheaderbb);
c5cbcccf 2370 add_phi_arg (phi, def, single_pred_edge (preheaderbb));
464f49d8 2371 }
71882046 2372 flush_pending_stmts (e);
464f49d8 2373
f67d92e9
DB
2374 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2375 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2376 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2377 then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2378 else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2379 cond_stmt = build (COND_EXPR, void_type_node,
2380 build (NE_EXPR, boolean_type_node,
2381 integer_one_node,
2382 integer_zero_node),
2383 then_label, else_label);
2384 bsi = bsi_start (bodybb);
2385 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2386 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2387 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2388 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2389
2390 /* Update the loop structures. */
2391 newloop = duplicate_loop (loops, loop, olddest->loop_father);
2392 newloop->header = headerbb;
2393 newloop->latch = latchbb;
2394 newloop->single_exit = e;
2395 add_bb_to_loop (latchbb, newloop);
2396 add_bb_to_loop (bodybb, newloop);
2397 add_bb_to_loop (headerbb, newloop);
f67d92e9
DB
2398 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2399 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2400 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2401 loop->single_exit->src);
2402 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2403 set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2404 /* Create the new iv. */
2405 ivvar = create_tmp_var (integer_type_node, "perfectiv");
2406 add_referenced_tmp_var (ivvar);
92d2b330 2407 standard_iv_increment_position (newloop, &bsi, &insert_after);
f67d92e9 2408 create_iv (VEC_index (tree, lbounds, 0),
464f49d8 2409 build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
92d2b330 2410 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
f67d92e9
DB
2411
2412 /* Create the new upper bound. This may be not just a variable, so we copy
2413 it to one just in case. */
2414
2415 exit_condition = get_loop_exit_condition (newloop);
2416 uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2417 add_referenced_tmp_var (uboundvar);
2418 stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2419 VEC_index (tree, ubounds, 0));
2420 uboundvar = make_ssa_name (uboundvar, stmt);
2421 TREE_OPERAND (stmt, 0) = uboundvar;
92d2b330
SP
2422
2423 if (insert_after)
2424 bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2425 else
2426 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2427
464f49d8 2428 COND_EXPR_COND (exit_condition) = build (GE_EXPR,
f67d92e9 2429 boolean_type_node,
464f49d8
DB
2430 uboundvar,
2431 ivvarinced);
f67d92e9
DB
2432
2433 bbs = get_loop_body (loop);
2434 /* Now replace the induction variable in the moved statements with the
2435 correct loop induction variable. */
464f49d8 2436 oldivvar = VEC_index (tree, loopivs, 0);
f67d92e9
DB
2437 for (i = 0; i < loop->num_nodes; i++)
2438 {
2439 block_stmt_iterator tobsi = bsi_last (bodybb);
2440 if (bbs[i]->loop_father == loop)
2441 {
2442 /* Note that the bsi only needs to be explicitly incremented
2443 when we don't move something, since it is automatically
2444 incremented when we do. */
2445 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2446 {
2447 tree stmt = bsi_stmt (bsi);
2448 if (stmt == exit_condition
2449 || not_interesting_stmt (stmt)
2450 || stmt_is_bumper_for_loop (loop, stmt))
2451 {
2452 bsi_next (&bsi);
2453 continue;
2454 }
464f49d8 2455 replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
f67d92e9
DB
2456 bsi_move_before (&bsi, &tobsi);
2457 }
2458 }
2459 }
2460 free (bbs);
f67d92e9
DB
2461 return perfect_nest_p (loop);
2462}
2463
36d59cf7
DB
2464/* Return true if TRANS is a legal transformation matrix that respects
2465 the dependence vectors in DISTS and DIRS. The conservative answer
2466 is false.
2467
2468 "Wolfe proves that a unimodular transformation represented by the
2469 matrix T is legal when applied to a loop nest with a set of
2470 lexicographically non-negative distance vectors RDG if and only if
2471 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
454ff5cb 2472 i.e.: if and only if it transforms the lexicographically positive
36d59cf7
DB
2473 distance vectors to lexicographically positive vectors. Note that
2474 a unimodular matrix must transform the zero vector (and only it) to
2475 the zero vector." S.Muchnick. */
2476
2477bool
f67d92e9
DB
2478lambda_transform_legal_p (lambda_trans_matrix trans,
2479 int nb_loops,
2480 varray_type dependence_relations)
36d59cf7
DB
2481{
2482 unsigned int i;
2483 lambda_vector distres;
2484 struct data_dependence_relation *ddr;
2485
2486#if defined ENABLE_CHECKING
f67d92e9
DB
2487 if (LTM_COLSIZE (trans) != nb_loops
2488 || LTM_ROWSIZE (trans) != nb_loops)
2489 abort ();
36d59cf7
DB
2490#endif
2491
2492 /* When there is an unknown relation in the dependence_relations, we
2493 know that it is no worth looking at this loop nest: give up. */
f67d92e9 2494 ddr = (struct data_dependence_relation *)
36d59cf7
DB
2495 VARRAY_GENERIC_PTR (dependence_relations, 0);
2496 if (ddr == NULL)
2497 return true;
2498 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2499 return false;
2500
2501 distres = lambda_vector_new (nb_loops);
2502
2503 /* For each distance vector in the dependence graph. */
2504 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2505 {
f67d92e9 2506 ddr = (struct data_dependence_relation *)
464f49d8 2507 VARRAY_GENERIC_PTR (dependence_relations, i);
f67d92e9 2508
36d59cf7 2509 /* Don't care about relations for which we know that there is no
f67d92e9
DB
2510 dependence, nor about read-read (aka. output-dependences):
2511 these data accesses can happen in any order. */
36d59cf7
DB
2512 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2513 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2514 continue;
464f49d8 2515
36d59cf7
DB
2516 /* Conservatively answer: "this transformation is not valid". */
2517 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2518 return false;
464f49d8
DB
2519
2520 /* If the dependence could not be captured by a distance vector,
2521 conservatively answer that the transform is not valid. */
2522 if (DDR_DIST_VECT (ddr) == NULL)
2523 return false;
36d59cf7
DB
2524
2525 /* Compute trans.dist_vect */
f67d92e9 2526 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
36d59cf7
DB
2527 DDR_DIST_VECT (ddr), distres);
2528
2529 if (!lambda_vector_lexico_pos (distres, nb_loops))
2530 return false;
2531 }
36d59cf7
DB
2532 return true;
2533}