gcc/graphite-interchange.c

   1 /* Interchange heuristics and transform for loop interchange on
   2    polyhedral representation.
   3
   4    Copyright (C) 2009-2013 Free Software Foundation, Inc.
   5    Contributed by Sebastian Pop <sebastian.pop@amd.com> and
   6    Harsha Jagasia <harsha.jagasia@amd.com>.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify
  11 it under the terms of the GNU General Public License as published by
  12 the Free Software Foundation; either version 3, or (at your option)
  13 any later version.
  14
  15 GCC is distributed in the hope that it will be useful,
  16 but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18 GNU General Public License for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24 #include "config.h"
  25
  26 #ifdef HAVE_cloog
  27 #include <isl/aff.h>
  28 #include <isl/set.h>
  29 #include <isl/map.h>
  30 #include <isl/union_map.h>
  31 #include <isl/ilp.h>
  32 #include <cloog/cloog.h>
  33 #include <cloog/isl/domain.h>
  34 #endif
  35
  36 #include "system.h"
  37 #include "coretypes.h"
  38 #include "tree.h"
  39 #include "gimple.h"
  40 #include "tree-ssa-loop.h"
  41 #include "dumpfile.h"
  42 #include "cfgloop.h"
  43 #include "tree-chrec.h"
  44 #include "tree-data-ref.h"
  45 #include "tree-scalar-evolution.h"
  46 #include "sese.h"
  47
  48 #ifdef HAVE_cloog
  49 #include "graphite-poly.h"
  50
  51 /* XXX isl rewrite following comment */
  52 /* Builds a linear expression, of dimension DIM, representing PDR's
  53    memory access:
  54
  55    L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}.
  56
  57    For an array A[10][20] with two subscript locations s0 and s1, the
  58    linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0
  59    corresponds to a memory stride of 20.
  60
  61    OFFSET is a number of dimensions to prepend before the
  62    subscript dimensions: s_0, s_1, ..., s_n.
  63
  64    Thus, the final linear expression has the following format:
  65    0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n
  66    where the expression itself is:
  67    c_0 * s_0 + c_1 * s_1 + ... c_n * s_n.  */
  68
  69 static isl_constraint *
  70 build_linearized_memory_access (isl_map *map, poly_dr_p pdr)
  71 {
  72   isl_constraint *res;
  73   isl_local_space *ls = isl_local_space_from_space (isl_map_get_space (map));
  74   unsigned offset, nsubs;
  75   int i;
  76   isl_int size, subsize;
  77
  78   res = isl_equality_alloc (ls);
  79   isl_int_init (size);
  80   isl_int_set_ui (size, 1);
  81   isl_int_init (subsize);
  82   isl_int_set_ui (subsize, 1);
  83
  84   nsubs = isl_set_dim (pdr->extent, isl_dim_set);
  85   /* -1 for the already included L dimension.  */
  86   offset = isl_map_dim (map, isl_dim_out) - 1 - nsubs;
  87   res = isl_constraint_set_coefficient_si (res, isl_dim_out, offset + nsubs, -1);
  88   /* Go through all subscripts from last to first.  First dimension
  89      is the alias set, ignore it.  */
  90   for (i = nsubs - 1; i >= 1; i--)
  91     {
  92       isl_space *dc;
  93       isl_aff *aff;
  94
  95       res = isl_constraint_set_coefficient (res, isl_dim_out, offset + i, size);
  96
  97       dc = isl_set_get_space (pdr->extent);
  98       aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
  99       aff = isl_aff_set_coefficient_si (aff, isl_dim_in, i, 1);
 100       isl_set_max (pdr->extent, aff, &subsize);
 101       isl_aff_free (aff);
 102       isl_int_mul (size, size, subsize);
 103     }
 104
 105   isl_int_clear (subsize);
 106   isl_int_clear (size);
 107
 108   return res;
 109 }
 110
 111 /* Set STRIDE to the stride of PDR in memory by advancing by one in
 112    the loop at DEPTH.  */
 113
 114 static void
 115 pdr_stride_in_loop (mpz_t stride, graphite_dim_t depth, poly_dr_p pdr)
 116 {
 117   poly_bb_p pbb = PDR_PBB (pdr);
 118   isl_map *map;
 119   isl_set *set;
 120   isl_aff *aff;
 121   isl_space *dc;
 122   isl_constraint *lma, *c;
 123   isl_int islstride;
 124   graphite_dim_t time_depth;
 125   unsigned offset, nt;
 126   unsigned i;
 127   /* XXX isl rewrite following comments.  */
 128   /* Builds a partial difference equations and inserts them
 129      into pointset powerset polyhedron P.  Polyhedron is assumed
 130      to have the format: T|I|T'|I'|G|S|S'|l1|l2.
 131
 132      TIME_DEPTH is the time dimension w.r.t. which we are
 133      differentiating.
 134      OFFSET represents the number of dimensions between
 135      columns t_{time_depth} and t'_{time_depth}.
 136      DIM_SCTR is the number of scattering dimensions.  It is
 137      essentially the dimensionality of the T vector.
 138
 139      The following equations are inserted into the polyhedron P:
 140      | t_1 = t_1'
 141      | ...
 142      | t_{time_depth-1} = t'_{time_depth-1}
 143      | t_{time_depth} = t'_{time_depth} + 1
 144      | t_{time_depth+1} = t'_{time_depth + 1}
 145      | ...
 146      | t_{dim_sctr} = t'_{dim_sctr}.  */
 147
 148   /* Add the equality: t_{time_depth} = t'_{time_depth} + 1.
 149      This is the core part of this alogrithm, since this
 150      constraint asks for the memory access stride (difference)
 151      between two consecutive points in time dimensions.  */
 152
 153   /* Add equalities:
 154      | t1 = t1'
 155      | ...
 156      | t_{time_depth-1} = t'_{time_depth-1}
 157      | t_{time_depth+1} = t'_{time_depth+1}
 158      | ...
 159      | t_{dim_sctr} = t'_{dim_sctr}
 160
 161      This means that all the time dimensions are equal except for
 162      time_depth, where the constraint is t_{depth} = t'_{depth} + 1
 163      step.  More to this: we should be careful not to add equalities
 164      to the 'coupled' dimensions, which happens when the one dimension
 165      is stripmined dimension, and the other dimension corresponds
 166      to the point loop inside stripmined dimension.  */
 167
 168   /* pdr->accesses:    [P1..nb_param,I1..nb_domain]->[a,S1..nb_subscript]
 169           ??? [P] not used for PDRs?
 170      pdr->extent:      [a,S1..nb_subscript]
 171      pbb->domain:      [P1..nb_param,I1..nb_domain]
 172      pbb->transformed: [P1..nb_param,I1..nb_domain]->[T1..Tnb_sctr]
 173           [T] includes local vars (currently unused)
 174
 175      First we create [P,I] -> [T,a,S].  */
 176
 177   map = isl_map_flat_range_product (isl_map_copy (pbb->transformed),
 178                                     isl_map_copy (pdr->accesses));
 179   /* Add a dimension for L: [P,I] -> [T,a,S,L].*/
 180   map = isl_map_add_dims (map, isl_dim_out, 1);
 181   /* Build a constraint for "lma[S] - L == 0", effectively calculating
 182      L in terms of subscripts.  */
 183   lma = build_linearized_memory_access (map, pdr);
 184   /* And add it to the map, so we now have:
 185      [P,I] -> [T,a,S,L] : lma([S]) == L.  */
 186   map = isl_map_add_constraint (map, lma);
 187
 188   /* Then we create  [P,I,P',I'] -> [T,a,S,L,T',a',S',L'].  */
 189   map = isl_map_flat_product (map, isl_map_copy (map));
 190
 191   /* Now add the equality T[time_depth] == T'[time_depth]+1.  This will
 192      force L' to be the linear address at T[time_depth] + 1. */
 193   time_depth = psct_dynamic_dim (pbb, depth);
 194   /* Length of [a,S] plus [L] ...  */
 195   offset = 1 + isl_map_dim (pdr->accesses, isl_dim_out);
 196   /* ... plus [T].  */
 197   offset += isl_map_dim (pbb->transformed, isl_dim_out);
 198
 199   c = isl_equality_alloc (isl_local_space_from_space (isl_map_get_space (map)));
 200   c = isl_constraint_set_coefficient_si (c, isl_dim_out, time_depth, 1);
 201   c = isl_constraint_set_coefficient_si (c, isl_dim_out,
 202                                          offset + time_depth, -1);
 203   c = isl_constraint_set_constant_si (c, 1);
 204   map = isl_map_add_constraint (map, c);
 205
 206   /* Now we equate most of the T/T' elements (making PITaSL nearly
 207      the same is (PITaSL)', except for one dimension, namely for 'depth'
 208      (an index into [I]), after translating to index into [T].  Take care
 209      to not produce an empty map, which indicates we wanted to equate
 210      two dimensions that are already coupled via the above time_depth
 211      dimension.  Happens with strip mining where several scatter dimension
 212      are interdependend.  */
 213   /* Length of [T].  */
 214   nt = pbb_nb_scattering_transform (pbb) + pbb_nb_local_vars (pbb);
 215   for (i = 0; i < nt; i++)
 216     if (i != time_depth)
 217       {
 218         isl_map *temp = isl_map_equate (isl_map_copy (map),
 219                                         isl_dim_out, i,
 220                                         isl_dim_out, offset + i);
 221         if (isl_map_is_empty (temp))
 222           isl_map_free (temp);
 223         else
 224           {
 225             isl_map_free (map);
 226             map = temp;
 227           }
 228       }
 229
 230   /* Now maximize the expression L' - L.  */
 231   set = isl_map_range (map);
 232   dc = isl_set_get_space (set);
 233   aff = isl_aff_zero_on_domain (isl_local_space_from_space (dc));
 234   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset - 1, -1);
 235   aff = isl_aff_set_coefficient_si (aff, isl_dim_in, offset + offset - 1, 1);
 236   isl_int_init (islstride);
 237   isl_set_max (set, aff, &islstride);
 238   isl_int_get_gmp (islstride, stride);
 239   isl_int_clear (islstride);
 240   isl_aff_free (aff);
 241   isl_set_free (set);
 242
 243   if (dump_file && (dump_flags & TDF_DETAILS))
 244     {
 245       gmp_fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:  %Zd ",
 246                    pbb_index (pbb), PDR_ID (pdr), (int) depth, stride);
 247     }
 248 }
 249
 250 /* Sets STRIDES to the sum of all the strides of the data references
 251    accessed in LOOP at DEPTH.  */
 252
 253 static void
 254 memory_strides_in_loop_1 (lst_p loop, graphite_dim_t depth, mpz_t strides)
 255 {
 256   int i, j;
 257   lst_p l;
 258   poly_dr_p pdr;
 259   mpz_t s, n;
 260
 261   mpz_init (s);
 262   mpz_init (n);
 263
 264   FOR_EACH_VEC_ELT (LST_SEQ (loop), j, l)
 265     if (LST_LOOP_P (l))
 266       memory_strides_in_loop_1 (l, depth, strides);
 267     else
 268       FOR_EACH_VEC_ELT (PBB_DRS (LST_PBB (l)), i, pdr)
 269         {
 270           pdr_stride_in_loop (s, depth, pdr);
 271           mpz_set_si (n, PDR_NB_REFS (pdr));
 272           mpz_mul (s, s, n);
 273           mpz_add (strides, strides, s);
 274         }
 275
 276   mpz_clear (s);
 277   mpz_clear (n);
 278 }
 279
 280 /* Sets STRIDES to the sum of all the strides of the data references
 281    accessed in LOOP at DEPTH.  */
 282
 283 static void
 284 memory_strides_in_loop (lst_p loop, graphite_dim_t depth, mpz_t strides)
 285 {
 286   if (mpz_cmp_si (loop->memory_strides, -1) == 0)
 287     {
 288       mpz_set_si (strides, 0);
 289       memory_strides_in_loop_1 (loop, depth, strides);
 290     }
 291   else
 292     mpz_set (strides, loop->memory_strides);
 293 }
 294
 295 /* Return true when the interchange of loops LOOP1 and LOOP2 is
 296    profitable.
 297
 298    Example:
 299
 300    | int a[100][100];
 301    |
 302    | int
 303    | foo (int N)
 304    | {
 305    |   int j;
 306    |   int i;
 307    |
 308    |   for (i = 0; i < N; i++)
 309    |     for (j = 0; j < N; j++)
 310    |       a[j][2 * i] += 1;
 311    |
 312    |   return a[N][12];
 313    | }
 314
   The data access a[j][2 * i] is described like this:
 316
 317    | i   j   N   a  s0  s1   1
 318    | 0   0   0   1   0   0  -5    = 0
 319    | 0  -1   0   0   1   0   0    = 0
 320    |-2   0   0   0   0   1   0    = 0
 321    | 0   0   0   0   1   0   0   >= 0
 322    | 0   0   0   0   0   1   0   >= 0
 323    | 0   0   0   0  -1   0 100   >= 0
 324    | 0   0   0   0   0  -1 100   >= 0
 325
 326    The linearized memory access L to A[100][100] is:
 327
 328    | i   j   N   a  s0  s1   1
 329    | 0   0   0   0 100   1   0
 330
 331    TODO: the shown format is not valid as it does not show the fact
 332    that the iteration domain "i j" is transformed using the scattering.
 333
 334    Next, to measure the impact of iterating once in loop "i", we build
 335    a maximization problem: first, we add to DR accesses the dimensions
 336    k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1.
 337    L1 and L2 are the linearized memory access functions.
 338
 339    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 340    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 341    | 0  -1   0   0   1   0   0   0   0   0   0   0   0    = 0  s0 = j
 342    |-2   0   0   0   0   1   0   0   0   0   0   0   0    = 0  s1 = 2 * i
 343    | 0   0   0   0   1   0   0   0   0   0   0   0   0   >= 0
 344    | 0   0   0   0   0   1   0   0   0   0   0   0   0   >= 0
 345    | 0   0   0   0  -1   0   0   0   0   0   0   0 100   >= 0
 346    | 0   0   0   0   0  -1   0   0   0   0   0   0 100   >= 0
 347    | 0   0   0   0 100   1   0   0   0  -1   0   0   0    = 0  L1 = 100 * s0 + s1
 348
 349    Then, we generate the polyhedron P2 by interchanging the dimensions
 350    (s0, s2), (s1, s3), (L1, L2), (k, i)
 351
 352    | i   j   N   a  s0  s1   k  s2  s3  L1  L2  D1   1
 353    | 0   0   0   1   0   0   0   0   0   0   0   0  -5    = 0  alias = 5
 354    | 0  -1   0   0   0   0   0   1   0   0   0   0   0    = 0  s2 = j
 355    | 0   0   0   0   0   0  -2   0   1   0   0   0   0    = 0  s3 = 2 * k
 356    | 0   0   0   0   0   0   0   1   0   0   0   0   0   >= 0
 357    | 0   0   0   0   0   0   0   0   1   0   0   0   0   >= 0
 358    | 0   0   0   0   0   0   0  -1   0   0   0   0 100   >= 0
 359    | 0   0   0   0   0   0   0   0  -1   0   0   0 100   >= 0
 360    | 0   0   0   0   0   0   0 100   1   0  -1   0   0    = 0  L2 = 100 * s2 + s3
 361
 362    then we add to P2 the equality k = i + 1:
 363
 364    |-1   0   0   0   0   0   1   0   0   0   0   0  -1    = 0  k = i + 1
 365
 366    and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)".
 367
 368    Similarly, to determine the impact of one iteration on loop "j", we
 369    interchange (k, j), we add "k = j + 1", and we compute D2 the
 370    maximal value of the difference.
 371
 372    Finally, the profitability test is D1 < D2: if in the outer loop
 373    the strides are smaller than in the inner loop, then it is
 374    profitable to interchange the loops at DEPTH1 and DEPTH2.  */
 375
 376 static bool
 377 lst_interchange_profitable_p (lst_p nest, int depth1, int depth2)
 378 {
 379   mpz_t d1, d2;
 380   bool res;
 381
 382   gcc_assert (depth1 < depth2);
 383
 384   mpz_init (d1);
 385   mpz_init (d2);
 386
 387   memory_strides_in_loop (nest, depth1, d1);
 388   memory_strides_in_loop (nest, depth2, d2);
 389
 390   res = mpz_cmp (d1, d2) < 0;
 391
 392   mpz_clear (d1);
 393   mpz_clear (d2);
 394
 395   return res;
 396 }
 397
 398 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original
 399    scattering and assigns the resulting polyhedron to the transformed
 400    scattering.  */
 401
 402 static void
 403 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2,
 404                              poly_bb_p pbb)
 405 {
 406   unsigned i;
 407   unsigned dim1 = psct_dynamic_dim (pbb, depth1);
 408   unsigned dim2 = psct_dynamic_dim (pbb, depth2);
 409   isl_space *d = isl_map_get_space (pbb->transformed);
 410   isl_space *d1 = isl_space_range (d);
 411   unsigned n = isl_space_dim (d1, isl_dim_out);
 412   isl_space *d2 = isl_space_add_dims (d1, isl_dim_in, n);
 413   isl_map *x = isl_map_universe (d2);
 414
 415   x = isl_map_equate (x, isl_dim_in, dim1, isl_dim_out, dim2);
 416   x = isl_map_equate (x, isl_dim_in, dim2, isl_dim_out, dim1);
 417
 418   for (i = 0; i < n; i++)
 419     if (i != dim1 && i != dim2)
 420       x = isl_map_equate (x, isl_dim_in, i, isl_dim_out, i);
 421
 422   pbb->transformed = isl_map_apply_range (pbb->transformed, x);
 423 }
 424
 425 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all
 426    the statements below LST.  */
 427
 428 static void
 429 lst_apply_interchange (lst_p lst, int depth1, int depth2)
 430 {
 431   if (!lst)
 432     return;
 433
 434   if (LST_LOOP_P (lst))
 435     {
 436       int i;
 437       lst_p l;
 438
 439       FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
 440         lst_apply_interchange (l, depth1, depth2);
 441     }
 442   else
 443     pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst));
 444 }
 445
 446 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is
 447    perfect: i.e. there are no sequence of statements.  */
 448
 449 static bool
 450 lst_perfectly_nested_p (lst_p loop1, lst_p loop2)
 451 {
 452   if (loop1 == loop2)
 453     return true;
 454
 455   if (!LST_LOOP_P (loop1))
 456     return false;
 457
 458   return LST_SEQ (loop1).length () == 1
 459          && lst_perfectly_nested_p (LST_SEQ (loop1)[0], loop2);
 460 }
 461
 462 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect
 463    nest.  To continue the naming tradition, this function is called
 464    after perfect_nestify.  NEST is set to the perfectly nested loop
 465    that is created.  BEFORE/AFTER are set to the loops distributed
 466    before/after the loop NEST.  */
 467
 468 static void
 469 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before,
 470                      lst_p *nest, lst_p *after)
 471 {
 472   poly_bb_p first, last;
 473
 474   gcc_assert (loop1 && loop2
 475               && loop1 != loop2
 476               && LST_LOOP_P (loop1) && LST_LOOP_P (loop2));
 477
 478   first = LST_PBB (lst_find_first_pbb (loop2));
 479   last = LST_PBB (lst_find_last_pbb (loop2));
 480
 481   *before = copy_lst (loop1);
 482   *nest = copy_lst (loop1);
 483   *after = copy_lst (loop1);
 484
 485   lst_remove_all_before_including_pbb (*before, first, false);
 486   lst_remove_all_before_including_pbb (*after, last, true);
 487
 488   lst_remove_all_before_excluding_pbb (*nest, first, true);
 489   lst_remove_all_before_excluding_pbb (*nest, last, false);
 490
 491   if (lst_empty_p (*before))
 492     {
 493       free_lst (*before);
 494       *before = NULL;
 495     }
 496   if (lst_empty_p (*after))
 497     {
 498       free_lst (*after);
 499       *after = NULL;
 500     }
 501   if (lst_empty_p (*nest))
 502     {
 503       free_lst (*nest);
 504       *nest = NULL;
 505     }
 506 }
 507
 508 /* Try to interchange LOOP1 with LOOP2 for all the statements of the
 509    body of LOOP2.  LOOP1 contains LOOP2.  Return true if it did the
 510    interchange.  */
 511
 512 static bool
 513 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2)
 514 {
 515   int depth1 = lst_depth (loop1);
 516   int depth2 = lst_depth (loop2);
 517   lst_p transformed;
 518
 519   lst_p before = NULL, nest = NULL, after = NULL;
 520
 521   if (!lst_perfectly_nested_p (loop1, loop2))
 522     lst_perfect_nestify (loop1, loop2, &before, &nest, &after);
 523
 524   if (!lst_interchange_profitable_p (loop2, depth1, depth2))
 525     return false;
 526
 527   lst_apply_interchange (loop2, depth1, depth2);
 528
 529   /* Sync the transformed LST information and the PBB scatterings
 530      before using the scatterings in the data dependence analysis.  */
 531   if (before || nest || after)
 532     {
 533       transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1,
 534                                       before, nest, after);
 535       lst_update_scattering (transformed);
 536       free_lst (transformed);
 537     }
 538
 539   if (graphite_legal_transform (scop))
 540     {
 541       if (dump_file && (dump_flags & TDF_DETAILS))
 542         fprintf (dump_file,
 543                  "Loops at depths %d and %d will be interchanged.\n",
 544                  depth1, depth2);
 545
 546       /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP.  */
 547       lst_insert_in_sequence (before, loop1, true);
 548       lst_insert_in_sequence (after, loop1, false);
 549
 550       if (nest)
 551         {
 552           lst_replace (loop1, nest);
 553           free_lst (loop1);
 554         }
 555
 556       return true;
 557     }
 558
 559   /* Undo the transform.  */
 560   free_lst (before);
 561   free_lst (nest);
 562   free_lst (after);
 563   lst_apply_interchange (loop2, depth2, depth1);
 564   return false;
 565 }
 566
 567 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged
 568    with the loop OUTER in LST_SEQ (OUTER_FATHER).  */
 569
 570 static bool
 571 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer,
 572                               lst_p inner_father)
 573 {
 574   int inner;
 575   lst_p loop1, loop2;
 576
 577   gcc_assert (outer_father
 578               && LST_LOOP_P (outer_father)
 579               && LST_LOOP_P (LST_SEQ (outer_father)[outer])
 580               && inner_father
 581               && LST_LOOP_P (inner_father));
 582
 583   loop1 = LST_SEQ (outer_father)[outer];
 584
 585   FOR_EACH_VEC_ELT (LST_SEQ (inner_father), inner, loop2)
 586     if (LST_LOOP_P (loop2)
 587         && (lst_try_interchange_loops (scop, loop1, loop2)
 588             || lst_interchange_select_inner (scop, outer_father, outer, loop2)))
 589       return true;
 590
 591   return false;
 592 }
 593
 594 /* Interchanges all the loops of LOOP and the loops of its body that
 595    are considered profitable to interchange.  Return the number of
 596    interchanged loops.  OUTER is the index in LST_SEQ (LOOP) that
 597    points to the next outer loop to be considered for interchange.  */
 598
 599 static int
 600 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer)
 601 {
 602   lst_p l;
 603   int res = 0;
 604   int i = 0;
 605   lst_p father;
 606
 607   if (!loop || !LST_LOOP_P (loop))
 608     return 0;
 609
 610   father = LST_LOOP_FATHER (loop);
 611   if (father)
 612     {
 613       while (lst_interchange_select_inner (scop, father, outer, loop))
 614         {
 615           res++;
 616           loop = LST_SEQ (father)[outer];
 617         }
 618     }
 619
 620   if (LST_LOOP_P (loop))
 621     FOR_EACH_VEC_ELT (LST_SEQ (loop), i, l)
 622       if (LST_LOOP_P (l))
 623         res += lst_interchange_select_outer (scop, l, i);
 624
 625   return res;
 626 }
 627
 628 /* Interchanges all the loop depths that are considered profitable for
 629    SCOP.  Return the number of interchanged loops.  */
 630
 631 int
 632 scop_do_interchange (scop_p scop)
 633 {
 634   int res = lst_interchange_select_outer
 635     (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0);
 636
 637   lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop));
 638
 639   return res;
 640 }
 641
 642
 643 #endif
 644