gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /* This pass walks a given loop structure searching for array
  23    references.  The information about the array accesses is recorded
  24    in DATA_REFERENCE structures.
  25
  26    The basic test for determining the dependences is:
  27    given two access functions chrec1 and chrec2 to a same array, and
  28    x and y two vectors from the iteration domain, the same element of
  29    the array is accessed twice at iterations x and y if and only if:
  30    |             chrec1 (x) == chrec2 (y).
  31
  32    The goals of this analysis are:
  33
  34    - to determine the independence: the relation between two
  35      independent accesses is qualified with the chrec_known (this
  36      information allows a loop parallelization),
  37
  38    - when two data references access the same data, to qualify the
  39      dependence relation with classic dependence representations:
  40
  41        - distance vectors
  42        - direction vectors
  43        - loop carried level dependence
  44        - polyhedron dependence
  45      or with the chains of recurrences based representation,
  46
  47    - to define a knowledge base for storing the data dependence
  48      information,
  49
  50    - to define an interface to access this data.
  51
  52
  53    Definitions:
  54
  55    - subscript: given two array accesses a subscript is the tuple
  56    composed of the access functions for a given dimension.  Example:
  57    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  58    (f1, g1), (f2, g2), (f3, g3).
  59
  60    - Diophantine equation: an equation whose coefficients and
  61    solutions are integer constants, for example the equation
  62    |   3*x + 2*y = 1
  63    has an integer solution x = 1 and y = -1.
  64
  65    References:
  66
  67    - "Advanced Compilation for High Performance Computing" by Randy
  68    Allen and Ken Kennedy.
  69    http://citeseer.ist.psu.edu/goff91practical.html
  70
  71    - "Loop Transformations for Restructuring Compilers - The Foundations"
  72    by Utpal Banerjee.
  73
  74
  75 */
  76
  77 #include "config.h"
  78 #include "system.h"
  79 #include "coretypes.h"
  80 #include "gimple-pretty-print.h"
  81 #include "tree-flow.h"
  82 #include "cfgloop.h"
  83 #include "tree-data-ref.h"
  84 #include "tree-scalar-evolution.h"
  85 #include "tree-pass.h"
  86 #include "langhooks.h"
  87 #include "tree-affine.h"
  88 #include "params.h"
  89
  90 static struct datadep_stats
  91 {
  92   int num_dependence_tests;
  93   int num_dependence_dependent;
  94   int num_dependence_independent;
  95   int num_dependence_undetermined;
  96
  97   int num_subscript_tests;
  98   int num_subscript_undetermined;
  99   int num_same_subscript_function;
 100
 101   int num_ziv;
 102   int num_ziv_independent;
 103   int num_ziv_dependent;
 104   int num_ziv_unimplemented;
 105
 106   int num_siv;
 107   int num_siv_independent;
 108   int num_siv_dependent;
 109   int num_siv_unimplemented;
 110
 111   int num_miv;
 112   int num_miv_independent;
 113   int num_miv_dependent;
 114   int num_miv_unimplemented;
 115 } dependence_stats;
 116
 117 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 118                                            struct data_reference *,
 119                                            struct data_reference *,
 120                                            struct loop *);
 121 /* Returns true iff A divides B.  */
 122
 123 static inline bool
 124 tree_fold_divides_p (const_tree a, const_tree b)
 125 {
 126   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 127   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 128   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 129 }
 130
 131 /* Returns true iff A divides B.  */
 132
 133 static inline bool
 134 int_divides_p (int a, int b)
 135 {
 136   return ((b % a) == 0);
 137 }
 138
 139 \f
 140
 141 /* Dump into FILE all the data references from DATAREFS.  */
 142
 143 static void
 144 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
 145 {
 146   unsigned int i;
 147   struct data_reference *dr;
 148
 149   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
 150     dump_data_reference (file, dr);
 151 }
 152
 153 /* Dump into STDERR all the data references from DATAREFS.  */
 154
 155 DEBUG_FUNCTION void
 156 debug_data_references (VEC (data_reference_p, heap) *datarefs)
 157 {
 158   dump_data_references (stderr, datarefs);
 159 }
 160
 161 /* Print to STDERR the data_reference DR.  */
 162
 163 DEBUG_FUNCTION void
 164 debug_data_reference (struct data_reference *dr)
 165 {
 166   dump_data_reference (stderr, dr);
 167 }
 168
 169 /* Dump function for a DATA_REFERENCE structure.  */
 170
 171 void
 172 dump_data_reference (FILE *outf,
 173                      struct data_reference *dr)
 174 {
 175   unsigned int i;
 176
 177   fprintf (outf, "#(Data Ref: \n");
 178   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 179   fprintf (outf, "#  stmt: ");
 180   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
 181   fprintf (outf, "#  ref: ");
 182   print_generic_stmt (outf, DR_REF (dr), 0);
 183   fprintf (outf, "#  base_object: ");
 184   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 185
 186   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 187     {
 188       fprintf (outf, "#  Access function %d: ", i);
 189       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 190     }
 191   fprintf (outf, "#)\n");
 192 }
 193
 194 /* Dumps the affine function described by FN to the file OUTF.  */
 195
 196 static void
 197 dump_affine_function (FILE *outf, affine_fn fn)
 198 {
 199   unsigned i;
 200   tree coef;
 201
 202   print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
 203   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
 204     {
 205       fprintf (outf, " + ");
 206       print_generic_expr (outf, coef, TDF_SLIM);
 207       fprintf (outf, " * x_%u", i);
 208     }
 209 }
 210
 211 /* Dumps the conflict function CF to the file OUTF.  */
 212
 213 static void
 214 dump_conflict_function (FILE *outf, conflict_function *cf)
 215 {
 216   unsigned i;
 217
 218   if (cf->n == NO_DEPENDENCE)
 219     fprintf (outf, "no dependence\n");
 220   else if (cf->n == NOT_KNOWN)
 221     fprintf (outf, "not known\n");
 222   else
 223     {
 224       for (i = 0; i < cf->n; i++)
 225         {
 226           fprintf (outf, "[");
 227           dump_affine_function (outf, cf->fns[i]);
 228           fprintf (outf, "]\n");
 229         }
 230     }
 231 }
 232
 233 /* Dump function for a SUBSCRIPT structure.  */
 234
 235 static void
 236 dump_subscript (FILE *outf, struct subscript *subscript)
 237 {
 238   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 239
 240   fprintf (outf, "\n (subscript \n");
 241   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 242   dump_conflict_function (outf, cf);
 243   if (CF_NONTRIVIAL_P (cf))
 244     {
 245       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 246       fprintf (outf, "  last_conflict: ");
 247       print_generic_stmt (outf, last_iteration, 0);
 248     }
 249
 250   cf = SUB_CONFLICTS_IN_B (subscript);
 251   fprintf (outf, "  iterations_that_access_an_element_twice_in_B: ");
 252   dump_conflict_function (outf, cf);
 253   if (CF_NONTRIVIAL_P (cf))
 254     {
 255       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 256       fprintf (outf, "  last_conflict: ");
 257       print_generic_stmt (outf, last_iteration, 0);
 258     }
 259
 260   fprintf (outf, "  (Subscript distance: ");
 261   print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
 262   fprintf (outf, "  )\n");
 263   fprintf (outf, " )\n");
 264 }
 265
 266 /* Print the classic direction vector DIRV to OUTF.  */
 267
 268 static void
 269 print_direction_vector (FILE *outf,
 270                         lambda_vector dirv,
 271                         int length)
 272 {
 273   int eq;
 274
 275   for (eq = 0; eq < length; eq++)
 276     {
 277       enum data_dependence_direction dir = ((enum data_dependence_direction)
 278                                             dirv[eq]);
 279
 280       switch (dir)
 281         {
 282         case dir_positive:
 283           fprintf (outf, "    +");
 284           break;
 285         case dir_negative:
 286           fprintf (outf, "    -");
 287           break;
 288         case dir_equal:
 289           fprintf (outf, "    =");
 290           break;
 291         case dir_positive_or_equal:
 292           fprintf (outf, "   +=");
 293           break;
 294         case dir_positive_or_negative:
 295           fprintf (outf, "   +-");
 296           break;
 297         case dir_negative_or_equal:
 298           fprintf (outf, "   -=");
 299           break;
 300         case dir_star:
 301           fprintf (outf, "    *");
 302           break;
 303         default:
 304           fprintf (outf, "indep");
 305           break;
 306         }
 307     }
 308   fprintf (outf, "\n");
 309 }
 310
 311 /* Print a vector of direction vectors.  */
 312
 313 static void
 314 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
 315                    int length)
 316 {
 317   unsigned j;
 318   lambda_vector v;
 319
 320   FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
 321     print_direction_vector (outf, v, length);
 322 }
 323
 324 /* Print out a vector VEC of length N to OUTFILE.  */
 325
 326 static inline void
 327 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 328 {
 329   int i;
 330
 331   for (i = 0; i < n; i++)
 332     fprintf (outfile, "%3d ", vector[i]);
 333   fprintf (outfile, "\n");
 334 }
 335
 336 /* Print a vector of distance vectors.  */
 337
 338 static void
 339 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
 340                     int length)
 341 {
 342   unsigned j;
 343   lambda_vector v;
 344
 345   FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
 346     print_lambda_vector (outf, v, length);
 347 }
 348
 349 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 350
 351 static void
 352 dump_data_dependence_relation (FILE *outf,
 353                                struct data_dependence_relation *ddr)
 354 {
 355   struct data_reference *dra, *drb;
 356
 357   fprintf (outf, "(Data Dep: \n");
 358
 359   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 360     {
 361       if (ddr)
 362         {
 363           dra = DDR_A (ddr);
 364           drb = DDR_B (ddr);
 365           if (dra)
 366             dump_data_reference (outf, dra);
 367           else
 368             fprintf (outf, "    (nil)\n");
 369           if (drb)
 370             dump_data_reference (outf, drb);
 371           else
 372             fprintf (outf, "    (nil)\n");
 373         }
 374       fprintf (outf, "    (don't know)\n)\n");
 375       return;
 376     }
 377
 378   dra = DDR_A (ddr);
 379   drb = DDR_B (ddr);
 380   dump_data_reference (outf, dra);
 381   dump_data_reference (outf, drb);
 382
 383   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 384     fprintf (outf, "    (no dependence)\n");
 385
 386   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 387     {
 388       unsigned int i;
 389       struct loop *loopi;
 390
 391       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 392         {
 393           fprintf (outf, "  access_fn_A: ");
 394           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 395           fprintf (outf, "  access_fn_B: ");
 396           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 397           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 398         }
 399
 400       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 401       fprintf (outf, "  loop nest: (");
 402       FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
 403         fprintf (outf, "%d ", loopi->num);
 404       fprintf (outf, ")\n");
 405
 406       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 407         {
 408           fprintf (outf, "  distance_vector: ");
 409           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 410                                DDR_NB_LOOPS (ddr));
 411         }
 412
 413       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 414         {
 415           fprintf (outf, "  direction_vector: ");
 416           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 417                                   DDR_NB_LOOPS (ddr));
 418         }
 419     }
 420
 421   fprintf (outf, ")\n");
 422 }
 423
 424 /* Debug version.  */
 425
 426 DEBUG_FUNCTION void
 427 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 428 {
 429   dump_data_dependence_relation (stderr, ddr);
 430 }
 431
 432 /* Dump into FILE all the dependence relations from DDRS.  */
 433
 434 void
 435 dump_data_dependence_relations (FILE *file,
 436                                 VEC (ddr_p, heap) *ddrs)
 437 {
 438   unsigned int i;
 439   struct data_dependence_relation *ddr;
 440
 441   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 442     dump_data_dependence_relation (file, ddr);
 443 }
 444
 445 /* Dump to STDERR all the dependence relations from DDRS.  */
 446
 447 DEBUG_FUNCTION void
 448 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
 449 {
 450   dump_data_dependence_relations (stderr, ddrs);
 451 }
 452
 453 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 454    the dependence relations, and VECT_SIZE is the size of the
 455    dependence vectors, or in other words the number of loops in the
 456    considered nest.  */
 457
 458 static void
 459 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
 460 {
 461   unsigned int i, j;
 462   struct data_dependence_relation *ddr;
 463   lambda_vector v;
 464
 465   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 466     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 467       {
 468         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
 469           {
 470             fprintf (file, "DISTANCE_V (");
 471             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 472             fprintf (file, ")\n");
 473           }
 474
 475         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
 476           {
 477             fprintf (file, "DIRECTION_V (");
 478             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 479             fprintf (file, ")\n");
 480           }
 481       }
 482
 483   fprintf (file, "\n\n");
 484 }
 485
 486 /* Dumps the data dependence relations DDRS in FILE.  */
 487
 488 static void
 489 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
 490 {
 491   unsigned int i;
 492   struct data_dependence_relation *ddr;
 493
 494   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 495     dump_data_dependence_relation (file, ddr);
 496
 497   fprintf (file, "\n\n");
 498 }
 499
 500 DEBUG_FUNCTION void
 501 debug_ddrs (VEC (ddr_p, heap) *ddrs)
 502 {
 503   dump_ddrs (stderr, ddrs);
 504 }
 505
 506 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 507    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 508    constant of type ssizetype, and returns true.  If we cannot do this
 509    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 510    is returned.  */
 511
 512 static bool
 513 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 514                          tree *var, tree *off)
 515 {
 516   tree var0, var1;
 517   tree off0, off1;
 518   enum tree_code ocode = code;
 519
 520   *var = NULL_TREE;
 521   *off = NULL_TREE;
 522
 523   switch (code)
 524     {
 525     case INTEGER_CST:
 526       *var = build_int_cst (type, 0);
 527       *off = fold_convert (ssizetype, op0);
 528       return true;
 529
 530     case POINTER_PLUS_EXPR:
 531       ocode = PLUS_EXPR;
 532       /* FALLTHROUGH */
 533     case PLUS_EXPR:
 534     case MINUS_EXPR:
 535       split_constant_offset (op0, &var0, &off0);
 536       split_constant_offset (op1, &var1, &off1);
 537       *var = fold_build2 (code, type, var0, var1);
 538       *off = size_binop (ocode, off0, off1);
 539       return true;
 540
 541     case MULT_EXPR:
 542       if (TREE_CODE (op1) != INTEGER_CST)
 543         return false;
 544
 545       split_constant_offset (op0, &var0, &off0);
 546       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 547       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 548       return true;
 549
 550     case ADDR_EXPR:
 551       {
 552         tree base, poffset;
 553         HOST_WIDE_INT pbitsize, pbitpos;
 554         enum machine_mode pmode;
 555         int punsignedp, pvolatilep;
 556
 557         op0 = TREE_OPERAND (op0, 0);
 558         base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
 559                                     &pmode, &punsignedp, &pvolatilep, false);
 560
 561         if (pbitpos % BITS_PER_UNIT != 0)
 562           return false;
 563         base = build_fold_addr_expr (base);
 564         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 565
 566         if (poffset)
 567           {
 568             split_constant_offset (poffset, &poffset, &off1);
 569             off0 = size_binop (PLUS_EXPR, off0, off1);
 570             if (POINTER_TYPE_P (TREE_TYPE (base)))
 571               base = fold_build_pointer_plus (base, poffset);
 572             else
 573               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 574                                   fold_convert (TREE_TYPE (base), poffset));
 575           }
 576
 577         var0 = fold_convert (type, base);
 578
 579         /* If variable length types are involved, punt, otherwise casts
 580            might be converted into ARRAY_REFs in gimplify_conversion.
 581            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 582            possibly no longer appears in current GIMPLE, might resurface.
 583            This perhaps could run
 584            if (CONVERT_EXPR_P (var0))
 585              {
 586                gimplify_conversion (&var0);
 587                // Attempt to fill in any within var0 found ARRAY_REF's
 588                // element size from corresponding op embedded ARRAY_REF,
 589                // if unsuccessful, just punt.
 590              }  */
 591         while (POINTER_TYPE_P (type))
 592           type = TREE_TYPE (type);
 593         if (int_size_in_bytes (type) < 0)
 594           return false;
 595
 596         *var = var0;
 597         *off = off0;
 598         return true;
 599       }
 600
 601     case SSA_NAME:
 602       {
 603         gimple def_stmt = SSA_NAME_DEF_STMT (op0);
 604         enum tree_code subcode;
 605
 606         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 607           return false;
 608
 609         var0 = gimple_assign_rhs1 (def_stmt);
 610         subcode = gimple_assign_rhs_code (def_stmt);
 611         var1 = gimple_assign_rhs2 (def_stmt);
 612
 613         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 614       }
 615     CASE_CONVERT:
 616       {
 617         /* We must not introduce undefined overflow, and we must not change the value.
 618            Hence we're okay if the inner type doesn't overflow to start with
 619            (pointer or signed), the outer type also is an integer or pointer
 620            and the outer precision is at least as large as the inner.  */
 621         tree itype = TREE_TYPE (op0);
 622         if ((POINTER_TYPE_P (itype)
 623              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 624             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 625             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 626           {
 627             split_constant_offset (op0, &var0, off);
 628             *var = fold_convert (type, var0);
 629             return true;
 630           }
 631         return false;
 632       }
 633
 634     default:
 635       return false;
 636     }
 637 }
 638
 639 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 640    will be ssizetype.  */
 641
 642 void
 643 split_constant_offset (tree exp, tree *var, tree *off)
 644 {
 645   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 646   enum tree_code code;
 647
 648   *var = exp;
 649   *off = ssize_int (0);
 650   STRIP_NOPS (exp);
 651
 652   if (tree_is_chrec (exp)
 653       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 654     return;
 655
 656   otype = TREE_TYPE (exp);
 657   code = TREE_CODE (exp);
 658   extract_ops_from_tree (exp, &code, &op0, &op1);
 659   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 660     {
 661       *var = fold_convert (type, e);
 662       *off = o;
 663     }
 664 }
 665
 666 /* Returns the address ADDR of an object in a canonical shape (without nop
 667    casts, and with type of pointer to the object).  */
 668
 669 static tree
 670 canonicalize_base_object_address (tree addr)
 671 {
 672   tree orig = addr;
 673
 674   STRIP_NOPS (addr);
 675
 676   /* The base address may be obtained by casting from integer, in that case
 677      keep the cast.  */
 678   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 679     return orig;
 680
 681   if (TREE_CODE (addr) != ADDR_EXPR)
 682     return addr;
 683
 684   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 685 }
 686
 687 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 688    basic block that contains it.  Returns true if analysis succeed or false
 689    otherwise.  */
 690
 691 bool
 692 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 693 {
 694   gimple stmt = DR_STMT (dr);
 695   struct loop *loop = loop_containing_stmt (stmt);
 696   tree ref = DR_REF (dr);
 697   HOST_WIDE_INT pbitsize, pbitpos;
 698   tree base, poffset;
 699   enum machine_mode pmode;
 700   int punsignedp, pvolatilep;
 701   affine_iv base_iv, offset_iv;
 702   tree init, dinit, step;
 703   bool in_loop = (loop && loop->num);
 704
 705   if (dump_file && (dump_flags & TDF_DETAILS))
 706     fprintf (dump_file, "analyze_innermost: ");
 707
 708   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 709                               &pmode, &punsignedp, &pvolatilep, false);
 710   gcc_assert (base != NULL_TREE);
 711
 712   if (pbitpos % BITS_PER_UNIT != 0)
 713     {
 714       if (dump_file && (dump_flags & TDF_DETAILS))
 715         fprintf (dump_file, "failed: bit offset alignment.\n");
 716       return false;
 717     }
 718
 719   if (TREE_CODE (base) == MEM_REF)
 720     {
 721       if (!integer_zerop (TREE_OPERAND (base, 1)))
 722         {
 723           if (!poffset)
 724             {
 725               double_int moff = mem_ref_offset (base);
 726               poffset = double_int_to_tree (sizetype, moff);
 727             }
 728           else
 729             poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
 730         }
 731       base = TREE_OPERAND (base, 0);
 732     }
 733   else
 734     base = build_fold_addr_expr (base);
 735
 736   if (in_loop)
 737     {
 738       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 739                       nest ? true : false))
 740         {
 741           if (nest)
 742             {
 743               if (dump_file && (dump_flags & TDF_DETAILS))
 744                 fprintf (dump_file, "failed: evolution of base is not"
 745                                     " affine.\n");
 746               return false;
 747             }
 748           else
 749             {
 750               base_iv.base = base;
 751               base_iv.step = ssize_int (0);
 752               base_iv.no_overflow = true;
 753             }
 754         }
 755     }
 756   else
 757     {
 758       base_iv.base = base;
 759       base_iv.step = ssize_int (0);
 760       base_iv.no_overflow = true;
 761     }
 762
 763   if (!poffset)
 764     {
 765       offset_iv.base = ssize_int (0);
 766       offset_iv.step = ssize_int (0);
 767     }
 768   else
 769     {
 770       if (!in_loop)
 771         {
 772           offset_iv.base = poffset;
 773           offset_iv.step = ssize_int (0);
 774         }
 775       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 776                            poffset, &offset_iv,
 777                            nest ? true : false))
 778         {
 779           if (nest)
 780             {
 781               if (dump_file && (dump_flags & TDF_DETAILS))
 782                 fprintf (dump_file, "failed: evolution of offset is not"
 783                                     " affine.\n");
 784               return false;
 785             }
 786           else
 787             {
 788               offset_iv.base = poffset;
 789               offset_iv.step = ssize_int (0);
 790             }
 791         }
 792     }
 793
 794   init = ssize_int (pbitpos / BITS_PER_UNIT);
 795   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 796   init =  size_binop (PLUS_EXPR, init, dinit);
 797   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 798   init =  size_binop (PLUS_EXPR, init, dinit);
 799
 800   step = size_binop (PLUS_EXPR,
 801                      fold_convert (ssizetype, base_iv.step),
 802                      fold_convert (ssizetype, offset_iv.step));
 803
 804   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 805
 806   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 807   DR_INIT (dr) = init;
 808   DR_STEP (dr) = step;
 809
 810   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 811
 812   if (dump_file && (dump_flags & TDF_DETAILS))
 813     fprintf (dump_file, "success.\n");
 814
 815   return true;
 816 }
 817
 818 /* Determines the base object and the list of indices of memory reference
 819    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 820
 821 static void
 822 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 823 {
 824   VEC (tree, heap) *access_fns = NULL;
 825   tree ref, op;
 826   tree base, off, access_fn;
 827   basic_block before_loop;
 828
 829   /* If analyzing a basic-block there are no indices to analyze
 830      and thus no access functions.  */
 831   if (!nest)
 832     {
 833       DR_BASE_OBJECT (dr) = DR_REF (dr);
 834       DR_ACCESS_FNS (dr) = NULL;
 835       return;
 836     }
 837
 838   ref = DR_REF (dr);
 839   before_loop = block_before_loop (nest);
 840
 841   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 842      into a two element array with a constant index.  The base is
 843      then just the immediate underlying object.  */
 844   if (TREE_CODE (ref) == REALPART_EXPR)
 845     {
 846       ref = TREE_OPERAND (ref, 0);
 847       VEC_safe_push (tree, heap, access_fns, integer_zero_node);
 848     }
 849   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 850     {
 851       ref = TREE_OPERAND (ref, 0);
 852       VEC_safe_push (tree, heap, access_fns, integer_one_node);
 853     }
 854
 855   /* Analyze access functions of dimensions we know to be independent.  */
 856   while (handled_component_p (ref))
 857     {
 858       if (TREE_CODE (ref) == ARRAY_REF)
 859         {
 860           op = TREE_OPERAND (ref, 1);
 861           access_fn = analyze_scalar_evolution (loop, op);
 862           access_fn = instantiate_scev (before_loop, loop, access_fn);
 863           VEC_safe_push (tree, heap, access_fns, access_fn);
 864         }
 865       else if (TREE_CODE (ref) == COMPONENT_REF
 866                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
 867         {
 868           /* For COMPONENT_REFs of records (but not unions!) use the
 869              FIELD_DECL offset as constant access function so we can
 870              disambiguate a[i].f1 and a[i].f2.  */
 871           tree off = component_ref_field_offset (ref);
 872           off = size_binop (PLUS_EXPR,
 873                             size_binop (MULT_EXPR,
 874                                         fold_convert (bitsizetype, off),
 875                                         bitsize_int (BITS_PER_UNIT)),
 876                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
 877           VEC_safe_push (tree, heap, access_fns, off);
 878         }
 879       else
 880         /* If we have an unhandled component we could not translate
 881            to an access function stop analyzing.  We have determined
 882            our base object in this case.  */
 883         break;
 884
 885       ref = TREE_OPERAND (ref, 0);
 886     }
 887
 888   /* If the address operand of a MEM_REF base has an evolution in the
 889      analyzed nest, add it as an additional independent access-function.  */
 890   if (TREE_CODE (ref) == MEM_REF)
 891     {
 892       op = TREE_OPERAND (ref, 0);
 893       access_fn = analyze_scalar_evolution (loop, op);
 894       access_fn = instantiate_scev (before_loop, loop, access_fn);
 895       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 896         {
 897           tree orig_type;
 898           tree memoff = TREE_OPERAND (ref, 1);
 899           base = initial_condition (access_fn);
 900           orig_type = TREE_TYPE (base);
 901           STRIP_USELESS_TYPE_CONVERSION (base);
 902           split_constant_offset (base, &base, &off);
 903           /* Fold the MEM_REF offset into the evolutions initial
 904              value to make more bases comparable.  */
 905           if (!integer_zerop (memoff))
 906             {
 907               off = size_binop (PLUS_EXPR, off,
 908                                 fold_convert (ssizetype, memoff));
 909               memoff = build_int_cst (TREE_TYPE (memoff), 0);
 910             }
 911           access_fn = chrec_replace_initial_condition
 912               (access_fn, fold_convert (orig_type, off));
 913           /* ???  This is still not a suitable base object for
 914              dr_may_alias_p - the base object needs to be an
 915              access that covers the object as whole.  With
 916              an evolution in the pointer this cannot be
 917              guaranteed.
 918              As a band-aid, mark the access so we can special-case
 919              it in dr_may_alias_p.  */
 920           ref = fold_build2_loc (EXPR_LOCATION (ref),
 921                                  MEM_REF, TREE_TYPE (ref),
 922                                  base, memoff);
 923           DR_UNCONSTRAINED_BASE (dr) = true;
 924           VEC_safe_push (tree, heap, access_fns, access_fn);
 925         }
 926     }
 927   else if (DECL_P (ref))
 928     {
 929       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
 930       ref = build2 (MEM_REF, TREE_TYPE (ref),
 931                     build_fold_addr_expr (ref),
 932                     build_int_cst (reference_alias_ptr_type (ref), 0));
 933     }
 934
 935   DR_BASE_OBJECT (dr) = ref;
 936   DR_ACCESS_FNS (dr) = access_fns;
 937 }
 938
 939 /* Extracts the alias analysis information from the memory reference DR.  */
 940
 941 static void
 942 dr_analyze_alias (struct data_reference *dr)
 943 {
 944   tree ref = DR_REF (dr);
 945   tree base = get_base_address (ref), addr;
 946
 947   if (INDIRECT_REF_P (base)
 948       || TREE_CODE (base) == MEM_REF)
 949     {
 950       addr = TREE_OPERAND (base, 0);
 951       if (TREE_CODE (addr) == SSA_NAME)
 952         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
 953     }
 954 }
 955
 956 /* Frees data reference DR.  */
 957
 958 void
 959 free_data_ref (data_reference_p dr)
 960 {
 961   VEC_free (tree, heap, DR_ACCESS_FNS (dr));
 962   free (dr);
 963 }
 964
 965 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
 966    is read if IS_READ is true, write otherwise.  Returns the
 967    data_reference description of MEMREF.  NEST is the outermost loop
 968    in which the reference should be instantiated, LOOP is the loop in
 969    which the data reference should be analyzed.  */
 970
 971 struct data_reference *
 972 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
 973                  bool is_read)
 974 {
 975   struct data_reference *dr;
 976
 977   if (dump_file && (dump_flags & TDF_DETAILS))
 978     {
 979       fprintf (dump_file, "Creating dr for ");
 980       print_generic_expr (dump_file, memref, TDF_SLIM);
 981       fprintf (dump_file, "\n");
 982     }
 983
 984   dr = XCNEW (struct data_reference);
 985   DR_STMT (dr) = stmt;
 986   DR_REF (dr) = memref;
 987   DR_IS_READ (dr) = is_read;
 988
 989   dr_analyze_innermost (dr, nest);
 990   dr_analyze_indices (dr, nest, loop);
 991   dr_analyze_alias (dr);
 992
 993   if (dump_file && (dump_flags & TDF_DETAILS))
 994     {
 995       unsigned i;
 996       fprintf (dump_file, "\tbase_address: ");
 997       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
 998       fprintf (dump_file, "\n\toffset from base address: ");
 999       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1000       fprintf (dump_file, "\n\tconstant offset from base address: ");
1001       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1002       fprintf (dump_file, "\n\tstep: ");
1003       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1004       fprintf (dump_file, "\n\taligned to: ");
1005       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1006       fprintf (dump_file, "\n\tbase_object: ");
1007       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1008       fprintf (dump_file, "\n");
1009       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1010         {
1011           fprintf (dump_file, "\tAccess function %d: ", i);
1012           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1013         }
1014     }
1015
1016   return dr;
1017 }
1018
1019 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1020    expressions.  */
1021 static bool
1022 dr_equal_offsets_p1 (tree offset1, tree offset2)
1023 {
1024   bool res;
1025
1026   STRIP_NOPS (offset1);
1027   STRIP_NOPS (offset2);
1028
1029   if (offset1 == offset2)
1030     return true;
1031
1032   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1033       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1034     return false;
1035
1036   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1037                              TREE_OPERAND (offset2, 0));
1038
1039   if (!res || !BINARY_CLASS_P (offset1))
1040     return res;
1041
1042   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1043                              TREE_OPERAND (offset2, 1));
1044
1045   return res;
1046 }
1047
1048 /* Check if DRA and DRB have equal offsets.  */
1049 bool
1050 dr_equal_offsets_p (struct data_reference *dra,
1051                     struct data_reference *drb)
1052 {
1053   tree offset1, offset2;
1054
1055   offset1 = DR_OFFSET (dra);
1056   offset2 = DR_OFFSET (drb);
1057
1058   return dr_equal_offsets_p1 (offset1, offset2);
1059 }
1060
1061 /* Returns true if FNA == FNB.  */
1062
1063 static bool
1064 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1065 {
1066   unsigned i, n = VEC_length (tree, fna);
1067
1068   if (n != VEC_length (tree, fnb))
1069     return false;
1070
1071   for (i = 0; i < n; i++)
1072     if (!operand_equal_p (VEC_index (tree, fna, i),
1073                           VEC_index (tree, fnb, i), 0))
1074       return false;
1075
1076   return true;
1077 }
1078
1079 /* If all the functions in CF are the same, returns one of them,
1080    otherwise returns NULL.  */
1081
1082 static affine_fn
1083 common_affine_function (conflict_function *cf)
1084 {
1085   unsigned i;
1086   affine_fn comm;
1087
1088   if (!CF_NONTRIVIAL_P (cf))
1089     return NULL;
1090
1091   comm = cf->fns[0];
1092
1093   for (i = 1; i < cf->n; i++)
1094     if (!affine_function_equal_p (comm, cf->fns[i]))
1095       return NULL;
1096
1097   return comm;
1098 }
1099
1100 /* Returns the base of the affine function FN.  */
1101
1102 static tree
1103 affine_function_base (affine_fn fn)
1104 {
1105   return VEC_index (tree, fn, 0);
1106 }
1107
1108 /* Returns true if FN is a constant.  */
1109
1110 static bool
1111 affine_function_constant_p (affine_fn fn)
1112 {
1113   unsigned i;
1114   tree coef;
1115
1116   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1117     if (!integer_zerop (coef))
1118       return false;
1119
1120   return true;
1121 }
1122
1123 /* Returns true if FN is the zero constant function.  */
1124
1125 static bool
1126 affine_function_zero_p (affine_fn fn)
1127 {
1128   return (integer_zerop (affine_function_base (fn))
1129           && affine_function_constant_p (fn));
1130 }
1131
1132 /* Returns a signed integer type with the largest precision from TA
1133    and TB.  */
1134
1135 static tree
1136 signed_type_for_types (tree ta, tree tb)
1137 {
1138   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1139     return signed_type_for (ta);
1140   else
1141     return signed_type_for (tb);
1142 }
1143
1144 /* Applies operation OP on affine functions FNA and FNB, and returns the
1145    result.  */
1146
1147 static affine_fn
1148 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1149 {
1150   unsigned i, n, m;
1151   affine_fn ret;
1152   tree coef;
1153
1154   if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1155     {
1156       n = VEC_length (tree, fna);
1157       m = VEC_length (tree, fnb);
1158     }
1159   else
1160     {
1161       n = VEC_length (tree, fnb);
1162       m = VEC_length (tree, fna);
1163     }
1164
1165   ret = VEC_alloc (tree, heap, m);
1166   for (i = 0; i < n; i++)
1167     {
1168       tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1169                                          TREE_TYPE (VEC_index (tree, fnb, i)));
1170
1171       VEC_quick_push (tree, ret,
1172                       fold_build2 (op, type,
1173                                    VEC_index (tree, fna, i),
1174                                    VEC_index (tree, fnb, i)));
1175     }
1176
1177   for (; VEC_iterate (tree, fna, i, coef); i++)
1178     VEC_quick_push (tree, ret,
1179                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1180                                  coef, integer_zero_node));
1181   for (; VEC_iterate (tree, fnb, i, coef); i++)
1182     VEC_quick_push (tree, ret,
1183                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1184                                  integer_zero_node, coef));
1185
1186   return ret;
1187 }
1188
1189 /* Returns the sum of affine functions FNA and FNB.  */
1190
1191 static affine_fn
1192 affine_fn_plus (affine_fn fna, affine_fn fnb)
1193 {
1194   return affine_fn_op (PLUS_EXPR, fna, fnb);
1195 }
1196
1197 /* Returns the difference of affine functions FNA and FNB.  */
1198
1199 static affine_fn
1200 affine_fn_minus (affine_fn fna, affine_fn fnb)
1201 {
1202   return affine_fn_op (MINUS_EXPR, fna, fnb);
1203 }
1204
1205 /* Frees affine function FN.  */
1206
1207 static void
1208 affine_fn_free (affine_fn fn)
1209 {
1210   VEC_free (tree, heap, fn);
1211 }
1212
1213 /* Determine for each subscript in the data dependence relation DDR
1214    the distance.  */
1215
1216 static void
1217 compute_subscript_distance (struct data_dependence_relation *ddr)
1218 {
1219   conflict_function *cf_a, *cf_b;
1220   affine_fn fn_a, fn_b, diff;
1221
1222   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1223     {
1224       unsigned int i;
1225
1226       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1227         {
1228           struct subscript *subscript;
1229
1230           subscript = DDR_SUBSCRIPT (ddr, i);
1231           cf_a = SUB_CONFLICTS_IN_A (subscript);
1232           cf_b = SUB_CONFLICTS_IN_B (subscript);
1233
1234           fn_a = common_affine_function (cf_a);
1235           fn_b = common_affine_function (cf_b);
1236           if (!fn_a || !fn_b)
1237             {
1238               SUB_DISTANCE (subscript) = chrec_dont_know;
1239               return;
1240             }
1241           diff = affine_fn_minus (fn_a, fn_b);
1242
1243           if (affine_function_constant_p (diff))
1244             SUB_DISTANCE (subscript) = affine_function_base (diff);
1245           else
1246             SUB_DISTANCE (subscript) = chrec_dont_know;
1247
1248           affine_fn_free (diff);
1249         }
1250     }
1251 }
1252
1253 /* Returns the conflict function for "unknown".  */
1254
1255 static conflict_function *
1256 conflict_fn_not_known (void)
1257 {
1258   conflict_function *fn = XCNEW (conflict_function);
1259   fn->n = NOT_KNOWN;
1260
1261   return fn;
1262 }
1263
1264 /* Returns the conflict function for "independent".  */
1265
1266 static conflict_function *
1267 conflict_fn_no_dependence (void)
1268 {
1269   conflict_function *fn = XCNEW (conflict_function);
1270   fn->n = NO_DEPENDENCE;
1271
1272   return fn;
1273 }
1274
1275 /* Returns true if the address of OBJ is invariant in LOOP.  */
1276
1277 static bool
1278 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1279 {
1280   while (handled_component_p (obj))
1281     {
1282       if (TREE_CODE (obj) == ARRAY_REF)
1283         {
1284           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1285              need to check the stride and the lower bound of the reference.  */
1286           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1287                                                       loop->num)
1288               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1289                                                          loop->num))
1290             return false;
1291         }
1292       else if (TREE_CODE (obj) == COMPONENT_REF)
1293         {
1294           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1295                                                       loop->num))
1296             return false;
1297         }
1298       obj = TREE_OPERAND (obj, 0);
1299     }
1300
1301   if (!INDIRECT_REF_P (obj)
1302       && TREE_CODE (obj) != MEM_REF)
1303     return true;
1304
1305   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1306                                                   loop->num);
1307 }
1308
1309 /* Returns false if we can prove that data references A and B do not alias,
1310    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1311    considered.  */
1312
1313 bool
1314 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1315                 bool loop_nest)
1316 {
1317   tree addr_a = DR_BASE_OBJECT (a);
1318   tree addr_b = DR_BASE_OBJECT (b);
1319
1320   /* If we are not processing a loop nest but scalar code we
1321      do not need to care about possible cross-iteration dependences
1322      and thus can process the full original reference.  Do so,
1323      similar to how loop invariant motion applies extra offset-based
1324      disambiguation.  */
1325   if (!loop_nest)
1326     {
1327       aff_tree off1, off2;
1328       double_int size1, size2;
1329       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1330       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1331       aff_combination_scale (&off1, double_int_minus_one);
1332       aff_combination_add (&off2, &off1);
1333       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1334         return false;
1335     }
1336
1337   /* If we had an evolution in a MEM_REF BASE_OBJECT we do not know
1338      the size of the base-object.  So we cannot do any offset/overlap
1339      based analysis but have to rely on points-to information only.  */
1340   if (TREE_CODE (addr_a) == MEM_REF
1341       && DR_UNCONSTRAINED_BASE (a))
1342     {
1343       if (TREE_CODE (addr_b) == MEM_REF
1344           && DR_UNCONSTRAINED_BASE (b))
1345         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1346                                        TREE_OPERAND (addr_b, 0));
1347       else
1348         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1349                                        build_fold_addr_expr (addr_b));
1350     }
1351   else if (TREE_CODE (addr_b) == MEM_REF
1352            && DR_UNCONSTRAINED_BASE (b))
1353     return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1354                                    TREE_OPERAND (addr_b, 0));
1355
1356   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1357      that is being subsetted in the loop nest.  */
1358   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1359     return refs_output_dependent_p (addr_a, addr_b);
1360   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1361     return refs_anti_dependent_p (addr_a, addr_b);
1362   return refs_may_alias_p (addr_a, addr_b);
1363 }
1364
1365 /* Initialize a data dependence relation between data accesses A and
1366    B.  NB_LOOPS is the number of loops surrounding the references: the
1367    size of the classic distance/direction vectors.  */
1368
1369 struct data_dependence_relation *
1370 initialize_data_dependence_relation (struct data_reference *a,
1371                                      struct data_reference *b,
1372                                      VEC (loop_p, heap) *loop_nest)
1373 {
1374   struct data_dependence_relation *res;
1375   unsigned int i;
1376
1377   res = XNEW (struct data_dependence_relation);
1378   DDR_A (res) = a;
1379   DDR_B (res) = b;
1380   DDR_LOOP_NEST (res) = NULL;
1381   DDR_REVERSED_P (res) = false;
1382   DDR_SUBSCRIPTS (res) = NULL;
1383   DDR_DIR_VECTS (res) = NULL;
1384   DDR_DIST_VECTS (res) = NULL;
1385
1386   if (a == NULL || b == NULL)
1387     {
1388       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1389       return res;
1390     }
1391
1392   /* If the data references do not alias, then they are independent.  */
1393   if (!dr_may_alias_p (a, b, loop_nest != NULL))
1394     {
1395       DDR_ARE_DEPENDENT (res) = chrec_known;
1396       return res;
1397     }
1398
1399   /* The case where the references are exactly the same.  */
1400   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1401     {
1402      if (loop_nest
1403         && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1404                                                 DR_BASE_OBJECT (a)))
1405       {
1406         DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407         return res;
1408       }
1409       DDR_AFFINE_P (res) = true;
1410       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1411       DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1412       DDR_LOOP_NEST (res) = loop_nest;
1413       DDR_INNER_LOOP (res) = 0;
1414       DDR_SELF_REFERENCE (res) = true;
1415       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1416        {
1417          struct subscript *subscript;
1418
1419          subscript = XNEW (struct subscript);
1420          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1421          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1422          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1423          SUB_DISTANCE (subscript) = chrec_dont_know;
1424          VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1425        }
1426       return res;
1427     }
1428
1429   /* If the references do not access the same object, we do not know
1430      whether they alias or not.  */
1431   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1432     {
1433       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1434       return res;
1435     }
1436
1437   /* If the base of the object is not invariant in the loop nest, we cannot
1438      analyze it.  TODO -- in fact, it would suffice to record that there may
1439      be arbitrary dependences in the loops where the base object varies.  */
1440   if (loop_nest
1441       && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1442                                               DR_BASE_OBJECT (a)))
1443     {
1444       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1445       return res;
1446     }
1447
1448   /* If the number of dimensions of the access to not agree we can have
1449      a pointer access to a component of the array element type and an
1450      array access while the base-objects are still the same.  Punt.  */
1451   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1452     {
1453       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1454       return res;
1455     }
1456
1457   DDR_AFFINE_P (res) = true;
1458   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1459   DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1460   DDR_LOOP_NEST (res) = loop_nest;
1461   DDR_INNER_LOOP (res) = 0;
1462   DDR_SELF_REFERENCE (res) = false;
1463
1464   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1465     {
1466       struct subscript *subscript;
1467
1468       subscript = XNEW (struct subscript);
1469       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1470       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1471       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1472       SUB_DISTANCE (subscript) = chrec_dont_know;
1473       VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1474     }
1475
1476   return res;
1477 }
1478
1479 /* Frees memory used by the conflict function F.  */
1480
1481 static void
1482 free_conflict_function (conflict_function *f)
1483 {
1484   unsigned i;
1485
1486   if (CF_NONTRIVIAL_P (f))
1487     {
1488       for (i = 0; i < f->n; i++)
1489         affine_fn_free (f->fns[i]);
1490     }
1491   free (f);
1492 }
1493
1494 /* Frees memory used by SUBSCRIPTS.  */
1495
1496 static void
1497 free_subscripts (VEC (subscript_p, heap) *subscripts)
1498 {
1499   unsigned i;
1500   subscript_p s;
1501
1502   FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1503     {
1504       free_conflict_function (s->conflicting_iterations_in_a);
1505       free_conflict_function (s->conflicting_iterations_in_b);
1506       free (s);
1507     }
1508   VEC_free (subscript_p, heap, subscripts);
1509 }
1510
1511 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1512    description.  */
1513
1514 static inline void
1515 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1516                         tree chrec)
1517 {
1518   if (dump_file && (dump_flags & TDF_DETAILS))
1519     {
1520       fprintf (dump_file, "(dependence classified: ");
1521       print_generic_expr (dump_file, chrec, 0);
1522       fprintf (dump_file, ")\n");
1523     }
1524
1525   DDR_ARE_DEPENDENT (ddr) = chrec;
1526   free_subscripts (DDR_SUBSCRIPTS (ddr));
1527   DDR_SUBSCRIPTS (ddr) = NULL;
1528 }
1529
1530 /* The dependence relation DDR cannot be represented by a distance
1531    vector.  */
1532
1533 static inline void
1534 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1535 {
1536   if (dump_file && (dump_flags & TDF_DETAILS))
1537     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1538
1539   DDR_AFFINE_P (ddr) = false;
1540 }
1541
1542 \f
1543
1544 /* This section contains the classic Banerjee tests.  */
1545
1546 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1547    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1548
1549 static inline bool
1550 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1551 {
1552   return (evolution_function_is_constant_p (chrec_a)
1553           && evolution_function_is_constant_p (chrec_b));
1554 }
1555
1556 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1557    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1558
1559 static bool
1560 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1561 {
1562   if ((evolution_function_is_constant_p (chrec_a)
1563        && evolution_function_is_univariate_p (chrec_b))
1564       || (evolution_function_is_constant_p (chrec_b)
1565           && evolution_function_is_univariate_p (chrec_a)))
1566     return true;
1567
1568   if (evolution_function_is_univariate_p (chrec_a)
1569       && evolution_function_is_univariate_p (chrec_b))
1570     {
1571       switch (TREE_CODE (chrec_a))
1572         {
1573         case POLYNOMIAL_CHREC:
1574           switch (TREE_CODE (chrec_b))
1575             {
1576             case POLYNOMIAL_CHREC:
1577               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1578                 return false;
1579
1580             default:
1581               return true;
1582             }
1583
1584         default:
1585           return true;
1586         }
1587     }
1588
1589   return false;
1590 }
1591
1592 /* Creates a conflict function with N dimensions.  The affine functions
1593    in each dimension follow.  */
1594
1595 static conflict_function *
1596 conflict_fn (unsigned n, ...)
1597 {
1598   unsigned i;
1599   conflict_function *ret = XCNEW (conflict_function);
1600   va_list ap;
1601
1602   gcc_assert (0 < n && n <= MAX_DIM);
1603   va_start(ap, n);
1604
1605   ret->n = n;
1606   for (i = 0; i < n; i++)
1607     ret->fns[i] = va_arg (ap, affine_fn);
1608   va_end(ap);
1609
1610   return ret;
1611 }
1612
1613 /* Returns constant affine function with value CST.  */
1614
1615 static affine_fn
1616 affine_fn_cst (tree cst)
1617 {
1618   affine_fn fn = VEC_alloc (tree, heap, 1);
1619   VEC_quick_push (tree, fn, cst);
1620   return fn;
1621 }
1622
1623 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1624
1625 static affine_fn
1626 affine_fn_univar (tree cst, unsigned dim, tree coef)
1627 {
1628   affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1629   unsigned i;
1630
1631   gcc_assert (dim > 0);
1632   VEC_quick_push (tree, fn, cst);
1633   for (i = 1; i < dim; i++)
1634     VEC_quick_push (tree, fn, integer_zero_node);
1635   VEC_quick_push (tree, fn, coef);
1636   return fn;
1637 }
1638
1639 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1640    *OVERLAPS_B are initialized to the functions that describe the
1641    relation between the elements accessed twice by CHREC_A and
1642    CHREC_B.  For k >= 0, the following property is verified:
1643
1644    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1645
1646 static void
1647 analyze_ziv_subscript (tree chrec_a,
1648                        tree chrec_b,
1649                        conflict_function **overlaps_a,
1650                        conflict_function **overlaps_b,
1651                        tree *last_conflicts)
1652 {
1653   tree type, difference;
1654   dependence_stats.num_ziv++;
1655
1656   if (dump_file && (dump_flags & TDF_DETAILS))
1657     fprintf (dump_file, "(analyze_ziv_subscript \n");
1658
1659   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1660   chrec_a = chrec_convert (type, chrec_a, NULL);
1661   chrec_b = chrec_convert (type, chrec_b, NULL);
1662   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1663
1664   switch (TREE_CODE (difference))
1665     {
1666     case INTEGER_CST:
1667       if (integer_zerop (difference))
1668         {
1669           /* The difference is equal to zero: the accessed index
1670              overlaps for each iteration in the loop.  */
1671           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1672           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1673           *last_conflicts = chrec_dont_know;
1674           dependence_stats.num_ziv_dependent++;
1675         }
1676       else
1677         {
1678           /* The accesses do not overlap.  */
1679           *overlaps_a = conflict_fn_no_dependence ();
1680           *overlaps_b = conflict_fn_no_dependence ();
1681           *last_conflicts = integer_zero_node;
1682           dependence_stats.num_ziv_independent++;
1683         }
1684       break;
1685
1686     default:
1687       /* We're not sure whether the indexes overlap.  For the moment,
1688          conservatively answer "don't know".  */
1689       if (dump_file && (dump_flags & TDF_DETAILS))
1690         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1691
1692       *overlaps_a = conflict_fn_not_known ();
1693       *overlaps_b = conflict_fn_not_known ();
1694       *last_conflicts = chrec_dont_know;
1695       dependence_stats.num_ziv_unimplemented++;
1696       break;
1697     }
1698
1699   if (dump_file && (dump_flags & TDF_DETAILS))
1700     fprintf (dump_file, ")\n");
1701 }
1702
1703 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1704    and only if it fits to the int type.  If this is not the case, or the
1705    bound  on the number of iterations of LOOP could not be derived, returns
1706    chrec_dont_know.  */
1707
1708 static tree
1709 max_stmt_executions_tree (struct loop *loop)
1710 {
1711   double_int nit;
1712
1713   if (!max_stmt_executions (loop, &nit))
1714     return chrec_dont_know;
1715
1716   if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1717     return chrec_dont_know;
1718
1719   return double_int_to_tree (unsigned_type_node, nit);
1720 }
1721
1722 /* Determine whether the CHREC is always positive/negative.  If the expression
1723    cannot be statically analyzed, return false, otherwise set the answer into
1724    VALUE.  */
1725
1726 static bool
1727 chrec_is_positive (tree chrec, bool *value)
1728 {
1729   bool value0, value1, value2;
1730   tree end_value, nb_iter;
1731
1732   switch (TREE_CODE (chrec))
1733     {
1734     case POLYNOMIAL_CHREC:
1735       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1736           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1737         return false;
1738
1739       /* FIXME -- overflows.  */
1740       if (value0 == value1)
1741         {
1742           *value = value0;
1743           return true;
1744         }
1745
1746       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1747          and the proof consists in showing that the sign never
1748          changes during the execution of the loop, from 0 to
1749          loop->nb_iterations.  */
1750       if (!evolution_function_is_affine_p (chrec))
1751         return false;
1752
1753       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1754       if (chrec_contains_undetermined (nb_iter))
1755         return false;
1756
1757 #if 0
1758       /* TODO -- If the test is after the exit, we may decrease the number of
1759          iterations by one.  */
1760       if (after_exit)
1761         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1762 #endif
1763
1764       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1765
1766       if (!chrec_is_positive (end_value, &value2))
1767         return false;
1768
1769       *value = value0;
1770       return value0 == value1;
1771
1772     case INTEGER_CST:
1773       switch (tree_int_cst_sgn (chrec))
1774         {
1775         case -1:
1776           *value = false;
1777           break;
1778         case 1:
1779           *value = true;
1780           break;
1781         default:
1782           return false;
1783         }
1784       return true;
1785
1786     default:
1787       return false;
1788     }
1789 }
1790
1791
1792 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1793    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1794    *OVERLAPS_B are initialized to the functions that describe the
1795    relation between the elements accessed twice by CHREC_A and
1796    CHREC_B.  For k >= 0, the following property is verified:
1797
1798    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1799
1800 static void
1801 analyze_siv_subscript_cst_affine (tree chrec_a,
1802                                   tree chrec_b,
1803                                   conflict_function **overlaps_a,
1804                                   conflict_function **overlaps_b,
1805                                   tree *last_conflicts)
1806 {
1807   bool value0, value1, value2;
1808   tree type, difference, tmp;
1809
1810   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1811   chrec_a = chrec_convert (type, chrec_a, NULL);
1812   chrec_b = chrec_convert (type, chrec_b, NULL);
1813   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1814
1815   /* Special case overlap in the first iteration.  */
1816   if (integer_zerop (difference))
1817     {
1818       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1819       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1820       *last_conflicts = integer_one_node;
1821       return;
1822     }
1823
1824   if (!chrec_is_positive (initial_condition (difference), &value0))
1825     {
1826       if (dump_file && (dump_flags & TDF_DETAILS))
1827         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1828
1829       dependence_stats.num_siv_unimplemented++;
1830       *overlaps_a = conflict_fn_not_known ();
1831       *overlaps_b = conflict_fn_not_known ();
1832       *last_conflicts = chrec_dont_know;
1833       return;
1834     }
1835   else
1836     {
1837       if (value0 == false)
1838         {
1839           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1840             {
1841               if (dump_file && (dump_flags & TDF_DETAILS))
1842                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1843
1844               *overlaps_a = conflict_fn_not_known ();
1845               *overlaps_b = conflict_fn_not_known ();
1846               *last_conflicts = chrec_dont_know;
1847               dependence_stats.num_siv_unimplemented++;
1848               return;
1849             }
1850           else
1851             {
1852               if (value1 == true)
1853                 {
1854                   /* Example:
1855                      chrec_a = 12
1856                      chrec_b = {10, +, 1}
1857                   */
1858
1859                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1860                     {
1861                       HOST_WIDE_INT numiter;
1862                       struct loop *loop = get_chrec_loop (chrec_b);
1863
1864                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1865                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1866                                          fold_build1 (ABS_EXPR, type, difference),
1867                                          CHREC_RIGHT (chrec_b));
1868                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1869                       *last_conflicts = integer_one_node;
1870
1871
1872                       /* Perform weak-zero siv test to see if overlap is
1873                          outside the loop bounds.  */
1874                       numiter = max_stmt_executions_int (loop);
1875
1876                       if (numiter >= 0
1877                           && compare_tree_int (tmp, numiter) > 0)
1878                         {
1879                           free_conflict_function (*overlaps_a);
1880                           free_conflict_function (*overlaps_b);
1881                           *overlaps_a = conflict_fn_no_dependence ();
1882                           *overlaps_b = conflict_fn_no_dependence ();
1883                           *last_conflicts = integer_zero_node;
1884                           dependence_stats.num_siv_independent++;
1885                           return;
1886                         }
1887                       dependence_stats.num_siv_dependent++;
1888                       return;
1889                     }
1890
1891                   /* When the step does not divide the difference, there are
1892                      no overlaps.  */
1893                   else
1894                     {
1895                       *overlaps_a = conflict_fn_no_dependence ();
1896                       *overlaps_b = conflict_fn_no_dependence ();
1897                       *last_conflicts = integer_zero_node;
1898                       dependence_stats.num_siv_independent++;
1899                       return;
1900                     }
1901                 }
1902
1903               else
1904                 {
1905                   /* Example:
1906                      chrec_a = 12
1907                      chrec_b = {10, +, -1}
1908
1909                      In this case, chrec_a will not overlap with chrec_b.  */
1910                   *overlaps_a = conflict_fn_no_dependence ();
1911                   *overlaps_b = conflict_fn_no_dependence ();
1912                   *last_conflicts = integer_zero_node;
1913                   dependence_stats.num_siv_independent++;
1914                   return;
1915                 }
1916             }
1917         }
1918       else
1919         {
1920           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1921             {
1922               if (dump_file && (dump_flags & TDF_DETAILS))
1923                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1924
1925               *overlaps_a = conflict_fn_not_known ();
1926               *overlaps_b = conflict_fn_not_known ();
1927               *last_conflicts = chrec_dont_know;
1928               dependence_stats.num_siv_unimplemented++;
1929               return;
1930             }
1931           else
1932             {
1933               if (value2 == false)
1934                 {
1935                   /* Example:
1936                      chrec_a = 3
1937                      chrec_b = {10, +, -1}
1938                   */
1939                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1940                     {
1941                       HOST_WIDE_INT numiter;
1942                       struct loop *loop = get_chrec_loop (chrec_b);
1943
1944                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1945                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1946                                          CHREC_RIGHT (chrec_b));
1947                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1948                       *last_conflicts = integer_one_node;
1949
1950                       /* Perform weak-zero siv test to see if overlap is
1951                          outside the loop bounds.  */
1952                       numiter = max_stmt_executions_int (loop);
1953
1954                       if (numiter >= 0
1955                           && compare_tree_int (tmp, numiter) > 0)
1956                         {
1957                           free_conflict_function (*overlaps_a);
1958                           free_conflict_function (*overlaps_b);
1959                           *overlaps_a = conflict_fn_no_dependence ();
1960                           *overlaps_b = conflict_fn_no_dependence ();
1961                           *last_conflicts = integer_zero_node;
1962                           dependence_stats.num_siv_independent++;
1963                           return;
1964                         }
1965                       dependence_stats.num_siv_dependent++;
1966                       return;
1967                     }
1968
1969                   /* When the step does not divide the difference, there
1970                      are no overlaps.  */
1971                   else
1972                     {
1973                       *overlaps_a = conflict_fn_no_dependence ();
1974                       *overlaps_b = conflict_fn_no_dependence ();
1975                       *last_conflicts = integer_zero_node;
1976                       dependence_stats.num_siv_independent++;
1977                       return;
1978                     }
1979                 }
1980               else
1981                 {
1982                   /* Example:
1983                      chrec_a = 3
1984                      chrec_b = {4, +, 1}
1985
1986                      In this case, chrec_a will not overlap with chrec_b.  */
1987                   *overlaps_a = conflict_fn_no_dependence ();
1988                   *overlaps_b = conflict_fn_no_dependence ();
1989                   *last_conflicts = integer_zero_node;
1990                   dependence_stats.num_siv_independent++;
1991                   return;
1992                 }
1993             }
1994         }
1995     }
1996 }
1997
1998 /* Helper recursive function for initializing the matrix A.  Returns
1999    the initial value of CHREC.  */
2000
2001 static tree
2002 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2003 {
2004   gcc_assert (chrec);
2005
2006   switch (TREE_CODE (chrec))
2007     {
2008     case POLYNOMIAL_CHREC:
2009       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2010
2011       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2012       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2013
2014     case PLUS_EXPR:
2015     case MULT_EXPR:
2016     case MINUS_EXPR:
2017       {
2018         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2019         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2020
2021         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2022       }
2023
2024     case NOP_EXPR:
2025       {
2026         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2027         return chrec_convert (chrec_type (chrec), op, NULL);
2028       }
2029
2030     case BIT_NOT_EXPR:
2031       {
2032         /* Handle ~X as -1 - X.  */
2033         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2034         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2035                               build_int_cst (TREE_TYPE (chrec), -1), op);
2036       }
2037
2038     case INTEGER_CST:
2039       return chrec;
2040
2041     default:
2042       gcc_unreachable ();
2043       return NULL_TREE;
2044     }
2045 }
2046
2047 #define FLOOR_DIV(x,y) ((x) / (y))
2048
2049 /* Solves the special case of the Diophantine equation:
2050    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2051
2052    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2053    number of iterations that loops X and Y run.  The overlaps will be
2054    constructed as evolutions in dimension DIM.  */
2055
2056 static void
2057 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2058                                          affine_fn *overlaps_a,
2059                                          affine_fn *overlaps_b,
2060                                          tree *last_conflicts, int dim)
2061 {
2062   if (((step_a > 0 && step_b > 0)
2063        || (step_a < 0 && step_b < 0)))
2064     {
2065       int step_overlaps_a, step_overlaps_b;
2066       int gcd_steps_a_b, last_conflict, tau2;
2067
2068       gcd_steps_a_b = gcd (step_a, step_b);
2069       step_overlaps_a = step_b / gcd_steps_a_b;
2070       step_overlaps_b = step_a / gcd_steps_a_b;
2071
2072       if (niter > 0)
2073         {
2074           tau2 = FLOOR_DIV (niter, step_overlaps_a);
2075           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2076           last_conflict = tau2;
2077           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2078         }
2079       else
2080         *last_conflicts = chrec_dont_know;
2081
2082       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2083                                       build_int_cst (NULL_TREE,
2084                                                      step_overlaps_a));
2085       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2086                                       build_int_cst (NULL_TREE,
2087                                                      step_overlaps_b));
2088     }
2089
2090   else
2091     {
2092       *overlaps_a = affine_fn_cst (integer_zero_node);
2093       *overlaps_b = affine_fn_cst (integer_zero_node);
2094       *last_conflicts = integer_zero_node;
2095     }
2096 }
2097
2098 /* Solves the special case of a Diophantine equation where CHREC_A is
2099    an affine bivariate function, and CHREC_B is an affine univariate
2100    function.  For example,
2101
2102    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2103
2104    has the following overlapping functions:
2105
2106    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2107    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2108    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2109
2110    FORNOW: This is a specialized implementation for a case occurring in
2111    a common benchmark.  Implement the general algorithm.  */
2112
2113 static void
2114 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2115                                       conflict_function **overlaps_a,
2116                                       conflict_function **overlaps_b,
2117                                       tree *last_conflicts)
2118 {
2119   bool xz_p, yz_p, xyz_p;
2120   int step_x, step_y, step_z;
2121   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2122   affine_fn overlaps_a_xz, overlaps_b_xz;
2123   affine_fn overlaps_a_yz, overlaps_b_yz;
2124   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2125   affine_fn ova1, ova2, ovb;
2126   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2127
2128   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2129   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2130   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2131
2132   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2133   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2134   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2135
2136   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2137     {
2138       if (dump_file && (dump_flags & TDF_DETAILS))
2139         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2140
2141       *overlaps_a = conflict_fn_not_known ();
2142       *overlaps_b = conflict_fn_not_known ();
2143       *last_conflicts = chrec_dont_know;
2144       return;
2145     }
2146
2147   niter = MIN (niter_x, niter_z);
2148   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2149                                            &overlaps_a_xz,
2150                                            &overlaps_b_xz,
2151                                            &last_conflicts_xz, 1);
2152   niter = MIN (niter_y, niter_z);
2153   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2154                                            &overlaps_a_yz,
2155                                            &overlaps_b_yz,
2156                                            &last_conflicts_yz, 2);
2157   niter = MIN (niter_x, niter_z);
2158   niter = MIN (niter_y, niter);
2159   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2160                                            &overlaps_a_xyz,
2161                                            &overlaps_b_xyz,
2162                                            &last_conflicts_xyz, 3);
2163
2164   xz_p = !integer_zerop (last_conflicts_xz);
2165   yz_p = !integer_zerop (last_conflicts_yz);
2166   xyz_p = !integer_zerop (last_conflicts_xyz);
2167
2168   if (xz_p || yz_p || xyz_p)
2169     {
2170       ova1 = affine_fn_cst (integer_zero_node);
2171       ova2 = affine_fn_cst (integer_zero_node);
2172       ovb = affine_fn_cst (integer_zero_node);
2173       if (xz_p)
2174         {
2175           affine_fn t0 = ova1;
2176           affine_fn t2 = ovb;
2177
2178           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2179           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2180           affine_fn_free (t0);
2181           affine_fn_free (t2);
2182           *last_conflicts = last_conflicts_xz;
2183         }
2184       if (yz_p)
2185         {
2186           affine_fn t0 = ova2;
2187           affine_fn t2 = ovb;
2188
2189           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2190           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2191           affine_fn_free (t0);
2192           affine_fn_free (t2);
2193           *last_conflicts = last_conflicts_yz;
2194         }
2195       if (xyz_p)
2196         {
2197           affine_fn t0 = ova1;
2198           affine_fn t2 = ova2;
2199           affine_fn t4 = ovb;
2200
2201           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2202           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2203           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2204           affine_fn_free (t0);
2205           affine_fn_free (t2);
2206           affine_fn_free (t4);
2207           *last_conflicts = last_conflicts_xyz;
2208         }
2209       *overlaps_a = conflict_fn (2, ova1, ova2);
2210       *overlaps_b = conflict_fn (1, ovb);
2211     }
2212   else
2213     {
2214       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2215       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2216       *last_conflicts = integer_zero_node;
2217     }
2218
2219   affine_fn_free (overlaps_a_xz);
2220   affine_fn_free (overlaps_b_xz);
2221   affine_fn_free (overlaps_a_yz);
2222   affine_fn_free (overlaps_b_yz);
2223   affine_fn_free (overlaps_a_xyz);
2224   affine_fn_free (overlaps_b_xyz);
2225 }
2226
2227 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2228
2229 static void
2230 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2231                     int size)
2232 {
2233   memcpy (vec2, vec1, size * sizeof (*vec1));
2234 }
2235
2236 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2237
2238 static void
2239 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2240                     int m, int n)
2241 {
2242   int i;
2243
2244   for (i = 0; i < m; i++)
2245     lambda_vector_copy (mat1[i], mat2[i], n);
2246 }
2247
2248 /* Store the N x N identity matrix in MAT.  */
2249
2250 static void
2251 lambda_matrix_id (lambda_matrix mat, int size)
2252 {
2253   int i, j;
2254
2255   for (i = 0; i < size; i++)
2256     for (j = 0; j < size; j++)
2257       mat[i][j] = (i == j) ? 1 : 0;
2258 }
2259
2260 /* Return the first nonzero element of vector VEC1 between START and N.
2261    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2262
2263 static int
2264 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2265 {
2266   int j = start;
2267   while (j < n && vec1[j] == 0)
2268     j++;
2269   return j;
2270 }
2271
2272 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2273    R2 = R2 + CONST1 * R1.  */
2274
2275 static void
2276 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2277 {
2278   int i;
2279
2280   if (const1 == 0)
2281     return;
2282
2283   for (i = 0; i < n; i++)
2284     mat[r2][i] += const1 * mat[r1][i];
2285 }
2286
2287 /* Swap rows R1 and R2 in matrix MAT.  */
2288
2289 static void
2290 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2291 {
2292   lambda_vector row;
2293
2294   row = mat[r1];
2295   mat[r1] = mat[r2];
2296   mat[r2] = row;
2297 }
2298
2299 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2300    and store the result in VEC2.  */
2301
2302 static void
2303 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2304                           int size, int const1)
2305 {
2306   int i;
2307
2308   if (const1 == 0)
2309     lambda_vector_clear (vec2, size);
2310   else
2311     for (i = 0; i < size; i++)
2312       vec2[i] = const1 * vec1[i];
2313 }
2314
2315 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2316
2317 static void
2318 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2319                       int size)
2320 {
2321   lambda_vector_mult_const (vec1, vec2, size, -1);
2322 }
2323
2324 /* Negate row R1 of matrix MAT which has N columns.  */
2325
2326 static void
2327 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2328 {
2329   lambda_vector_negate (mat[r1], mat[r1], n);
2330 }
2331
2332 /* Return true if two vectors are equal.  */
2333
2334 static bool
2335 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2336 {
2337   int i;
2338   for (i = 0; i < size; i++)
2339     if (vec1[i] != vec2[i])
2340       return false;
2341   return true;
2342 }
2343
2344 /* Given an M x N integer matrix A, this function determines an M x
2345    M unimodular matrix U, and an M x N echelon matrix S such that
2346    "U.A = S".  This decomposition is also known as "right Hermite".
2347
2348    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2349    Restructuring Compilers" Utpal Banerjee.  */
2350
2351 static void
2352 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2353                              lambda_matrix S, lambda_matrix U)
2354 {
2355   int i, j, i0 = 0;
2356
2357   lambda_matrix_copy (A, S, m, n);
2358   lambda_matrix_id (U, m);
2359
2360   for (j = 0; j < n; j++)
2361     {
2362       if (lambda_vector_first_nz (S[j], m, i0) < m)
2363         {
2364           ++i0;
2365           for (i = m - 1; i >= i0; i--)
2366             {
2367               while (S[i][j] != 0)
2368                 {
2369                   int sigma, factor, a, b;
2370
2371                   a = S[i-1][j];
2372                   b = S[i][j];
2373                   sigma = (a * b < 0) ? -1: 1;
2374                   a = abs (a);
2375                   b = abs (b);
2376                   factor = sigma * (a / b);
2377
2378                   lambda_matrix_row_add (S, n, i, i-1, -factor);
2379                   lambda_matrix_row_exchange (S, i, i-1);
2380
2381                   lambda_matrix_row_add (U, m, i, i-1, -factor);
2382                   lambda_matrix_row_exchange (U, i, i-1);
2383                 }
2384             }
2385         }
2386     }
2387 }
2388
2389 /* Determines the overlapping elements due to accesses CHREC_A and
2390    CHREC_B, that are affine functions.  This function cannot handle
2391    symbolic evolution functions, ie. when initial conditions are
2392    parameters, because it uses lambda matrices of integers.  */
2393
2394 static void
2395 analyze_subscript_affine_affine (tree chrec_a,
2396                                  tree chrec_b,
2397                                  conflict_function **overlaps_a,
2398                                  conflict_function **overlaps_b,
2399                                  tree *last_conflicts)
2400 {
2401   unsigned nb_vars_a, nb_vars_b, dim;
2402   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2403   lambda_matrix A, U, S;
2404   struct obstack scratch_obstack;
2405
2406   if (eq_evolutions_p (chrec_a, chrec_b))
2407     {
2408       /* The accessed index overlaps for each iteration in the
2409          loop.  */
2410       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2411       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2412       *last_conflicts = chrec_dont_know;
2413       return;
2414     }
2415   if (dump_file && (dump_flags & TDF_DETAILS))
2416     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2417
2418   /* For determining the initial intersection, we have to solve a
2419      Diophantine equation.  This is the most time consuming part.
2420
2421      For answering to the question: "Is there a dependence?" we have
2422      to prove that there exists a solution to the Diophantine
2423      equation, and that the solution is in the iteration domain,
2424      i.e. the solution is positive or zero, and that the solution
2425      happens before the upper bound loop.nb_iterations.  Otherwise
2426      there is no dependence.  This function outputs a description of
2427      the iterations that hold the intersections.  */
2428
2429   nb_vars_a = nb_vars_in_chrec (chrec_a);
2430   nb_vars_b = nb_vars_in_chrec (chrec_b);
2431
2432   gcc_obstack_init (&scratch_obstack);
2433
2434   dim = nb_vars_a + nb_vars_b;
2435   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2436   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2437   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2438
2439   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2440   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2441   gamma = init_b - init_a;
2442
2443   /* Don't do all the hard work of solving the Diophantine equation
2444      when we already know the solution: for example,
2445      | {3, +, 1}_1
2446      | {3, +, 4}_2
2447      | gamma = 3 - 3 = 0.
2448      Then the first overlap occurs during the first iterations:
2449      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2450   */
2451   if (gamma == 0)
2452     {
2453       if (nb_vars_a == 1 && nb_vars_b == 1)
2454         {
2455           HOST_WIDE_INT step_a, step_b;
2456           HOST_WIDE_INT niter, niter_a, niter_b;
2457           affine_fn ova, ovb;
2458
2459           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2460           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2461           niter = MIN (niter_a, niter_b);
2462           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2463           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2464
2465           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2466                                                    &ova, &ovb,
2467                                                    last_conflicts, 1);
2468           *overlaps_a = conflict_fn (1, ova);
2469           *overlaps_b = conflict_fn (1, ovb);
2470         }
2471
2472       else if (nb_vars_a == 2 && nb_vars_b == 1)
2473         compute_overlap_steps_for_affine_1_2
2474           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2475
2476       else if (nb_vars_a == 1 && nb_vars_b == 2)
2477         compute_overlap_steps_for_affine_1_2
2478           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2479
2480       else
2481         {
2482           if (dump_file && (dump_flags & TDF_DETAILS))
2483             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2484           *overlaps_a = conflict_fn_not_known ();
2485           *overlaps_b = conflict_fn_not_known ();
2486           *last_conflicts = chrec_dont_know;
2487         }
2488       goto end_analyze_subs_aa;
2489     }
2490
2491   /* U.A = S */
2492   lambda_matrix_right_hermite (A, dim, 1, S, U);
2493
2494   if (S[0][0] < 0)
2495     {
2496       S[0][0] *= -1;
2497       lambda_matrix_row_negate (U, dim, 0);
2498     }
2499   gcd_alpha_beta = S[0][0];
2500
2501   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2502      but that is a quite strange case.  Instead of ICEing, answer
2503      don't know.  */
2504   if (gcd_alpha_beta == 0)
2505     {
2506       *overlaps_a = conflict_fn_not_known ();
2507       *overlaps_b = conflict_fn_not_known ();
2508       *last_conflicts = chrec_dont_know;
2509       goto end_analyze_subs_aa;
2510     }
2511
2512   /* The classic "gcd-test".  */
2513   if (!int_divides_p (gcd_alpha_beta, gamma))
2514     {
2515       /* The "gcd-test" has determined that there is no integer
2516          solution, i.e. there is no dependence.  */
2517       *overlaps_a = conflict_fn_no_dependence ();
2518       *overlaps_b = conflict_fn_no_dependence ();
2519       *last_conflicts = integer_zero_node;
2520     }
2521
2522   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2523   else if (nb_vars_a == 1 && nb_vars_b == 1)
2524     {
2525       /* Both functions should have the same evolution sign.  */
2526       if (((A[0][0] > 0 && -A[1][0] > 0)
2527            || (A[0][0] < 0 && -A[1][0] < 0)))
2528         {
2529           /* The solutions are given by:
2530              |
2531              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2532              |                           [u21 u22]    [y0]
2533
2534              For a given integer t.  Using the following variables,
2535
2536              | i0 = u11 * gamma / gcd_alpha_beta
2537              | j0 = u12 * gamma / gcd_alpha_beta
2538              | i1 = u21
2539              | j1 = u22
2540
2541              the solutions are:
2542
2543              | x0 = i0 + i1 * t,
2544              | y0 = j0 + j1 * t.  */
2545           HOST_WIDE_INT i0, j0, i1, j1;
2546
2547           i0 = U[0][0] * gamma / gcd_alpha_beta;
2548           j0 = U[0][1] * gamma / gcd_alpha_beta;
2549           i1 = U[1][0];
2550           j1 = U[1][1];
2551
2552           if ((i1 == 0 && i0 < 0)
2553               || (j1 == 0 && j0 < 0))
2554             {
2555               /* There is no solution.
2556                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2557                  falls in here, but for the moment we don't look at the
2558                  upper bound of the iteration domain.  */
2559               *overlaps_a = conflict_fn_no_dependence ();
2560               *overlaps_b = conflict_fn_no_dependence ();
2561               *last_conflicts = integer_zero_node;
2562               goto end_analyze_subs_aa;
2563             }
2564
2565           if (i1 > 0 && j1 > 0)
2566             {
2567               HOST_WIDE_INT niter_a
2568                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2569               HOST_WIDE_INT niter_b
2570                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2571               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2572
2573               /* (X0, Y0) is a solution of the Diophantine equation:
2574                  "chrec_a (X0) = chrec_b (Y0)".  */
2575               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2576                                         CEIL (-j0, j1));
2577               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2578               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2579
2580               /* (X1, Y1) is the smallest positive solution of the eq
2581                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2582                  first conflict occurs.  */
2583               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2584               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2585               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2586
2587               if (niter > 0)
2588                 {
2589                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2590                                             FLOOR_DIV (niter - j0, j1));
2591                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2592
2593                   /* If the overlap occurs outside of the bounds of the
2594                      loop, there is no dependence.  */
2595                   if (x1 >= niter || y1 >= niter)
2596                     {
2597                       *overlaps_a = conflict_fn_no_dependence ();
2598                       *overlaps_b = conflict_fn_no_dependence ();
2599                       *last_conflicts = integer_zero_node;
2600                       goto end_analyze_subs_aa;
2601                     }
2602                   else
2603                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2604                 }
2605               else
2606                 *last_conflicts = chrec_dont_know;
2607
2608               *overlaps_a
2609                 = conflict_fn (1,
2610                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2611                                                  1,
2612                                                  build_int_cst (NULL_TREE, i1)));
2613               *overlaps_b
2614                 = conflict_fn (1,
2615                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2616                                                  1,
2617                                                  build_int_cst (NULL_TREE, j1)));
2618             }
2619           else
2620             {
2621               /* FIXME: For the moment, the upper bound of the
2622                  iteration domain for i and j is not checked.  */
2623               if (dump_file && (dump_flags & TDF_DETAILS))
2624                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2625               *overlaps_a = conflict_fn_not_known ();
2626               *overlaps_b = conflict_fn_not_known ();
2627               *last_conflicts = chrec_dont_know;
2628             }
2629         }
2630       else
2631         {
2632           if (dump_file && (dump_flags & TDF_DETAILS))
2633             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2634           *overlaps_a = conflict_fn_not_known ();
2635           *overlaps_b = conflict_fn_not_known ();
2636           *last_conflicts = chrec_dont_know;
2637         }
2638     }
2639   else
2640     {
2641       if (dump_file && (dump_flags & TDF_DETAILS))
2642         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2643       *overlaps_a = conflict_fn_not_known ();
2644       *overlaps_b = conflict_fn_not_known ();
2645       *last_conflicts = chrec_dont_know;
2646     }
2647
2648 end_analyze_subs_aa:
2649   obstack_free (&scratch_obstack, NULL);
2650   if (dump_file && (dump_flags & TDF_DETAILS))
2651     {
2652       fprintf (dump_file, "  (overlaps_a = ");
2653       dump_conflict_function (dump_file, *overlaps_a);
2654       fprintf (dump_file, ")\n  (overlaps_b = ");
2655       dump_conflict_function (dump_file, *overlaps_b);
2656       fprintf (dump_file, ")\n");
2657       fprintf (dump_file, ")\n");
2658     }
2659 }
2660
2661 /* Returns true when analyze_subscript_affine_affine can be used for
2662    determining the dependence relation between chrec_a and chrec_b,
2663    that contain symbols.  This function modifies chrec_a and chrec_b
2664    such that the analysis result is the same, and such that they don't
2665    contain symbols, and then can safely be passed to the analyzer.
2666
2667    Example: The analysis of the following tuples of evolutions produce
2668    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2669    vs. {0, +, 1}_1
2670
2671    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2672    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2673 */
2674
2675 static bool
2676 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2677 {
2678   tree diff, type, left_a, left_b, right_b;
2679
2680   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2681       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2682     /* FIXME: For the moment not handled.  Might be refined later.  */
2683     return false;
2684
2685   type = chrec_type (*chrec_a);
2686   left_a = CHREC_LEFT (*chrec_a);
2687   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2688   diff = chrec_fold_minus (type, left_a, left_b);
2689
2690   if (!evolution_function_is_constant_p (diff))
2691     return false;
2692
2693   if (dump_file && (dump_flags & TDF_DETAILS))
2694     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2695
2696   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2697                                      diff, CHREC_RIGHT (*chrec_a));
2698   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2699   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2700                                      build_int_cst (type, 0),
2701                                      right_b);
2702   return true;
2703 }
2704
2705 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2706    *OVERLAPS_B are initialized to the functions that describe the
2707    relation between the elements accessed twice by CHREC_A and
2708    CHREC_B.  For k >= 0, the following property is verified:
2709
2710    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2711
2712 static void
2713 analyze_siv_subscript (tree chrec_a,
2714                        tree chrec_b,
2715                        conflict_function **overlaps_a,
2716                        conflict_function **overlaps_b,
2717                        tree *last_conflicts,
2718                        int loop_nest_num)
2719 {
2720   dependence_stats.num_siv++;
2721
2722   if (dump_file && (dump_flags & TDF_DETAILS))
2723     fprintf (dump_file, "(analyze_siv_subscript \n");
2724
2725   if (evolution_function_is_constant_p (chrec_a)
2726       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2727     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2728                                       overlaps_a, overlaps_b, last_conflicts);
2729
2730   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2731            && evolution_function_is_constant_p (chrec_b))
2732     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2733                                       overlaps_b, overlaps_a, last_conflicts);
2734
2735   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2736            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2737     {
2738       if (!chrec_contains_symbols (chrec_a)
2739           && !chrec_contains_symbols (chrec_b))
2740         {
2741           analyze_subscript_affine_affine (chrec_a, chrec_b,
2742                                            overlaps_a, overlaps_b,
2743                                            last_conflicts);
2744
2745           if (CF_NOT_KNOWN_P (*overlaps_a)
2746               || CF_NOT_KNOWN_P (*overlaps_b))
2747             dependence_stats.num_siv_unimplemented++;
2748           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2749                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2750             dependence_stats.num_siv_independent++;
2751           else
2752             dependence_stats.num_siv_dependent++;
2753         }
2754       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2755                                                         &chrec_b))
2756         {
2757           analyze_subscript_affine_affine (chrec_a, chrec_b,
2758                                            overlaps_a, overlaps_b,
2759                                            last_conflicts);
2760
2761           if (CF_NOT_KNOWN_P (*overlaps_a)
2762               || CF_NOT_KNOWN_P (*overlaps_b))
2763             dependence_stats.num_siv_unimplemented++;
2764           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2765                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2766             dependence_stats.num_siv_independent++;
2767           else
2768             dependence_stats.num_siv_dependent++;
2769         }
2770       else
2771         goto siv_subscript_dontknow;
2772     }
2773
2774   else
2775     {
2776     siv_subscript_dontknow:;
2777       if (dump_file && (dump_flags & TDF_DETAILS))
2778         fprintf (dump_file, "siv test failed: unimplemented.\n");
2779       *overlaps_a = conflict_fn_not_known ();
2780       *overlaps_b = conflict_fn_not_known ();
2781       *last_conflicts = chrec_dont_know;
2782       dependence_stats.num_siv_unimplemented++;
2783     }
2784
2785   if (dump_file && (dump_flags & TDF_DETAILS))
2786     fprintf (dump_file, ")\n");
2787 }
2788
2789 /* Returns false if we can prove that the greatest common divisor of the steps
2790    of CHREC does not divide CST, false otherwise.  */
2791
2792 static bool
2793 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2794 {
2795   HOST_WIDE_INT cd = 0, val;
2796   tree step;
2797
2798   if (!host_integerp (cst, 0))
2799     return true;
2800   val = tree_low_cst (cst, 0);
2801
2802   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2803     {
2804       step = CHREC_RIGHT (chrec);
2805       if (!host_integerp (step, 0))
2806         return true;
2807       cd = gcd (cd, tree_low_cst (step, 0));
2808       chrec = CHREC_LEFT (chrec);
2809     }
2810
2811   return val % cd == 0;
2812 }
2813
2814 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2815    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2816    functions that describe the relation between the elements accessed
2817    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2818    is verified:
2819
2820    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2821
2822 static void
2823 analyze_miv_subscript (tree chrec_a,
2824                        tree chrec_b,
2825                        conflict_function **overlaps_a,
2826                        conflict_function **overlaps_b,
2827                        tree *last_conflicts,
2828                        struct loop *loop_nest)
2829 {
2830   tree type, difference;
2831
2832   dependence_stats.num_miv++;
2833   if (dump_file && (dump_flags & TDF_DETAILS))
2834     fprintf (dump_file, "(analyze_miv_subscript \n");
2835
2836   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2837   chrec_a = chrec_convert (type, chrec_a, NULL);
2838   chrec_b = chrec_convert (type, chrec_b, NULL);
2839   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2840
2841   if (eq_evolutions_p (chrec_a, chrec_b))
2842     {
2843       /* Access functions are the same: all the elements are accessed
2844          in the same order.  */
2845       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2846       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2847       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2848       dependence_stats.num_miv_dependent++;
2849     }
2850
2851   else if (evolution_function_is_constant_p (difference)
2852            /* For the moment, the following is verified:
2853               evolution_function_is_affine_multivariate_p (chrec_a,
2854               loop_nest->num) */
2855            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2856     {
2857       /* testsuite/.../ssa-chrec-33.c
2858          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2859
2860          The difference is 1, and all the evolution steps are multiples
2861          of 2, consequently there are no overlapping elements.  */
2862       *overlaps_a = conflict_fn_no_dependence ();
2863       *overlaps_b = conflict_fn_no_dependence ();
2864       *last_conflicts = integer_zero_node;
2865       dependence_stats.num_miv_independent++;
2866     }
2867
2868   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2869            && !chrec_contains_symbols (chrec_a)
2870            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2871            && !chrec_contains_symbols (chrec_b))
2872     {
2873       /* testsuite/.../ssa-chrec-35.c
2874          {0, +, 1}_2  vs.  {0, +, 1}_3
2875          the overlapping elements are respectively located at iterations:
2876          {0, +, 1}_x and {0, +, 1}_x,
2877          in other words, we have the equality:
2878          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2879
2880          Other examples:
2881          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2882          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2883
2884          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2885          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2886       */
2887       analyze_subscript_affine_affine (chrec_a, chrec_b,
2888                                        overlaps_a, overlaps_b, last_conflicts);
2889
2890       if (CF_NOT_KNOWN_P (*overlaps_a)
2891           || CF_NOT_KNOWN_P (*overlaps_b))
2892         dependence_stats.num_miv_unimplemented++;
2893       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2894                || CF_NO_DEPENDENCE_P (*overlaps_b))
2895         dependence_stats.num_miv_independent++;
2896       else
2897         dependence_stats.num_miv_dependent++;
2898     }
2899
2900   else
2901     {
2902       /* When the analysis is too difficult, answer "don't know".  */
2903       if (dump_file && (dump_flags & TDF_DETAILS))
2904         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2905
2906       *overlaps_a = conflict_fn_not_known ();
2907       *overlaps_b = conflict_fn_not_known ();
2908       *last_conflicts = chrec_dont_know;
2909       dependence_stats.num_miv_unimplemented++;
2910     }
2911
2912   if (dump_file && (dump_flags & TDF_DETAILS))
2913     fprintf (dump_file, ")\n");
2914 }
2915
2916 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2917    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2918    OVERLAP_ITERATIONS_B are initialized with two functions that
2919    describe the iterations that contain conflicting elements.
2920
2921    Remark: For an integer k >= 0, the following equality is true:
2922
2923    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2924 */
2925
2926 static void
2927 analyze_overlapping_iterations (tree chrec_a,
2928                                 tree chrec_b,
2929                                 conflict_function **overlap_iterations_a,
2930                                 conflict_function **overlap_iterations_b,
2931                                 tree *last_conflicts, struct loop *loop_nest)
2932 {
2933   unsigned int lnn = loop_nest->num;
2934
2935   dependence_stats.num_subscript_tests++;
2936
2937   if (dump_file && (dump_flags & TDF_DETAILS))
2938     {
2939       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2940       fprintf (dump_file, "  (chrec_a = ");
2941       print_generic_expr (dump_file, chrec_a, 0);
2942       fprintf (dump_file, ")\n  (chrec_b = ");
2943       print_generic_expr (dump_file, chrec_b, 0);
2944       fprintf (dump_file, ")\n");
2945     }
2946
2947   if (chrec_a == NULL_TREE
2948       || chrec_b == NULL_TREE
2949       || chrec_contains_undetermined (chrec_a)
2950       || chrec_contains_undetermined (chrec_b))
2951     {
2952       dependence_stats.num_subscript_undetermined++;
2953
2954       *overlap_iterations_a = conflict_fn_not_known ();
2955       *overlap_iterations_b = conflict_fn_not_known ();
2956     }
2957
2958   /* If they are the same chrec, and are affine, they overlap
2959      on every iteration.  */
2960   else if (eq_evolutions_p (chrec_a, chrec_b)
2961            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2962                || operand_equal_p (chrec_a, chrec_b, 0)))
2963     {
2964       dependence_stats.num_same_subscript_function++;
2965       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2966       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2967       *last_conflicts = chrec_dont_know;
2968     }
2969
2970   /* If they aren't the same, and aren't affine, we can't do anything
2971      yet.  */
2972   else if ((chrec_contains_symbols (chrec_a)
2973             || chrec_contains_symbols (chrec_b))
2974            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2975                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2976     {
2977       dependence_stats.num_subscript_undetermined++;
2978       *overlap_iterations_a = conflict_fn_not_known ();
2979       *overlap_iterations_b = conflict_fn_not_known ();
2980     }
2981
2982   else if (ziv_subscript_p (chrec_a, chrec_b))
2983     analyze_ziv_subscript (chrec_a, chrec_b,
2984                            overlap_iterations_a, overlap_iterations_b,
2985                            last_conflicts);
2986
2987   else if (siv_subscript_p (chrec_a, chrec_b))
2988     analyze_siv_subscript (chrec_a, chrec_b,
2989                            overlap_iterations_a, overlap_iterations_b,
2990                            last_conflicts, lnn);
2991
2992   else
2993     analyze_miv_subscript (chrec_a, chrec_b,
2994                            overlap_iterations_a, overlap_iterations_b,
2995                            last_conflicts, loop_nest);
2996
2997   if (dump_file && (dump_flags & TDF_DETAILS))
2998     {
2999       fprintf (dump_file, "  (overlap_iterations_a = ");
3000       dump_conflict_function (dump_file, *overlap_iterations_a);
3001       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
3002       dump_conflict_function (dump_file, *overlap_iterations_b);
3003       fprintf (dump_file, ")\n");
3004       fprintf (dump_file, ")\n");
3005     }
3006 }
3007
3008 /* Helper function for uniquely inserting distance vectors.  */
3009
3010 static void
3011 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3012 {
3013   unsigned i;
3014   lambda_vector v;
3015
3016   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
3017     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3018       return;
3019
3020   VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
3021 }
3022
3023 /* Helper function for uniquely inserting direction vectors.  */
3024
3025 static void
3026 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3027 {
3028   unsigned i;
3029   lambda_vector v;
3030
3031   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
3032     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3033       return;
3034
3035   VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
3036 }
3037
3038 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3039    haven't yet determined a distance for this outer loop, push a new
3040    distance vector composed of the previous distance, and a distance
3041    of 1 for this outer loop.  Example:
3042
3043    | loop_1
3044    |   loop_2
3045    |     A[10]
3046    |   endloop_2
3047    | endloop_1
3048
3049    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3050    save (0, 1), then we have to save (1, 0).  */
3051
3052 static void
3053 add_outer_distances (struct data_dependence_relation *ddr,
3054                      lambda_vector dist_v, int index)
3055 {
3056   /* For each outer loop where init_v is not set, the accesses are
3057      in dependence of distance 1 in the loop.  */
3058   while (--index >= 0)
3059     {
3060       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3061       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3062       save_v[index] = 1;
3063       save_dist_v (ddr, save_v);
3064     }
3065 }
3066
3067 /* Return false when fail to represent the data dependence as a
3068    distance vector.  INIT_B is set to true when a component has been
3069    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3070    the index in DIST_V that carries the dependence.  */
3071
3072 static bool
3073 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3074                              struct data_reference *ddr_a,
3075                              struct data_reference *ddr_b,
3076                              lambda_vector dist_v, bool *init_b,
3077                              int *index_carry)
3078 {
3079   unsigned i;
3080   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3081
3082   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3083     {
3084       tree access_fn_a, access_fn_b;
3085       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3086
3087       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3088         {
3089           non_affine_dependence_relation (ddr);
3090           return false;
3091         }
3092
3093       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3094       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3095
3096       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3097           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3098         {
3099           int dist, index;
3100           int var_a = CHREC_VARIABLE (access_fn_a);
3101           int var_b = CHREC_VARIABLE (access_fn_b);
3102
3103           if (var_a != var_b
3104               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3105             {
3106               non_affine_dependence_relation (ddr);
3107               return false;
3108             }
3109
3110           dist = int_cst_value (SUB_DISTANCE (subscript));
3111           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3112           *index_carry = MIN (index, *index_carry);
3113
3114           /* This is the subscript coupling test.  If we have already
3115              recorded a distance for this loop (a distance coming from
3116              another subscript), it should be the same.  For example,
3117              in the following code, there is no dependence:
3118
3119              | loop i = 0, N, 1
3120              |   T[i+1][i] = ...
3121              |   ... = T[i][i]
3122              | endloop
3123           */
3124           if (init_v[index] != 0 && dist_v[index] != dist)
3125             {
3126               finalize_ddr_dependent (ddr, chrec_known);
3127               return false;
3128             }
3129
3130           dist_v[index] = dist;
3131           init_v[index] = 1;
3132           *init_b = true;
3133         }
3134       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3135         {
3136           /* This can be for example an affine vs. constant dependence
3137              (T[i] vs. T[3]) that is not an affine dependence and is
3138              not representable as a distance vector.  */
3139           non_affine_dependence_relation (ddr);
3140           return false;
3141         }
3142     }
3143
3144   return true;
3145 }
3146
3147 /* Return true when the DDR contains only constant access functions.  */
3148
3149 static bool
3150 constant_access_functions (const struct data_dependence_relation *ddr)
3151 {
3152   unsigned i;
3153
3154   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3155     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3156         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3157       return false;
3158
3159   return true;
3160 }
3161
3162 /* Helper function for the case where DDR_A and DDR_B are the same
3163    multivariate access function with a constant step.  For an example
3164    see pr34635-1.c.  */
3165
3166 static void
3167 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3168 {
3169   int x_1, x_2;
3170   tree c_1 = CHREC_LEFT (c_2);
3171   tree c_0 = CHREC_LEFT (c_1);
3172   lambda_vector dist_v;
3173   int v1, v2, cd;
3174
3175   /* Polynomials with more than 2 variables are not handled yet.  When
3176      the evolution steps are parameters, it is not possible to
3177      represent the dependence using classical distance vectors.  */
3178   if (TREE_CODE (c_0) != INTEGER_CST
3179       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3180       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3181     {
3182       DDR_AFFINE_P (ddr) = false;
3183       return;
3184     }
3185
3186   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3187   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3188
3189   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3190   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3191   v1 = int_cst_value (CHREC_RIGHT (c_1));
3192   v2 = int_cst_value (CHREC_RIGHT (c_2));
3193   cd = gcd (v1, v2);
3194   v1 /= cd;
3195   v2 /= cd;
3196
3197   if (v2 < 0)
3198     {
3199       v2 = -v2;
3200       v1 = -v1;
3201     }
3202
3203   dist_v[x_1] = v2;
3204   dist_v[x_2] = -v1;
3205   save_dist_v (ddr, dist_v);
3206
3207   add_outer_distances (ddr, dist_v, x_1);
3208 }
3209
3210 /* Helper function for the case where DDR_A and DDR_B are the same
3211    access functions.  */
3212
3213 static void
3214 add_other_self_distances (struct data_dependence_relation *ddr)
3215 {
3216   lambda_vector dist_v;
3217   unsigned i;
3218   int index_carry = DDR_NB_LOOPS (ddr);
3219
3220   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3221     {
3222       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3223
3224       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3225         {
3226           if (!evolution_function_is_univariate_p (access_fun))
3227             {
3228               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3229                 {
3230                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3231                   return;
3232                 }
3233
3234               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3235
3236               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3237                 add_multivariate_self_dist (ddr, access_fun);
3238               else
3239                 /* The evolution step is not constant: it varies in
3240                    the outer loop, so this cannot be represented by a
3241                    distance vector.  For example in pr34635.c the
3242                    evolution is {0, +, {0, +, 4}_1}_2.  */
3243                 DDR_AFFINE_P (ddr) = false;
3244
3245               return;
3246             }
3247
3248           index_carry = MIN (index_carry,
3249                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3250                                                  DDR_LOOP_NEST (ddr)));
3251         }
3252     }
3253
3254   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3255   add_outer_distances (ddr, dist_v, index_carry);
3256 }
3257
3258 static void
3259 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3260 {
3261   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3262
3263   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3264   save_dist_v (ddr, dist_v);
3265 }
3266
3267 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3268    is the case for example when access functions are the same and
3269    equal to a constant, as in:
3270
3271    | loop_1
3272    |   A[3] = ...
3273    |   ... = A[3]
3274    | endloop_1
3275
3276    in which case the distance vectors are (0) and (1).  */
3277
3278 static void
3279 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3280 {
3281   unsigned i, j;
3282
3283   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3284     {
3285       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3286       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3287       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3288
3289       for (j = 0; j < ca->n; j++)
3290         if (affine_function_zero_p (ca->fns[j]))
3291           {
3292             insert_innermost_unit_dist_vector (ddr);
3293             return;
3294           }
3295
3296       for (j = 0; j < cb->n; j++)
3297         if (affine_function_zero_p (cb->fns[j]))
3298           {
3299             insert_innermost_unit_dist_vector (ddr);
3300             return;
3301           }
3302     }
3303 }
3304
3305 /* Compute the classic per loop distance vector.  DDR is the data
3306    dependence relation to build a vector from.  Return false when fail
3307    to represent the data dependence as a distance vector.  */
3308
3309 static bool
3310 build_classic_dist_vector (struct data_dependence_relation *ddr,
3311                            struct loop *loop_nest)
3312 {
3313   bool init_b = false;
3314   int index_carry = DDR_NB_LOOPS (ddr);
3315   lambda_vector dist_v;
3316
3317   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3318     return false;
3319
3320   if (same_access_functions (ddr))
3321     {
3322       /* Save the 0 vector.  */
3323       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3324       save_dist_v (ddr, dist_v);
3325
3326       if (constant_access_functions (ddr))
3327         add_distance_for_zero_overlaps (ddr);
3328
3329       if (DDR_NB_LOOPS (ddr) > 1)
3330         add_other_self_distances (ddr);
3331
3332       return true;
3333     }
3334
3335   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3336   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3337                                     dist_v, &init_b, &index_carry))
3338     return false;
3339
3340   /* Save the distance vector if we initialized one.  */
3341   if (init_b)
3342     {
3343       /* Verify a basic constraint: classic distance vectors should
3344          always be lexicographically positive.
3345
3346          Data references are collected in the order of execution of
3347          the program, thus for the following loop
3348
3349          | for (i = 1; i < 100; i++)
3350          |   for (j = 1; j < 100; j++)
3351          |     {
3352          |       t = T[j+1][i-1];  // A
3353          |       T[j][i] = t + 2;  // B
3354          |     }
3355
3356          references are collected following the direction of the wind:
3357          A then B.  The data dependence tests are performed also
3358          following this order, such that we're looking at the distance
3359          separating the elements accessed by A from the elements later
3360          accessed by B.  But in this example, the distance returned by
3361          test_dep (A, B) is lexicographically negative (-1, 1), that
3362          means that the access A occurs later than B with respect to
3363          the outer loop, ie. we're actually looking upwind.  In this
3364          case we solve test_dep (B, A) looking downwind to the
3365          lexicographically positive solution, that returns the
3366          distance vector (1, -1).  */
3367       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3368         {
3369           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3370           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3371                                               loop_nest))
3372             return false;
3373           compute_subscript_distance (ddr);
3374           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3375                                             save_v, &init_b, &index_carry))
3376             return false;
3377           save_dist_v (ddr, save_v);
3378           DDR_REVERSED_P (ddr) = true;
3379
3380           /* In this case there is a dependence forward for all the
3381              outer loops:
3382
3383              | for (k = 1; k < 100; k++)
3384              |  for (i = 1; i < 100; i++)
3385              |   for (j = 1; j < 100; j++)
3386              |     {
3387              |       t = T[j+1][i-1];  // A
3388              |       T[j][i] = t + 2;  // B
3389              |     }
3390
3391              the vectors are:
3392              (0,  1, -1)
3393              (1,  1, -1)
3394              (1, -1,  1)
3395           */
3396           if (DDR_NB_LOOPS (ddr) > 1)
3397             {
3398               add_outer_distances (ddr, save_v, index_carry);
3399               add_outer_distances (ddr, dist_v, index_carry);
3400             }
3401         }
3402       else
3403         {
3404           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3405           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3406
3407           if (DDR_NB_LOOPS (ddr) > 1)
3408             {
3409               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3410
3411               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3412                                                   DDR_A (ddr), loop_nest))
3413                 return false;
3414               compute_subscript_distance (ddr);
3415               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3416                                                 opposite_v, &init_b,
3417                                                 &index_carry))
3418                 return false;
3419
3420               save_dist_v (ddr, save_v);
3421               add_outer_distances (ddr, dist_v, index_carry);
3422               add_outer_distances (ddr, opposite_v, index_carry);
3423             }
3424           else
3425             save_dist_v (ddr, save_v);
3426         }
3427     }
3428   else
3429     {
3430       /* There is a distance of 1 on all the outer loops: Example:
3431          there is a dependence of distance 1 on loop_1 for the array A.
3432
3433          | loop_1
3434          |   A[5] = ...
3435          | endloop
3436       */
3437       add_outer_distances (ddr, dist_v,
3438                            lambda_vector_first_nz (dist_v,
3439                                                    DDR_NB_LOOPS (ddr), 0));
3440     }
3441
3442   if (dump_file && (dump_flags & TDF_DETAILS))
3443     {
3444       unsigned i;
3445
3446       fprintf (dump_file, "(build_classic_dist_vector\n");
3447       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3448         {
3449           fprintf (dump_file, "  dist_vector = (");
3450           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3451                                DDR_NB_LOOPS (ddr));
3452           fprintf (dump_file, "  )\n");
3453         }
3454       fprintf (dump_file, ")\n");
3455     }
3456
3457   return true;
3458 }
3459
3460 /* Return the direction for a given distance.
3461    FIXME: Computing dir this way is suboptimal, since dir can catch
3462    cases that dist is unable to represent.  */
3463
3464 static inline enum data_dependence_direction
3465 dir_from_dist (int dist)
3466 {
3467   if (dist > 0)
3468     return dir_positive;
3469   else if (dist < 0)
3470     return dir_negative;
3471   else
3472     return dir_equal;
3473 }
3474
3475 /* Compute the classic per loop direction vector.  DDR is the data
3476    dependence relation to build a vector from.  */
3477
3478 static void
3479 build_classic_dir_vector (struct data_dependence_relation *ddr)
3480 {
3481   unsigned i, j;
3482   lambda_vector dist_v;
3483
3484   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3485     {
3486       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3487
3488       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3489         dir_v[j] = dir_from_dist (dist_v[j]);
3490
3491       save_dir_v (ddr, dir_v);
3492     }
3493 }
3494
3495 /* Helper function.  Returns true when there is a dependence between
3496    data references DRA and DRB.  */
3497
3498 static bool
3499 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3500                                struct data_reference *dra,
3501                                struct data_reference *drb,
3502                                struct loop *loop_nest)
3503 {
3504   unsigned int i;
3505   tree last_conflicts;
3506   struct subscript *subscript;
3507   tree res = NULL_TREE;
3508
3509   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3510        i++)
3511     {
3512       conflict_function *overlaps_a, *overlaps_b;
3513
3514       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3515                                       DR_ACCESS_FN (drb, i),
3516                                       &overlaps_a, &overlaps_b,
3517                                       &last_conflicts, loop_nest);
3518
3519       if (SUB_CONFLICTS_IN_A (subscript))
3520         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3521       if (SUB_CONFLICTS_IN_B (subscript))
3522         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3523
3524       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3525       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3526       SUB_LAST_CONFLICT (subscript) = last_conflicts;
3527
3528       /* If there is any undetermined conflict function we have to
3529          give a conservative answer in case we cannot prove that
3530          no dependence exists when analyzing another subscript.  */
3531       if (CF_NOT_KNOWN_P (overlaps_a)
3532           || CF_NOT_KNOWN_P (overlaps_b))
3533         {
3534           res = chrec_dont_know;
3535           continue;
3536         }
3537
3538       /* When there is a subscript with no dependence we can stop.  */
3539       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3540                || CF_NO_DEPENDENCE_P (overlaps_b))
3541         {
3542           res = chrec_known;
3543           break;
3544         }
3545     }
3546
3547   if (res == NULL_TREE)
3548     return true;
3549
3550   if (res == chrec_known)
3551     dependence_stats.num_dependence_independent++;
3552   else
3553     dependence_stats.num_dependence_undetermined++;
3554   finalize_ddr_dependent (ddr, res);
3555   return false;
3556 }
3557
3558 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3559
3560 static void
3561 subscript_dependence_tester (struct data_dependence_relation *ddr,
3562                              struct loop *loop_nest)
3563 {
3564
3565   if (dump_file && (dump_flags & TDF_DETAILS))
3566     fprintf (dump_file, "(subscript_dependence_tester \n");
3567
3568   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3569     dependence_stats.num_dependence_dependent++;
3570
3571   compute_subscript_distance (ddr);
3572   if (build_classic_dist_vector (ddr, loop_nest))
3573     build_classic_dir_vector (ddr);
3574
3575   if (dump_file && (dump_flags & TDF_DETAILS))
3576     fprintf (dump_file, ")\n");
3577 }
3578
3579 /* Returns true when all the access functions of A are affine or
3580    constant with respect to LOOP_NEST.  */
3581
3582 static bool
3583 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3584                                            const struct loop *loop_nest)
3585 {
3586   unsigned int i;
3587   VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3588   tree t;
3589
3590   FOR_EACH_VEC_ELT (tree, fns, i, t)
3591     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3592         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3593       return false;
3594
3595   return true;
3596 }
3597
3598 /* Initializes an equation for an OMEGA problem using the information
3599    contained in the ACCESS_FUN.  Returns true when the operation
3600    succeeded.
3601
3602    PB is the omega constraint system.
3603    EQ is the number of the equation to be initialized.
3604    OFFSET is used for shifting the variables names in the constraints:
3605    a constrain is composed of 2 * the number of variables surrounding
3606    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3607    then it is set to n.
3608    ACCESS_FUN is expected to be an affine chrec.  */
3609
3610 static bool
3611 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3612                        unsigned int offset, tree access_fun,
3613                        struct data_dependence_relation *ddr)
3614 {
3615   switch (TREE_CODE (access_fun))
3616     {
3617     case POLYNOMIAL_CHREC:
3618       {
3619         tree left = CHREC_LEFT (access_fun);
3620         tree right = CHREC_RIGHT (access_fun);
3621         int var = CHREC_VARIABLE (access_fun);
3622         unsigned var_idx;
3623
3624         if (TREE_CODE (right) != INTEGER_CST)
3625           return false;
3626
3627         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3628         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3629
3630         /* Compute the innermost loop index.  */
3631         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3632
3633         if (offset == 0)
3634           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3635             += int_cst_value (right);
3636
3637         switch (TREE_CODE (left))
3638           {
3639           case POLYNOMIAL_CHREC:
3640             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3641
3642           case INTEGER_CST:
3643             pb->eqs[eq].coef[0] += int_cst_value (left);
3644             return true;
3645
3646           default:
3647             return false;
3648           }
3649       }
3650
3651     case INTEGER_CST:
3652       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3653       return true;
3654
3655     default:
3656       return false;
3657     }
3658 }
3659
3660 /* As explained in the comments preceding init_omega_for_ddr, we have
3661    to set up a system for each loop level, setting outer loops
3662    variation to zero, and current loop variation to positive or zero.
3663    Save each lexico positive distance vector.  */
3664
3665 static void
3666 omega_extract_distance_vectors (omega_pb pb,
3667                                 struct data_dependence_relation *ddr)
3668 {
3669   int eq, geq;
3670   unsigned i, j;
3671   struct loop *loopi, *loopj;
3672   enum omega_result res;
3673
3674   /* Set a new problem for each loop in the nest.  The basis is the
3675      problem that we have initialized until now.  On top of this we
3676      add new constraints.  */
3677   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3678          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3679     {
3680       int dist = 0;
3681       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3682                                            DDR_NB_LOOPS (ddr));
3683
3684       omega_copy_problem (copy, pb);
3685
3686       /* For all the outer loops "loop_j", add "dj = 0".  */
3687       for (j = 0;
3688            j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3689         {
3690           eq = omega_add_zero_eq (copy, omega_black);
3691           copy->eqs[eq].coef[j + 1] = 1;
3692         }
3693
3694       /* For "loop_i", add "0 <= di".  */
3695       geq = omega_add_zero_geq (copy, omega_black);
3696       copy->geqs[geq].coef[i + 1] = 1;
3697
3698       /* Reduce the constraint system, and test that the current
3699          problem is feasible.  */
3700       res = omega_simplify_problem (copy);
3701       if (res == omega_false
3702           || res == omega_unknown
3703           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3704         goto next_problem;
3705
3706       for (eq = 0; eq < copy->num_subs; eq++)
3707         if (copy->subs[eq].key == (int) i + 1)
3708           {
3709             dist = copy->subs[eq].coef[0];
3710             goto found_dist;
3711           }
3712
3713       if (dist == 0)
3714         {
3715           /* Reinitialize problem...  */
3716           omega_copy_problem (copy, pb);
3717           for (j = 0;
3718                j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3719             {
3720               eq = omega_add_zero_eq (copy, omega_black);
3721               copy->eqs[eq].coef[j + 1] = 1;
3722             }
3723
3724           /* ..., but this time "di = 1".  */
3725           eq = omega_add_zero_eq (copy, omega_black);
3726           copy->eqs[eq].coef[i + 1] = 1;
3727           copy->eqs[eq].coef[0] = -1;
3728
3729           res = omega_simplify_problem (copy);
3730           if (res == omega_false
3731               || res == omega_unknown
3732               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3733             goto next_problem;
3734
3735           for (eq = 0; eq < copy->num_subs; eq++)
3736             if (copy->subs[eq].key == (int) i + 1)
3737               {
3738                 dist = copy->subs[eq].coef[0];
3739                 goto found_dist;
3740               }
3741         }
3742
3743     found_dist:;
3744       /* Save the lexicographically positive distance vector.  */
3745       if (dist >= 0)
3746         {
3747           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3748           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3749
3750           dist_v[i] = dist;
3751
3752           for (eq = 0; eq < copy->num_subs; eq++)
3753             if (copy->subs[eq].key > 0)
3754               {
3755                 dist = copy->subs[eq].coef[0];
3756                 dist_v[copy->subs[eq].key - 1] = dist;
3757               }
3758
3759           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3760             dir_v[j] = dir_from_dist (dist_v[j]);
3761
3762           save_dist_v (ddr, dist_v);
3763           save_dir_v (ddr, dir_v);
3764         }
3765
3766     next_problem:;
3767       omega_free_problem (copy);
3768     }
3769 }
3770
3771 /* This is called for each subscript of a tuple of data references:
3772    insert an equality for representing the conflicts.  */
3773
3774 static bool
3775 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3776                        struct data_dependence_relation *ddr,
3777                        omega_pb pb, bool *maybe_dependent)
3778 {
3779   int eq;
3780   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3781                                      TREE_TYPE (access_fun_b));
3782   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3783   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3784   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3785   tree minus_one;
3786
3787   /* When the fun_a - fun_b is not constant, the dependence is not
3788      captured by the classic distance vector representation.  */
3789   if (TREE_CODE (difference) != INTEGER_CST)
3790     return false;
3791
3792   /* ZIV test.  */
3793   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3794     {
3795       /* There is no dependence.  */
3796       *maybe_dependent = false;
3797       return true;
3798     }
3799
3800   minus_one = build_int_cst (type, -1);
3801   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3802
3803   eq = omega_add_zero_eq (pb, omega_black);
3804   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3805       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3806     /* There is probably a dependence, but the system of
3807        constraints cannot be built: answer "don't know".  */
3808     return false;
3809
3810   /* GCD test.  */
3811   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3812       && !int_divides_p (lambda_vector_gcd
3813                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3814                           2 * DDR_NB_LOOPS (ddr)),
3815                          pb->eqs[eq].coef[0]))
3816     {
3817       /* There is no dependence.  */
3818       *maybe_dependent = false;
3819       return true;
3820     }
3821
3822   return true;
3823 }
3824
3825 /* Helper function, same as init_omega_for_ddr but specialized for
3826    data references A and B.  */
3827
3828 static bool
3829 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3830                       struct data_dependence_relation *ddr,
3831                       omega_pb pb, bool *maybe_dependent)
3832 {
3833   unsigned i;
3834   int ineq;
3835   struct loop *loopi;
3836   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3837
3838   /* Insert an equality per subscript.  */
3839   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3840     {
3841       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3842                                   ddr, pb, maybe_dependent))
3843         return false;
3844       else if (*maybe_dependent == false)
3845         {
3846           /* There is no dependence.  */
3847           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3848           return true;
3849         }
3850     }
3851
3852   /* Insert inequalities: constraints corresponding to the iteration
3853      domain, i.e. the loops surrounding the references "loop_x" and
3854      the distance variables "dx".  The layout of the OMEGA
3855      representation is as follows:
3856      - coef[0] is the constant
3857      - coef[1..nb_loops] are the protected variables that will not be
3858      removed by the solver: the "dx"
3859      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3860   */
3861   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3862          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3863     {
3864       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3865
3866       /* 0 <= loop_x */
3867       ineq = omega_add_zero_geq (pb, omega_black);
3868       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3869
3870       /* 0 <= loop_x + dx */
3871       ineq = omega_add_zero_geq (pb, omega_black);
3872       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3873       pb->geqs[ineq].coef[i + 1] = 1;
3874
3875       if (nbi != -1)
3876         {
3877           /* loop_x <= nb_iters */
3878           ineq = omega_add_zero_geq (pb, omega_black);
3879           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3880           pb->geqs[ineq].coef[0] = nbi;
3881
3882           /* loop_x + dx <= nb_iters */
3883           ineq = omega_add_zero_geq (pb, omega_black);
3884           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3885           pb->geqs[ineq].coef[i + 1] = -1;
3886           pb->geqs[ineq].coef[0] = nbi;
3887
3888           /* A step "dx" bigger than nb_iters is not feasible, so
3889              add "0 <= nb_iters + dx",  */
3890           ineq = omega_add_zero_geq (pb, omega_black);
3891           pb->geqs[ineq].coef[i + 1] = 1;
3892           pb->geqs[ineq].coef[0] = nbi;
3893           /* and "dx <= nb_iters".  */
3894           ineq = omega_add_zero_geq (pb, omega_black);
3895           pb->geqs[ineq].coef[i + 1] = -1;
3896           pb->geqs[ineq].coef[0] = nbi;
3897         }
3898     }
3899
3900   omega_extract_distance_vectors (pb, ddr);
3901
3902   return true;
3903 }
3904
3905 /* Sets up the Omega dependence problem for the data dependence
3906    relation DDR.  Returns false when the constraint system cannot be
3907    built, ie. when the test answers "don't know".  Returns true
3908    otherwise, and when independence has been proved (using one of the
3909    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3910    set MAYBE_DEPENDENT to true.
3911
3912    Example: for setting up the dependence system corresponding to the
3913    conflicting accesses
3914
3915    | loop_i
3916    |   loop_j
3917    |     A[i, i+1] = ...
3918    |     ... A[2*j, 2*(i + j)]
3919    |   endloop_j
3920    | endloop_i
3921
3922    the following constraints come from the iteration domain:
3923
3924    0 <= i <= Ni
3925    0 <= i + di <= Ni
3926    0 <= j <= Nj
3927    0 <= j + dj <= Nj
3928
3929    where di, dj are the distance variables.  The constraints
3930    representing the conflicting elements are:
3931
3932    i = 2 * (j + dj)
3933    i + 1 = 2 * (i + di + j + dj)
3934
3935    For asking that the resulting distance vector (di, dj) be
3936    lexicographically positive, we insert the constraint "di >= 0".  If
3937    "di = 0" in the solution, we fix that component to zero, and we
3938    look at the inner loops: we set a new problem where all the outer
3939    loop distances are zero, and fix this inner component to be
3940    positive.  When one of the components is positive, we save that
3941    distance, and set a new problem where the distance on this loop is
3942    zero, searching for other distances in the inner loops.  Here is
3943    the classic example that illustrates that we have to set for each
3944    inner loop a new problem:
3945
3946    | loop_1
3947    |   loop_2
3948    |     A[10]
3949    |   endloop_2
3950    | endloop_1
3951
3952    we have to save two distances (1, 0) and (0, 1).
3953
3954    Given two array references, refA and refB, we have to set the
3955    dependence problem twice, refA vs. refB and refB vs. refA, and we
3956    cannot do a single test, as refB might occur before refA in the
3957    inner loops, and the contrary when considering outer loops: ex.
3958
3959    | loop_0
3960    |   loop_1
3961    |     loop_2
3962    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3963    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3964    |     endloop_2
3965    |   endloop_1
3966    | endloop_0
3967
3968    refB touches the elements in T before refA, and thus for the same
3969    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3970    but for successive loop_0 iterations, we have (1, -1, 1)
3971
3972    The Omega solver expects the distance variables ("di" in the
3973    previous example) to come first in the constraint system (as
3974    variables to be protected, or "safe" variables), the constraint
3975    system is built using the following layout:
3976
3977    "cst | distance vars | index vars".
3978 */
3979
3980 static bool
3981 init_omega_for_ddr (struct data_dependence_relation *ddr,
3982                     bool *maybe_dependent)
3983 {
3984   omega_pb pb;
3985   bool res = false;
3986
3987   *maybe_dependent = true;
3988
3989   if (same_access_functions (ddr))
3990     {
3991       unsigned j;
3992       lambda_vector dir_v;
3993
3994       /* Save the 0 vector.  */
3995       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3996       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3997       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3998         dir_v[j] = dir_equal;
3999       save_dir_v (ddr, dir_v);
4000
4001       /* Save the dependences carried by outer loops.  */
4002       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4003       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4004                                   maybe_dependent);
4005       omega_free_problem (pb);
4006       return res;
4007     }
4008
4009   /* Omega expects the protected variables (those that have to be kept
4010      after elimination) to appear first in the constraint system.
4011      These variables are the distance variables.  In the following
4012      initialization we declare NB_LOOPS safe variables, and the total
4013      number of variables for the constraint system is 2*NB_LOOPS.  */
4014   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4015   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4016                               maybe_dependent);
4017   omega_free_problem (pb);
4018
4019   /* Stop computation if not decidable, or no dependence.  */
4020   if (res == false || *maybe_dependent == false)
4021     return res;
4022
4023   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4024   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4025                               maybe_dependent);
4026   omega_free_problem (pb);
4027
4028   return res;
4029 }
4030
4031 /* Return true when DDR contains the same information as that stored
4032    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
4033
4034 static bool
4035 ddr_consistent_p (FILE *file,
4036                   struct data_dependence_relation *ddr,
4037                   VEC (lambda_vector, heap) *dist_vects,
4038                   VEC (lambda_vector, heap) *dir_vects)
4039 {
4040   unsigned int i, j;
4041
4042   /* If dump_file is set, output there.  */
4043   if (dump_file && (dump_flags & TDF_DETAILS))
4044     file = dump_file;
4045
4046   if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
4047     {
4048       lambda_vector b_dist_v;
4049       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4050                VEC_length (lambda_vector, dist_vects),
4051                DDR_NUM_DIST_VECTS (ddr));
4052
4053       fprintf (file, "Banerjee dist vectors:\n");
4054       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
4055         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4056
4057       fprintf (file, "Omega dist vectors:\n");
4058       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4059         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4060
4061       fprintf (file, "data dependence relation:\n");
4062       dump_data_dependence_relation (file, ddr);
4063
4064       fprintf (file, ")\n");
4065       return false;
4066     }
4067
4068   if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
4069     {
4070       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4071                VEC_length (lambda_vector, dir_vects),
4072                DDR_NUM_DIR_VECTS (ddr));
4073       return false;
4074     }
4075
4076   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4077     {
4078       lambda_vector a_dist_v;
4079       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4080
4081       /* Distance vectors are not ordered in the same way in the DDR
4082          and in the DIST_VECTS: search for a matching vector.  */
4083       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
4084         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4085           break;
4086
4087       if (j == VEC_length (lambda_vector, dist_vects))
4088         {
4089           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4090           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4091           fprintf (file, "not found in Omega dist vectors:\n");
4092           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4093           fprintf (file, "data dependence relation:\n");
4094           dump_data_dependence_relation (file, ddr);
4095           fprintf (file, ")\n");
4096         }
4097     }
4098
4099   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4100     {
4101       lambda_vector a_dir_v;
4102       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4103
4104       /* Direction vectors are not ordered in the same way in the DDR
4105          and in the DIR_VECTS: search for a matching vector.  */
4106       FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
4107         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4108           break;
4109
4110       if (j == VEC_length (lambda_vector, dist_vects))
4111         {
4112           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4113           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4114           fprintf (file, "not found in Omega dir vectors:\n");
4115           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4116           fprintf (file, "data dependence relation:\n");
4117           dump_data_dependence_relation (file, ddr);
4118           fprintf (file, ")\n");
4119         }
4120     }
4121
4122   return true;
4123 }
4124
4125 /* This computes the affine dependence relation between A and B with
4126    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4127    independence between two accesses, while CHREC_DONT_KNOW is used
4128    for representing the unknown relation.
4129
4130    Note that it is possible to stop the computation of the dependence
4131    relation the first time we detect a CHREC_KNOWN element for a given
4132    subscript.  */
4133
4134 static void
4135 compute_affine_dependence (struct data_dependence_relation *ddr,
4136                            struct loop *loop_nest)
4137 {
4138   struct data_reference *dra = DDR_A (ddr);
4139   struct data_reference *drb = DDR_B (ddr);
4140
4141   if (dump_file && (dump_flags & TDF_DETAILS))
4142     {
4143       fprintf (dump_file, "(compute_affine_dependence\n");
4144       fprintf (dump_file, "  stmt_a: ");
4145       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4146       fprintf (dump_file, "  stmt_b: ");
4147       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4148     }
4149
4150   /* Analyze only when the dependence relation is not yet known.  */
4151   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4152     {
4153       dependence_stats.num_dependence_tests++;
4154
4155       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4156           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4157         {
4158           if (flag_check_data_deps)
4159             {
4160               /* Compute the dependences using the first algorithm.  */
4161               subscript_dependence_tester (ddr, loop_nest);
4162
4163               if (dump_file && (dump_flags & TDF_DETAILS))
4164                 {
4165                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4166                   dump_data_dependence_relation (dump_file, ddr);
4167                 }
4168
4169               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4170                 {
4171                   bool maybe_dependent;
4172                   VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4173
4174                   /* Save the result of the first DD analyzer.  */
4175                   dist_vects = DDR_DIST_VECTS (ddr);
4176                   dir_vects = DDR_DIR_VECTS (ddr);
4177
4178                   /* Reset the information.  */
4179                   DDR_DIST_VECTS (ddr) = NULL;
4180                   DDR_DIR_VECTS (ddr) = NULL;
4181
4182                   /* Compute the same information using Omega.  */
4183                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
4184                     goto csys_dont_know;
4185
4186                   if (dump_file && (dump_flags & TDF_DETAILS))
4187                     {
4188                       fprintf (dump_file, "Omega Analyzer\n");
4189                       dump_data_dependence_relation (dump_file, ddr);
4190                     }
4191
4192                   /* Check that we get the same information.  */
4193                   if (maybe_dependent)
4194                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4195                                                   dir_vects));
4196                 }
4197             }
4198           else
4199             subscript_dependence_tester (ddr, loop_nest);
4200         }
4201
4202       /* As a last case, if the dependence cannot be determined, or if
4203          the dependence is considered too difficult to determine, answer
4204          "don't know".  */
4205       else
4206         {
4207         csys_dont_know:;
4208           dependence_stats.num_dependence_undetermined++;
4209
4210           if (dump_file && (dump_flags & TDF_DETAILS))
4211             {
4212               fprintf (dump_file, "Data ref a:\n");
4213               dump_data_reference (dump_file, dra);
4214               fprintf (dump_file, "Data ref b:\n");
4215               dump_data_reference (dump_file, drb);
4216               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4217             }
4218           finalize_ddr_dependent (ddr, chrec_dont_know);
4219         }
4220     }
4221
4222   if (dump_file && (dump_flags & TDF_DETAILS))
4223     {
4224       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4225         fprintf (dump_file, ") -> no dependence\n");
4226       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4227         fprintf (dump_file, ") -> dependence analysis failed\n");
4228       else
4229         fprintf (dump_file, ")\n");
4230     }
4231 }
4232
4233 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4234    the data references in DATAREFS, in the LOOP_NEST.  When
4235    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4236    relations.  Return true when successful, i.e. data references number
4237    is small enough to be handled.  */
4238
4239 bool
4240 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4241                          VEC (ddr_p, heap) **dependence_relations,
4242                          VEC (loop_p, heap) *loop_nest,
4243                          bool compute_self_and_rr)
4244 {
4245   struct data_dependence_relation *ddr;
4246   struct data_reference *a, *b;
4247   unsigned int i, j;
4248
4249   if ((int) VEC_length (data_reference_p, datarefs)
4250       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4251     {
4252       struct data_dependence_relation *ddr;
4253
4254       /* Insert a single relation into dependence_relations:
4255          chrec_dont_know.  */
4256       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4257       VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4258       return false;
4259     }
4260
4261   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4262     for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4263       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4264         {
4265           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4266           VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4267           if (loop_nest)
4268             compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4269         }
4270
4271   if (compute_self_and_rr)
4272     FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4273       {
4274         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4275         VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4276         if (loop_nest)
4277           compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4278       }
4279
4280   return true;
4281 }
4282
4283 /* Describes a location of a memory reference.  */
4284
4285 typedef struct data_ref_loc_d
4286 {
4287     /* Position of the memory reference.  */
4288     tree *pos;
4289
4290       /* True if the memory reference is read.  */
4291       bool is_read;
4292 } data_ref_loc;
4293
4294 DEF_VEC_O (data_ref_loc);
4295 DEF_VEC_ALLOC_O (data_ref_loc, heap);
4296
4297 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4298    true if STMT clobbers memory, false otherwise.  */
4299
4300 static bool
4301 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4302 {
4303   bool clobbers_memory = false;
4304   data_ref_loc *ref;
4305   tree *op0, *op1;
4306   enum gimple_code stmt_code = gimple_code (stmt);
4307
4308   *references = NULL;
4309
4310   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4311      Calls have side-effects, except those to const or pure
4312      functions.  */
4313   if ((stmt_code == GIMPLE_CALL
4314        && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4315       || (stmt_code == GIMPLE_ASM
4316           && (gimple_asm_volatile_p (stmt) || gimple_vuse (stmt))))
4317     clobbers_memory = true;
4318
4319   if (!gimple_vuse (stmt))
4320     return clobbers_memory;
4321
4322   if (stmt_code == GIMPLE_ASSIGN)
4323     {
4324       tree base;
4325       op0 = gimple_assign_lhs_ptr (stmt);
4326       op1 = gimple_assign_rhs1_ptr (stmt);
4327
4328       if (DECL_P (*op1)
4329           || (REFERENCE_CLASS_P (*op1)
4330               && (base = get_base_address (*op1))
4331               && TREE_CODE (base) != SSA_NAME))
4332         {
4333           ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4334           ref->pos = op1;
4335           ref->is_read = true;
4336         }
4337     }
4338   else if (stmt_code == GIMPLE_CALL)
4339     {
4340       unsigned i, n;
4341
4342       op0 = gimple_call_lhs_ptr (stmt);
4343       n = gimple_call_num_args (stmt);
4344       for (i = 0; i < n; i++)
4345         {
4346           op1 = gimple_call_arg_ptr (stmt, i);
4347
4348           if (DECL_P (*op1)
4349               || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4350             {
4351               ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4352               ref->pos = op1;
4353               ref->is_read = true;
4354             }
4355         }
4356     }
4357   else
4358     return clobbers_memory;
4359
4360   if (*op0
4361       && (DECL_P (*op0)
4362           || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4363     {
4364       ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4365       ref->pos = op0;
4366       ref->is_read = false;
4367     }
4368   return clobbers_memory;
4369 }
4370
4371 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4372    reference, returns false, otherwise returns true.  NEST is the outermost
4373    loop of the loop nest in which the references should be analyzed.  */
4374
4375 bool
4376 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4377                               VEC (data_reference_p, heap) **datarefs)
4378 {
4379   unsigned i;
4380   VEC (data_ref_loc, heap) *references;
4381   data_ref_loc *ref;
4382   bool ret = true;
4383   data_reference_p dr;
4384
4385   if (get_references_in_stmt (stmt, &references))
4386     {
4387       VEC_free (data_ref_loc, heap, references);
4388       return false;
4389     }
4390
4391   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4392     {
4393       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4394                             *ref->pos, stmt, ref->is_read);
4395       gcc_assert (dr != NULL);
4396       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4397     }
4398   VEC_free (data_ref_loc, heap, references);
4399   return ret;
4400 }
4401
4402 /* Stores the data references in STMT to DATAREFS.  If there is an
4403    unanalyzable reference, returns false, otherwise returns true.
4404    NEST is the outermost loop of the loop nest in which the references
4405    should be instantiated, LOOP is the loop in which the references
4406    should be analyzed.  */
4407
4408 bool
4409 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4410                                        VEC (data_reference_p, heap) **datarefs)
4411 {
4412   unsigned i;
4413   VEC (data_ref_loc, heap) *references;
4414   data_ref_loc *ref;
4415   bool ret = true;
4416   data_reference_p dr;
4417
4418   if (get_references_in_stmt (stmt, &references))
4419     {
4420       VEC_free (data_ref_loc, heap, references);
4421       return false;
4422     }
4423
4424   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4425     {
4426       dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4427       gcc_assert (dr != NULL);
4428       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4429     }
4430
4431   VEC_free (data_ref_loc, heap, references);
4432   return ret;
4433 }
4434
4435 /* Search the data references in LOOP, and record the information into
4436    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4437    difficult case, returns NULL_TREE otherwise.  */
4438
4439 tree
4440 find_data_references_in_bb (struct loop *loop, basic_block bb,
4441                             VEC (data_reference_p, heap) **datarefs)
4442 {
4443   gimple_stmt_iterator bsi;
4444
4445   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4446     {
4447       gimple stmt = gsi_stmt (bsi);
4448
4449       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4450         {
4451           struct data_reference *res;
4452           res = XCNEW (struct data_reference);
4453           VEC_safe_push (data_reference_p, heap, *datarefs, res);
4454
4455           return chrec_dont_know;
4456         }
4457     }
4458
4459   return NULL_TREE;
4460 }
4461
4462 /* Search the data references in LOOP, and record the information into
4463    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4464    difficult case, returns NULL_TREE otherwise.
4465
4466    TODO: This function should be made smarter so that it can handle address
4467    arithmetic as if they were array accesses, etc.  */
4468
4469 static tree
4470 find_data_references_in_loop (struct loop *loop,
4471                               VEC (data_reference_p, heap) **datarefs)
4472 {
4473   basic_block bb, *bbs;
4474   unsigned int i;
4475
4476   bbs = get_loop_body_in_dom_order (loop);
4477
4478   for (i = 0; i < loop->num_nodes; i++)
4479     {
4480       bb = bbs[i];
4481
4482       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4483         {
4484           free (bbs);
4485           return chrec_dont_know;
4486         }
4487     }
4488   free (bbs);
4489
4490   return NULL_TREE;
4491 }
4492
4493 /* Recursive helper function.  */
4494
4495 static bool
4496 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4497 {
4498   /* Inner loops of the nest should not contain siblings.  Example:
4499      when there are two consecutive loops,
4500
4501      | loop_0
4502      |   loop_1
4503      |     A[{0, +, 1}_1]
4504      |   endloop_1
4505      |   loop_2
4506      |     A[{0, +, 1}_2]
4507      |   endloop_2
4508      | endloop_0
4509
4510      the dependence relation cannot be captured by the distance
4511      abstraction.  */
4512   if (loop->next)
4513     return false;
4514
4515   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4516   if (loop->inner)
4517     return find_loop_nest_1 (loop->inner, loop_nest);
4518   return true;
4519 }
4520
4521 /* Return false when the LOOP is not well nested.  Otherwise return
4522    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4523    contain the loops from the outermost to the innermost, as they will
4524    appear in the classic distance vector.  */
4525
4526 bool
4527 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4528 {
4529   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4530   if (loop->inner)
4531     return find_loop_nest_1 (loop->inner, loop_nest);
4532   return true;
4533 }
4534
4535 /* Returns true when the data dependences have been computed, false otherwise.
4536    Given a loop nest LOOP, the following vectors are returned:
4537    DATAREFS is initialized to all the array elements contained in this loop,
4538    DEPENDENCE_RELATIONS contains the relations between the data references.
4539    Compute read-read and self relations if
4540    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4541
4542 bool
4543 compute_data_dependences_for_loop (struct loop *loop,
4544                                    bool compute_self_and_read_read_dependences,
4545                                    VEC (loop_p, heap) **loop_nest,
4546                                    VEC (data_reference_p, heap) **datarefs,
4547                                    VEC (ddr_p, heap) **dependence_relations)
4548 {
4549   bool res = true;
4550
4551   memset (&dependence_stats, 0, sizeof (dependence_stats));
4552
4553   /* If the loop nest is not well formed, or one of the data references
4554      is not computable, give up without spending time to compute other
4555      dependences.  */
4556   if (!loop
4557       || !find_loop_nest (loop, loop_nest)
4558       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4559       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4560                                    compute_self_and_read_read_dependences))
4561     res = false;
4562
4563   if (dump_file && (dump_flags & TDF_STATS))
4564     {
4565       fprintf (dump_file, "Dependence tester statistics:\n");
4566
4567       fprintf (dump_file, "Number of dependence tests: %d\n",
4568                dependence_stats.num_dependence_tests);
4569       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4570                dependence_stats.num_dependence_dependent);
4571       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4572                dependence_stats.num_dependence_independent);
4573       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4574                dependence_stats.num_dependence_undetermined);
4575
4576       fprintf (dump_file, "Number of subscript tests: %d\n",
4577                dependence_stats.num_subscript_tests);
4578       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4579                dependence_stats.num_subscript_undetermined);
4580       fprintf (dump_file, "Number of same subscript function: %d\n",
4581                dependence_stats.num_same_subscript_function);
4582
4583       fprintf (dump_file, "Number of ziv tests: %d\n",
4584                dependence_stats.num_ziv);
4585       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4586                dependence_stats.num_ziv_dependent);
4587       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4588                dependence_stats.num_ziv_independent);
4589       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4590                dependence_stats.num_ziv_unimplemented);
4591
4592       fprintf (dump_file, "Number of siv tests: %d\n",
4593                dependence_stats.num_siv);
4594       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4595                dependence_stats.num_siv_dependent);
4596       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4597                dependence_stats.num_siv_independent);
4598       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4599                dependence_stats.num_siv_unimplemented);
4600
4601       fprintf (dump_file, "Number of miv tests: %d\n",
4602                dependence_stats.num_miv);
4603       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4604                dependence_stats.num_miv_dependent);
4605       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4606                dependence_stats.num_miv_independent);
4607       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4608                dependence_stats.num_miv_unimplemented);
4609     }
4610
4611   return res;
4612 }
4613
4614 /* Returns true when the data dependences for the basic block BB have been
4615    computed, false otherwise.
4616    DATAREFS is initialized to all the array elements contained in this basic
4617    block, DEPENDENCE_RELATIONS contains the relations between the data
4618    references. Compute read-read and self relations if
4619    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4620 bool
4621 compute_data_dependences_for_bb (basic_block bb,
4622                                  bool compute_self_and_read_read_dependences,
4623                                  VEC (data_reference_p, heap) **datarefs,
4624                                  VEC (ddr_p, heap) **dependence_relations)
4625 {
4626   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4627     return false;
4628
4629   return compute_all_dependences (*datarefs, dependence_relations, NULL,
4630                                   compute_self_and_read_read_dependences);
4631 }
4632
4633 /* Entry point (for testing only).  Analyze all the data references
4634    and the dependence relations in LOOP.
4635
4636    The data references are computed first.
4637
4638    A relation on these nodes is represented by a complete graph.  Some
4639    of the relations could be of no interest, thus the relations can be
4640    computed on demand.
4641
4642    In the following function we compute all the relations.  This is
4643    just a first implementation that is here for:
4644    - for showing how to ask for the dependence relations,
4645    - for the debugging the whole dependence graph,
4646    - for the dejagnu testcases and maintenance.
4647
4648    It is possible to ask only for a part of the graph, avoiding to
4649    compute the whole dependence graph.  The computed dependences are
4650    stored in a knowledge base (KB) such that later queries don't
4651    recompute the same information.  The implementation of this KB is
4652    transparent to the optimizer, and thus the KB can be changed with a
4653    more efficient implementation, or the KB could be disabled.  */
4654 static void
4655 analyze_all_data_dependences (struct loop *loop)
4656 {
4657   unsigned int i;
4658   int nb_data_refs = 10;
4659   VEC (data_reference_p, heap) *datarefs =
4660     VEC_alloc (data_reference_p, heap, nb_data_refs);
4661   VEC (ddr_p, heap) *dependence_relations =
4662     VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4663   VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4664
4665   /* Compute DDs on the whole function.  */
4666   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4667                                      &dependence_relations);
4668
4669   if (dump_file)
4670     {
4671       dump_data_dependence_relations (dump_file, dependence_relations);
4672       fprintf (dump_file, "\n\n");
4673
4674       if (dump_flags & TDF_DETAILS)
4675         dump_dist_dir_vectors (dump_file, dependence_relations);
4676
4677       if (dump_flags & TDF_STATS)
4678         {
4679           unsigned nb_top_relations = 0;
4680           unsigned nb_bot_relations = 0;
4681           unsigned nb_chrec_relations = 0;
4682           struct data_dependence_relation *ddr;
4683
4684           FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4685             {
4686               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4687                 nb_top_relations++;
4688
4689               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4690                 nb_bot_relations++;
4691
4692               else
4693                 nb_chrec_relations++;
4694             }
4695
4696           gather_stats_on_scev_database ();
4697         }
4698     }
4699
4700   VEC_free (loop_p, heap, loop_nest);
4701   free_dependence_relations (dependence_relations);
4702   free_data_refs (datarefs);
4703 }
4704
4705 /* Computes all the data dependences and check that the results of
4706    several analyzers are the same.  */
4707
4708 void
4709 tree_check_data_deps (void)
4710 {
4711   loop_iterator li;
4712   struct loop *loop_nest;
4713
4714   FOR_EACH_LOOP (li, loop_nest, 0)
4715     analyze_all_data_dependences (loop_nest);
4716 }
4717
4718 /* Free the memory used by a data dependence relation DDR.  */
4719
4720 void
4721 free_dependence_relation (struct data_dependence_relation *ddr)
4722 {
4723   if (ddr == NULL)
4724     return;
4725
4726   if (DDR_SUBSCRIPTS (ddr))
4727     free_subscripts (DDR_SUBSCRIPTS (ddr));
4728   if (DDR_DIST_VECTS (ddr))
4729     VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4730   if (DDR_DIR_VECTS (ddr))
4731     VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4732
4733   free (ddr);
4734 }
4735
4736 /* Free the memory used by the data dependence relations from
4737    DEPENDENCE_RELATIONS.  */
4738
4739 void
4740 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4741 {
4742   unsigned int i;
4743   struct data_dependence_relation *ddr;
4744
4745   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4746     if (ddr)
4747       free_dependence_relation (ddr);
4748
4749   VEC_free (ddr_p, heap, dependence_relations);
4750 }
4751
4752 /* Free the memory used by the data references from DATAREFS.  */
4753
4754 void
4755 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4756 {
4757   unsigned int i;
4758   struct data_reference *dr;
4759
4760   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4761     free_data_ref (dr);
4762   VEC_free (data_reference_p, heap, datarefs);
4763 }
4764
4765 \f
4766
4767 /* Dump vertex I in RDG to FILE.  */
4768
4769 static void
4770 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4771 {
4772   struct vertex *v = &(rdg->vertices[i]);
4773   struct graph_edge *e;
4774
4775   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4776            RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4777            RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4778
4779   if (v->pred)
4780     for (e = v->pred; e; e = e->pred_next)
4781       fprintf (file, " %d", e->src);
4782
4783   fprintf (file, ") (out:");
4784
4785   if (v->succ)
4786     for (e = v->succ; e; e = e->succ_next)
4787       fprintf (file, " %d", e->dest);
4788
4789   fprintf (file, ")\n");
4790   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4791   fprintf (file, ")\n");
4792 }
4793
4794 /* Call dump_rdg_vertex on stderr.  */
4795
4796 DEBUG_FUNCTION void
4797 debug_rdg_vertex (struct graph *rdg, int i)
4798 {
4799   dump_rdg_vertex (stderr, rdg, i);
4800 }
4801
4802 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4803    dumped vertices to that bitmap.  */
4804
4805 static void
4806 dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4807 {
4808   int i;
4809
4810   fprintf (file, "(%d\n", c);
4811
4812   for (i = 0; i < rdg->n_vertices; i++)
4813     if (rdg->vertices[i].component == c)
4814       {
4815         if (dumped)
4816           bitmap_set_bit (dumped, i);
4817
4818         dump_rdg_vertex (file, rdg, i);
4819       }
4820
4821   fprintf (file, ")\n");
4822 }
4823
4824 /* Call dump_rdg_vertex on stderr.  */
4825
4826 DEBUG_FUNCTION void
4827 debug_rdg_component (struct graph *rdg, int c)
4828 {
4829   dump_rdg_component (stderr, rdg, c, NULL);
4830 }
4831
4832 /* Dump the reduced dependence graph RDG to FILE.  */
4833
4834 void
4835 dump_rdg (FILE *file, struct graph *rdg)
4836 {
4837   int i;
4838   bitmap dumped = BITMAP_ALLOC (NULL);
4839
4840   fprintf (file, "(rdg\n");
4841
4842   for (i = 0; i < rdg->n_vertices; i++)
4843     if (!bitmap_bit_p (dumped, i))
4844       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4845
4846   fprintf (file, ")\n");
4847   BITMAP_FREE (dumped);
4848 }
4849
4850 /* Call dump_rdg on stderr.  */
4851
4852 DEBUG_FUNCTION void
4853 debug_rdg (struct graph *rdg)
4854 {
4855   dump_rdg (stderr, rdg);
4856 }
4857
4858 static void
4859 dot_rdg_1 (FILE *file, struct graph *rdg)
4860 {
4861   int i;
4862
4863   fprintf (file, "digraph RDG {\n");
4864
4865   for (i = 0; i < rdg->n_vertices; i++)
4866     {
4867       struct vertex *v = &(rdg->vertices[i]);
4868       struct graph_edge *e;
4869
4870       /* Highlight reads from memory.  */
4871       if (RDG_MEM_READS_STMT (rdg, i))
4872        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4873
4874       /* Highlight stores to memory.  */
4875       if (RDG_MEM_WRITE_STMT (rdg, i))
4876        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4877
4878       if (v->succ)
4879        for (e = v->succ; e; e = e->succ_next)
4880          switch (RDGE_TYPE (e))
4881            {
4882            case input_dd:
4883              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4884              break;
4885
4886            case output_dd:
4887              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4888              break;
4889
4890            case flow_dd:
4891              /* These are the most common dependences: don't print these. */
4892              fprintf (file, "%d -> %d \n", i, e->dest);
4893              break;
4894
4895            case anti_dd:
4896              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4897              break;
4898
4899            default:
4900              gcc_unreachable ();
4901            }
4902     }
4903
4904   fprintf (file, "}\n\n");
4905 }
4906
4907 /* Display the Reduced Dependence Graph using dotty.  */
4908 extern void dot_rdg (struct graph *);
4909
4910 DEBUG_FUNCTION void
4911 dot_rdg (struct graph *rdg)
4912 {
4913   /* When debugging, enable the following code.  This cannot be used
4914      in production compilers because it calls "system".  */
4915 #if 0
4916   FILE *file = fopen ("/tmp/rdg.dot", "w");
4917   gcc_assert (file != NULL);
4918
4919   dot_rdg_1 (file, rdg);
4920   fclose (file);
4921
4922   system ("dotty /tmp/rdg.dot &");
4923 #else
4924   dot_rdg_1 (stderr, rdg);
4925 #endif
4926 }
4927
4928 /* This structure is used for recording the mapping statement index in
4929    the RDG.  */
4930
4931 struct GTY(()) rdg_vertex_info
4932 {
4933   gimple stmt;
4934   int index;
4935 };
4936
4937 /* Returns the index of STMT in RDG.  */
4938
4939 int
4940 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4941 {
4942   struct rdg_vertex_info rvi, *slot;
4943
4944   rvi.stmt = stmt;
4945   slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4946
4947   if (!slot)
4948     return -1;
4949
4950   return slot->index;
4951 }
4952
4953 /* Creates an edge in RDG for each distance vector from DDR.  The
4954    order that we keep track of in the RDG is the order in which
4955    statements have to be executed.  */
4956
4957 static void
4958 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4959 {
4960   struct graph_edge *e;
4961   int va, vb;
4962   data_reference_p dra = DDR_A (ddr);
4963   data_reference_p drb = DDR_B (ddr);
4964   unsigned level = ddr_dependence_level (ddr);
4965
4966   /* For non scalar dependences, when the dependence is REVERSED,
4967      statement B has to be executed before statement A.  */
4968   if (level > 0
4969       && !DDR_REVERSED_P (ddr))
4970     {
4971       data_reference_p tmp = dra;
4972       dra = drb;
4973       drb = tmp;
4974     }
4975
4976   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4977   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4978
4979   if (va < 0 || vb < 0)
4980     return;
4981
4982   e = add_edge (rdg, va, vb);
4983   e->data = XNEW (struct rdg_edge);
4984
4985   RDGE_LEVEL (e) = level;
4986   RDGE_RELATION (e) = ddr;
4987
4988   /* Determines the type of the data dependence.  */
4989   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4990     RDGE_TYPE (e) = input_dd;
4991   else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4992     RDGE_TYPE (e) = output_dd;
4993   else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4994     RDGE_TYPE (e) = flow_dd;
4995   else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4996     RDGE_TYPE (e) = anti_dd;
4997 }
4998
4999 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
5000    the index of DEF in RDG.  */
5001
5002 static void
5003 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
5004 {
5005   use_operand_p imm_use_p;
5006   imm_use_iterator iterator;
5007
5008   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
5009     {
5010       struct graph_edge *e;
5011       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
5012
5013       if (use < 0)
5014         continue;
5015
5016       e = add_edge (rdg, idef, use);
5017       e->data = XNEW (struct rdg_edge);
5018       RDGE_TYPE (e) = flow_dd;
5019       RDGE_RELATION (e) = NULL;
5020     }
5021 }
5022
5023 /* Creates the edges of the reduced dependence graph RDG.  */
5024
5025 static void
5026 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
5027 {
5028   int i;
5029   struct data_dependence_relation *ddr;
5030   def_operand_p def_p;
5031   ssa_op_iter iter;
5032
5033   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
5034     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5035       create_rdg_edge_for_ddr (rdg, ddr);
5036
5037   for (i = 0; i < rdg->n_vertices; i++)
5038     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
5039                               iter, SSA_OP_DEF)
5040       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
5041 }
5042
5043 /* Build the vertices of the reduced dependence graph RDG.  */
5044
5045 void
5046 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
5047 {
5048   int i, j;
5049   gimple stmt;
5050
5051   FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
5052     {
5053       VEC (data_ref_loc, heap) *references;
5054       data_ref_loc *ref;
5055       struct vertex *v = &(rdg->vertices[i]);
5056       struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
5057       struct rdg_vertex_info **slot;
5058
5059       rvi->stmt = stmt;
5060       rvi->index = i;
5061       slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
5062
5063       if (!*slot)
5064         *slot = rvi;
5065       else
5066         free (rvi);
5067
5068       v->data = XNEW (struct rdg_vertex);
5069       RDG_STMT (rdg, i) = stmt;
5070
5071       RDG_MEM_WRITE_STMT (rdg, i) = false;
5072       RDG_MEM_READS_STMT (rdg, i) = false;
5073       if (gimple_code (stmt) == GIMPLE_PHI)
5074         continue;
5075
5076       get_references_in_stmt (stmt, &references);
5077       FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
5078         if (!ref->is_read)
5079           RDG_MEM_WRITE_STMT (rdg, i) = true;
5080         else
5081           RDG_MEM_READS_STMT (rdg, i) = true;
5082
5083       VEC_free (data_ref_loc, heap, references);
5084     }
5085 }
5086
5087 /* Initialize STMTS with all the statements of LOOP.  When
5088    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
5089    which we discover statements is important as
5090    generate_loops_for_partition is using the same traversal for
5091    identifying statements. */
5092
5093 static void
5094 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5095 {
5096   unsigned int i;
5097   basic_block *bbs = get_loop_body_in_dom_order (loop);
5098
5099   for (i = 0; i < loop->num_nodes; i++)
5100     {
5101       basic_block bb = bbs[i];
5102       gimple_stmt_iterator bsi;
5103       gimple stmt;
5104
5105       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5106         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5107
5108       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5109         {
5110           stmt = gsi_stmt (bsi);
5111           if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5112             VEC_safe_push (gimple, heap, *stmts, stmt);
5113         }
5114     }
5115
5116   free (bbs);
5117 }
5118
5119 /* Returns true when all the dependences are computable.  */
5120
5121 static bool
5122 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5123 {
5124   ddr_p ddr;
5125   unsigned int i;
5126
5127   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5128     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5129       return false;
5130
5131   return true;
5132 }
5133
5134 /* Computes a hash function for element ELT.  */
5135
5136 static hashval_t
5137 hash_stmt_vertex_info (const void *elt)
5138 {
5139   const struct rdg_vertex_info *const rvi =
5140     (const struct rdg_vertex_info *) elt;
5141   gimple stmt = rvi->stmt;
5142
5143   return htab_hash_pointer (stmt);
5144 }
5145
5146 /* Compares database elements E1 and E2.  */
5147
5148 static int
5149 eq_stmt_vertex_info (const void *e1, const void *e2)
5150 {
5151   const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5152   const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5153
5154   return elt1->stmt == elt2->stmt;
5155 }
5156
5157 /* Free the element E.  */
5158
5159 static void
5160 hash_stmt_vertex_del (void *e)
5161 {
5162   free (e);
5163 }
5164
5165 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5166    statement of the loop nest, and one edge per data dependence or
5167    scalar dependence.  */
5168
5169 struct graph *
5170 build_empty_rdg (int n_stmts)
5171 {
5172   int nb_data_refs = 10;
5173   struct graph *rdg = new_graph (n_stmts);
5174
5175   rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5176                               eq_stmt_vertex_info, hash_stmt_vertex_del);
5177   return rdg;
5178 }
5179
5180 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5181    statement of the loop nest, and one edge per data dependence or
5182    scalar dependence.  */
5183
5184 struct graph *
5185 build_rdg (struct loop *loop,
5186            VEC (loop_p, heap) **loop_nest,
5187            VEC (ddr_p, heap) **dependence_relations,
5188            VEC (data_reference_p, heap) **datarefs)
5189 {
5190   struct graph *rdg = NULL;
5191
5192   if (compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5193                                          dependence_relations)
5194       && known_dependences_p (*dependence_relations))
5195     {
5196       VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5197       stmts_from_loop (loop, &stmts);
5198       rdg = build_empty_rdg (VEC_length (gimple, stmts));
5199       create_rdg_vertices (rdg, stmts);
5200       create_rdg_edges (rdg, *dependence_relations);
5201       VEC_free (gimple, heap, stmts);
5202     }
5203
5204   return rdg;
5205 }
5206
5207 /* Free the reduced dependence graph RDG.  */
5208
5209 void
5210 free_rdg (struct graph *rdg)
5211 {
5212   int i;
5213
5214   for (i = 0; i < rdg->n_vertices; i++)
5215     {
5216       struct vertex *v = &(rdg->vertices[i]);
5217       struct graph_edge *e;
5218
5219       for (e = v->succ; e; e = e->succ_next)
5220         free (e->data);
5221
5222       free (v->data);
5223     }
5224
5225   htab_delete (rdg->indices);
5226   free_graph (rdg);
5227 }
5228
5229 /* Initialize STMTS with all the statements of LOOP that contain a
5230    store to memory.  */
5231
5232 void
5233 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5234 {
5235   unsigned int i;
5236   basic_block *bbs = get_loop_body_in_dom_order (loop);
5237
5238   for (i = 0; i < loop->num_nodes; i++)
5239     {
5240       basic_block bb = bbs[i];
5241       gimple_stmt_iterator bsi;
5242
5243       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5244         if (gimple_vdef (gsi_stmt (bsi)))
5245           VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5246     }
5247
5248   free (bbs);
5249 }
5250
5251 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5252    that contains a data reference on its LHS with a stride of the same
5253    size as its unit type.  */
5254
5255 bool
5256 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5257 {
5258   tree op0, op1;
5259   bool res;
5260   struct data_reference *dr;
5261
5262   if (!stmt
5263       || !gimple_vdef (stmt)
5264       || !is_gimple_assign (stmt)
5265       || !gimple_assign_single_p (stmt)
5266       || !(op1 = gimple_assign_rhs1 (stmt))
5267       || !(integer_zerop (op1) || real_zerop (op1)))
5268     return false;
5269
5270   dr = XCNEW (struct data_reference);
5271   op0 = gimple_assign_lhs (stmt);
5272
5273   DR_STMT (dr) = stmt;
5274   DR_REF (dr) = op0;
5275
5276   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5277     && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5278
5279   free_data_ref (dr);
5280   return res;
5281 }
5282
5283 /* Initialize STMTS with all the statements of LOOP that contain a
5284    store to memory of the form "A[i] = 0".  */
5285
5286 void
5287 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5288 {
5289   unsigned int i;
5290   basic_block bb;
5291   gimple_stmt_iterator si;
5292   gimple stmt;
5293   basic_block *bbs = get_loop_body_in_dom_order (loop);
5294
5295   for (i = 0; i < loop->num_nodes; i++)
5296     for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5297       if ((stmt = gsi_stmt (si))
5298           && stmt_with_adjacent_zero_store_dr_p (stmt))
5299         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5300
5301   free (bbs);
5302 }
5303
5304 /* For a data reference REF, return the declaration of its base
5305    address or NULL_TREE if the base is not determined.  */
5306
5307 static inline tree
5308 ref_base_address (gimple stmt, data_ref_loc *ref)
5309 {
5310   tree base = NULL_TREE;
5311   tree base_address;
5312   struct data_reference *dr = XCNEW (struct data_reference);
5313
5314   DR_STMT (dr) = stmt;
5315   DR_REF (dr) = *ref->pos;
5316   dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5317   base_address = DR_BASE_ADDRESS (dr);
5318
5319   if (!base_address)
5320     goto end;
5321
5322   switch (TREE_CODE (base_address))
5323     {
5324     case ADDR_EXPR:
5325       base = TREE_OPERAND (base_address, 0);
5326       break;
5327
5328     default:
5329       base = base_address;
5330       break;
5331     }
5332
5333  end:
5334   free_data_ref (dr);
5335   return base;
5336 }
5337
5338 /* Determines whether the statement from vertex V of the RDG has a
5339    definition used outside the loop that contains this statement.  */
5340
5341 bool
5342 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5343 {
5344   gimple stmt = RDG_STMT (rdg, v);
5345   struct loop *loop = loop_containing_stmt (stmt);
5346   use_operand_p imm_use_p;
5347   imm_use_iterator iterator;
5348   ssa_op_iter it;
5349   def_operand_p def_p;
5350
5351   if (!loop)
5352     return true;
5353
5354   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5355     {
5356       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5357         {
5358           if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5359             return true;
5360         }
5361     }
5362
5363   return false;
5364 }
5365
5366 /* Determines whether statements S1 and S2 access to similar memory
5367    locations.  Two memory accesses are considered similar when they
5368    have the same base address declaration, i.e. when their
5369    ref_base_address is the same.  */
5370
5371 bool
5372 have_similar_memory_accesses (gimple s1, gimple s2)
5373 {
5374   bool res = false;
5375   unsigned i, j;
5376   VEC (data_ref_loc, heap) *refs1, *refs2;
5377   data_ref_loc *ref1, *ref2;
5378
5379   get_references_in_stmt (s1, &refs1);
5380   get_references_in_stmt (s2, &refs2);
5381
5382   FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5383     {
5384       tree base1 = ref_base_address (s1, ref1);
5385
5386       if (base1)
5387         FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5388           if (base1 == ref_base_address (s2, ref2))
5389             {
5390               res = true;
5391               goto end;
5392             }
5393     }
5394
5395  end:
5396   VEC_free (data_ref_loc, heap, refs1);
5397   VEC_free (data_ref_loc, heap, refs2);
5398   return res;
5399 }
5400
5401 /* Helper function for the hashtab.  */
5402
5403 static int
5404 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5405 {
5406   return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5407                                        CONST_CAST_GIMPLE ((const_gimple) s2));
5408 }
5409
5410 /* Helper function for the hashtab.  */
5411
5412 static hashval_t
5413 ref_base_address_1 (const void *s)
5414 {
5415   gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5416   unsigned i;
5417   VEC (data_ref_loc, heap) *refs;
5418   data_ref_loc *ref;
5419   hashval_t res = 0;
5420
5421   get_references_in_stmt (stmt, &refs);
5422
5423   FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5424     if (!ref->is_read)
5425       {
5426         res = htab_hash_pointer (ref_base_address (stmt, ref));
5427         break;
5428       }
5429
5430   VEC_free (data_ref_loc, heap, refs);
5431   return res;
5432 }
5433
5434 /* Try to remove duplicated write data references from STMTS.  */
5435
5436 void
5437 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5438 {
5439   unsigned i;
5440   gimple stmt;
5441   htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5442                              have_similar_memory_accesses_1, NULL);
5443
5444   for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5445     {
5446       void **slot;
5447
5448       slot = htab_find_slot (seen, stmt, INSERT);
5449
5450       if (*slot)
5451         VEC_ordered_remove (gimple, *stmts, i);
5452       else
5453         {
5454           *slot = (void *) stmt;
5455           i++;
5456         }
5457     }
5458
5459   htab_delete (seen);
5460 }
5461