gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "stringpool.h"
  99 #include "tree-vrp.h"
 100 #include "tree-ssanames.h"
 101 #include "tree-eh.h"
 102
 103 static struct datadep_stats
 104 {
 105   int num_dependence_tests;
 106   int num_dependence_dependent;
 107   int num_dependence_independent;
 108   int num_dependence_undetermined;
 109
 110   int num_subscript_tests;
 111   int num_subscript_undetermined;
 112   int num_same_subscript_function;
 113
 114   int num_ziv;
 115   int num_ziv_independent;
 116   int num_ziv_dependent;
 117   int num_ziv_unimplemented;
 118
 119   int num_siv;
 120   int num_siv_independent;
 121   int num_siv_dependent;
 122   int num_siv_unimplemented;
 123
 124   int num_miv;
 125   int num_miv_independent;
 126   int num_miv_dependent;
 127   int num_miv_unimplemented;
 128 } dependence_stats;
 129
/* Forward declaration of a file-local helper.  Being static, it has to be
   defined elsewhere in this translation unit.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           struct loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (int a, int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   unsigned int i;
 174   struct data_reference *dr;
 175
 176   FOR_EACH_VEC_ELT (datarefs, i, dr)
 177     dump_data_reference (file, dr);
 178 }
 179
 180 /* Unified dump into FILE all the data references from DATAREFS.  */
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> &ref)
 184 {
 185   dump_data_references (stderr, ref);
 186 }
 187
 188 DEBUG_FUNCTION void
 189 debug (vec<data_reference_p> *ptr)
 190 {
 191   if (ptr)
 192     debug (*ptr);
 193   else
 194     fprintf (stderr, "<nil>\n");
 195 }
 196
 197
 198 /* Dump into STDERR all the data references from DATAREFS.  */
 199
 200 DEBUG_FUNCTION void
 201 debug_data_references (vec<data_reference_p> datarefs)
 202 {
 203   dump_data_references (stderr, datarefs);
 204 }
 205
 206 /* Print to STDERR the data_reference DR.  */
 207
 208 DEBUG_FUNCTION void
 209 debug_data_reference (struct data_reference *dr)
 210 {
 211   dump_data_reference (stderr, dr);
 212 }
 213
 214 /* Dump function for a DATA_REFERENCE structure.  */
 215
 216 void
 217 dump_data_reference (FILE *outf,
 218                      struct data_reference *dr)
 219 {
 220   unsigned int i;
 221
 222   fprintf (outf, "#(Data Ref: \n");
 223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 224   fprintf (outf, "#  stmt: ");
 225   print_gimple_stmt (outf, DR_STMT (dr), 0);
 226   fprintf (outf, "#  ref: ");
 227   print_generic_stmt (outf, DR_REF (dr));
 228   fprintf (outf, "#  base_object: ");
 229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 230
 231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 232     {
 233       fprintf (outf, "#  Access function %d: ", i);
 234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 235     }
 236   fprintf (outf, "#)\n");
 237 }
 238
 239 /* Unified dump function for a DATA_REFERENCE structure.  */
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference &ref)
 243 {
 244   dump_data_reference (stderr, &ref);
 245 }
 246
 247 DEBUG_FUNCTION void
 248 debug (data_reference *ptr)
 249 {
 250   if (ptr)
 251     debug (*ptr);
 252   else
 253     fprintf (stderr, "<nil>\n");
 254 }
 255
 256
 257 /* Dumps the affine function described by FN to the file OUTF.  */
 258
 259 DEBUG_FUNCTION void
 260 dump_affine_function (FILE *outf, affine_fn fn)
 261 {
 262   unsigned i;
 263   tree coef;
 264
 265   print_generic_expr (outf, fn[0], TDF_SLIM);
 266   for (i = 1; fn.iterate (i, &coef); i++)
 267     {
 268       fprintf (outf, " + ");
 269       print_generic_expr (outf, coef, TDF_SLIM);
 270       fprintf (outf, " * x_%u", i);
 271     }
 272 }
 273
 274 /* Dumps the conflict function CF to the file OUTF.  */
 275
 276 DEBUG_FUNCTION void
 277 dump_conflict_function (FILE *outf, conflict_function *cf)
 278 {
 279   unsigned i;
 280
 281   if (cf->n == NO_DEPENDENCE)
 282     fprintf (outf, "no dependence");
 283   else if (cf->n == NOT_KNOWN)
 284     fprintf (outf, "not known");
 285   else
 286     {
 287       for (i = 0; i < cf->n; i++)
 288         {
 289           if (i != 0)
 290             fprintf (outf, " ");
 291           fprintf (outf, "[");
 292           dump_affine_function (outf, cf->fns[i]);
 293           fprintf (outf, "]");
 294         }
 295     }
 296 }
 297
 298 /* Dump function for a SUBSCRIPT structure.  */
 299
 300 DEBUG_FUNCTION void
 301 dump_subscript (FILE *outf, struct subscript *subscript)
 302 {
 303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 304
 305   fprintf (outf, "\n (subscript \n");
 306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 307   dump_conflict_function (outf, cf);
 308   if (CF_NONTRIVIAL_P (cf))
 309     {
 310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 311       fprintf (outf, "\n  last_conflict: ");
 312       print_generic_expr (outf, last_iteration);
 313     }
 314
 315   cf = SUB_CONFLICTS_IN_B (subscript);
 316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 317   dump_conflict_function (outf, cf);
 318   if (CF_NONTRIVIAL_P (cf))
 319     {
 320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 321       fprintf (outf, "\n  last_conflict: ");
 322       print_generic_expr (outf, last_iteration);
 323     }
 324
 325   fprintf (outf, "\n  (Subscript distance: ");
 326   print_generic_expr (outf, SUB_DISTANCE (subscript));
 327   fprintf (outf, " ))\n");
 328 }
 329
 330 /* Print the classic direction vector DIRV to OUTF.  */
 331
 332 DEBUG_FUNCTION void
 333 print_direction_vector (FILE *outf,
 334                         lambda_vector dirv,
 335                         int length)
 336 {
 337   int eq;
 338
 339   for (eq = 0; eq < length; eq++)
 340     {
 341       enum data_dependence_direction dir = ((enum data_dependence_direction)
 342                                             dirv[eq]);
 343
 344       switch (dir)
 345         {
 346         case dir_positive:
 347           fprintf (outf, "    +");
 348           break;
 349         case dir_negative:
 350           fprintf (outf, "    -");
 351           break;
 352         case dir_equal:
 353           fprintf (outf, "    =");
 354           break;
 355         case dir_positive_or_equal:
 356           fprintf (outf, "   +=");
 357           break;
 358         case dir_positive_or_negative:
 359           fprintf (outf, "   +-");
 360           break;
 361         case dir_negative_or_equal:
 362           fprintf (outf, "   -=");
 363           break;
 364         case dir_star:
 365           fprintf (outf, "    *");
 366           break;
 367         default:
 368           fprintf (outf, "indep");
 369           break;
 370         }
 371     }
 372   fprintf (outf, "\n");
 373 }
 374
 375 /* Print a vector of direction vectors.  */
 376
 377 DEBUG_FUNCTION void
 378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 379                    int length)
 380 {
 381   unsigned j;
 382   lambda_vector v;
 383
 384   FOR_EACH_VEC_ELT (dir_vects, j, v)
 385     print_direction_vector (outf, v, length);
 386 }
 387
 388 /* Print out a vector VEC of length N to OUTFILE.  */
 389
 390 DEBUG_FUNCTION void
 391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 392 {
 393   int i;
 394
 395   for (i = 0; i < n; i++)
 396     fprintf (outfile, "%3d ", vector[i]);
 397   fprintf (outfile, "\n");
 398 }
 399
 400 /* Print a vector of distance vectors.  */
 401
 402 DEBUG_FUNCTION void
 403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 404                     int length)
 405 {
 406   unsigned j;
 407   lambda_vector v;
 408
 409   FOR_EACH_VEC_ELT (dist_vects, j, v)
 410     print_lambda_vector (outf, v, length);
 411 }
 412
 413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 414
 415 DEBUG_FUNCTION void
 416 dump_data_dependence_relation (FILE *outf,
 417                                struct data_dependence_relation *ddr)
 418 {
 419   struct data_reference *dra, *drb;
 420
 421   fprintf (outf, "(Data Dep: \n");
 422
 423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (ddr)
 426         {
 427           dra = DDR_A (ddr);
 428           drb = DDR_B (ddr);
 429           if (dra)
 430             dump_data_reference (outf, dra);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433           if (drb)
 434             dump_data_reference (outf, drb);
 435           else
 436             fprintf (outf, "    (nil)\n");
 437         }
 438       fprintf (outf, "    (don't know)\n)\n");
 439       return;
 440     }
 441
 442   dra = DDR_A (ddr);
 443   drb = DDR_B (ddr);
 444   dump_data_reference (outf, dra);
 445   dump_data_reference (outf, drb);
 446
 447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 448     fprintf (outf, "    (no dependence)\n");
 449
 450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 451     {
 452       unsigned int i;
 453       struct loop *loopi;
 454
 455       subscript *sub;
 456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 462           dump_subscript (outf, sub);
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 DEBUG_FUNCTION void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 DEBUG_FUNCTION void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 DEBUG_FUNCTION void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a
   constant of type ssizetype, and returns true.  If the expression
   cannot be split this way, *VAR and *OFF are set to NULL_TREE instead
   and false is returned.

   The codes handled are INTEGER_CST, POINTER_PLUS_EXPR, PLUS_EXPR,
   MINUS_EXPR, MULT_EXPR by an INTEGER_CST, ADDR_EXPR, SSA_NAME defined
   by a GIMPLE_ASSIGN, and conversions; every other code fails.  The
   function recurses on operands through split_constant_offset.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off)
{
  tree var0, var1;
  tree off0, off1;
  /* Code used to combine the offsets; differs from CODE only for
     POINTER_PLUS_EXPR.  */
  enum tree_code ocode = code;

  /* Start from the failure state; each successful case below overwrites
     both outputs before returning true.  */
  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is all offset: the variable part is zero in TYPE.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* The offsets are added with PLUS_EXPR, while the variable parts
         are still recombined with the original CODE below.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Split both operands and recombine the variable parts and the
         constant parts separately.  */
      split_constant_offset (op0, &var0, &off0);
      split_constant_offset (op1, &var1, &off1);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplication by a constant is handled.  */
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      /* (VAR0 + OFF0) * OP1 is VAR0 * OP1 + OFF0 * OP1.  */
      split_constant_offset (op0, &var0, &off0);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        /* Decompose the object whose address is taken.  */
        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        /* Give up unless the bit position is a whole number of bytes.  */
        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            /* Move the constant part of the variable offset into OFF0
               and add what remains of it to BASE.  */
            split_constant_offset (poffset, &poffset, &off1);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            if (POINTER_TYPE_P (TREE_TYPE (base)))
              base = fold_build_pointer_plus (base, poffset);
            else
              base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
                                  fold_convert (TREE_TYPE (base), poffset));
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        /* Strip all pointer levels from TYPE before asking for its size;
           a negative size makes the split fail.  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        /* Names occurring in abnormal PHIs are not looked through.  */
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        /* Only definitions by assignment are looked through.  */
        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        /* Recurse on the right-hand side of the defining assignment.  */
        var0 = gimple_assign_rhs1 (def_stmt);
        subcode = gimple_assign_rhs_code (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        return split_constant_offset_1 (type, var0, subcode, var1, var, off);
      }
    CASE_CONVERT:
      {
        /* We must not introduce undefined overflow, and we must not change the value.
           Hence we're okay if the inner type doesn't overflow to start with
           (pointer or signed), the outer type also is an integer or pointer
           and the outer precision is at least as large as the inner.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
            && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
          {
            if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
              {
                /* Split the unconverted operand and try to prove that
                   wrapping isn't a problem.  */
                tree tmp_var, tmp_off;
                split_constant_offset (op0, &tmp_var, &tmp_off);

                /* See whether we have an SSA_NAME whose range is known
                   to be [A, B].  */
                if (TREE_CODE (tmp_var) != SSA_NAME)
                  return false;
                wide_int var_min, var_max;
                value_range_type vr_type = get_range_info (tmp_var, &var_min,
                                                           &var_max);
                wide_int var_nonzero = get_nonzero_bits (tmp_var);
                signop sgn = TYPE_SIGN (itype);
                if (intersect_range_with_nonzero_bits (vr_type, &var_min,
                                                       &var_max, var_nonzero,
                                                       sgn) != VR_RANGE)
                  return false;

                /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
                   is known to be [A + TMP_OFF, B + TMP_OFF], with all
                   operations done in ITYPE.  The addition must overflow
                   at both ends of the range or at neither.  */
                wi::overflow_type overflow[2];
                unsigned int prec = TYPE_PRECISION (itype);
                wide_int woff = wi::to_wide (tmp_off, prec);
                wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
                wi::add (var_max, woff, sgn, &overflow[1]);
                if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
                  return false;

                /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
                widest_int diff = (widest_int::from (op0_min, sgn)
                                   - widest_int::from (var_min, sgn));
                var0 = tmp_var;
                *off = wide_int_to_tree (ssizetype, diff);
              }
            else
              /* The inner type is not a wrapping integer type, so the
                 operand is split directly.  */
              split_constant_offset (op0, &var0, off);
            /* Either way the variable part is converted to TYPE.  */
            *var = fold_convert (type, var0);
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}
 763
 764 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 765    will be ssizetype.  */
 766
 767 void
 768 split_constant_offset (tree exp, tree *var, tree *off)
 769 {
 770   tree type = TREE_TYPE (exp), op0, op1, e, o;
 771   enum tree_code code;
 772
 773   *var = exp;
 774   *off = ssize_int (0);
 775
 776   if (tree_is_chrec (exp)
 777       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 778     return;
 779
 780   code = TREE_CODE (exp);
 781   extract_ops_from_tree (exp, &code, &op0, &op1);
 782   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
 783     {
 784       *var = e;
 785       *off = o;
 786     }
 787 }
 788
 789 /* Returns the address ADDR of an object in a canonical shape (without nop
 790    casts, and with type of pointer to the object).  */
 791
 792 static tree
 793 canonicalize_base_object_address (tree addr)
 794 {
 795   tree orig = addr;
 796
 797   STRIP_NOPS (addr);
 798
 799   /* The base address may be obtained by casting from integer, in that case
 800      keep the cast.  */
 801   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 802     return orig;
 803
 804   if (TREE_CODE (addr) != ADDR_EXPR)
 805     return addr;
 806
 807   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 808 }
 809
/* Analyze the behavior of memory reference REF.  There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.

   On success the fields written in DRB are base_address, offset, init,
   step, base_alignment, base_misalignment, offset_alignment and
   step_alignment.  When dump_file is set and TDF_DETAILS is enabled, the
   outcome is also written to dump_file.  */

bool
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      struct loop *loop)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  /* Loop analysis needs a non-null LOOP whose number is nonzero;
     anything else selects BB analysis.  */
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  /* Split REF into an innermost base, a constant bit position and an
     optional variable offset.  */
  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  /* Fail unless the bit position is a whole number of bytes.  */
  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "failed: bit offset alignment.\n");
      return false;
    }

  if (preversep)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "failed: reverse storage order.\n");
      return false;
    }

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned int HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  /* From here on BASE is an address: the first operand of a MEM_REF, or
     the address of the base object otherwise.  */
  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (in_loop)
    {
      /* Loop analysis: the base must be a simple induction variable.  */
      if (!simple_iv (loop, loop, base, &base_iv, true))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "failed: evolution of base is not affine.\n");
          return false;
        }
    }
  else
    {
      /* BB analysis: take the base as it is, with a zero step.  */
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      /* No variable offset: both its base and its step are zero.  */
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          /* BB analysis: take the offset as it is, with a zero step.  */
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "failed: evolution of offset is not affine.\n");
          return false;
        }
    }

  /* INIT starts as the constant byte position of the reference and then
     collects the constant parts split off the base and the offset.  */
  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  /* Likewise for the offset; the misalignment is not adjusted here.  */
  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  /* The overall step is the sum of the base and offset steps.  */
  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned int HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  /* Record the results.  */
  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  /* If known_misalignment cannot give a misalignment relative to
     BASE_ALIGNMENT, fall back to the alignment that known_alignment
     derives from the misalignment value and record a zero
     misalignment.  */
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return true;
}
 986
 987 /* Tell whether OP is a component reference that can be expressed as a DR
 988    access function.  Only a subset of handled_component_p codes qualify.  */
 989
 990 static bool
 991 access_fn_component_p (tree op)
 992 {
 993   const enum tree_code code = TREE_CODE (op);
 994
 995   /* Complex parts and array elements are always acceptable.  */
 996   if (code == REALPART_EXPR
 997       || code == IMAGPART_EXPR
 998       || code == ARRAY_REF)
 999     return true;
1000
1001   /* A field reference qualifies only when its container is a record.  */
1002   if (code == COMPONENT_REF)
1003     return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1004
1005   return false;
1006 }
1007
1008 /* Determines the base object and the list of indices of memory reference
1009    DR, analyzed in LOOP and instantiated before NEST.
        The results are stored in DR_BASE_OBJECT (DR) and DR_ACCESS_FNS (DR).
        If NEST is null the whole reference becomes the base object and the
        list of access functions is empty.  Otherwise the reference is peeled
        from the outside in, pushing one access function per REALPART_EXPR,
        IMAGPART_EXPR, ARRAY_REF or record COMPONENT_REF, until a component
        that cannot be translated is reached; what remains is the base
        object, which is rewritten when it is a MEM_REF whose address evolves
        in the nest or a plain declaration.  */
1010
1011 static void
1012 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1013 {
1014   vec<tree> access_fns = vNULL;
1015   tree ref, op;
1016   tree base, off, access_fn;
1017
1018   /* If analyzing a basic-block there are no indices to analyze
1019      and thus no access functions.  */
1020   if (!nest)
1021     {
1022       DR_BASE_OBJECT (dr) = DR_REF (dr);
1023       DR_ACCESS_FNS (dr).create (0);
1024       return;
1025     }
1026
1027   ref = DR_REF (dr);
1028
1029   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1030      into a two element array with a constant index.  The base is
1031      then just the immediate underlying object.  */
1032   if (TREE_CODE (ref) == REALPART_EXPR)
1033     {
1034       ref = TREE_OPERAND (ref, 0);
1035       access_fns.safe_push (integer_zero_node);
1036     }
1037   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1038     {
1039       ref = TREE_OPERAND (ref, 0);
1040       access_fns.safe_push (integer_one_node);
1041     }
1042
1043   /* Analyze access functions of dimensions we know to be independent.
1044      The list of component references handled here should be kept in
1045      sync with access_fn_component_p.  */
1046   while (handled_component_p (ref))
1047     {
1048       if (TREE_CODE (ref) == ARRAY_REF)
1049         {
               /* The access function of an array dimension is the scalar
                  evolution of its index in LOOP, instantiated for NEST.  */
1050           op = TREE_OPERAND (ref, 1);
1051           access_fn = analyze_scalar_evolution (loop, op);
1052           access_fn = instantiate_scev (nest, loop, access_fn);
1053           access_fns.safe_push (access_fn);
1054         }
1055       else if (TREE_CODE (ref) == COMPONENT_REF
1056                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1057         {
1058           /* For COMPONENT_REFs of records (but not unions!) use the
1059              FIELD_DECL offset as constant access function so we can
1060              disambiguate a[i].f1 and a[i].f2.  */
               /* Note that this OFF shadows the function-scope OFF used by
                  the MEM_REF handling below.  The value pushed is the field
                  position in bits: the byte offset times BITS_PER_UNIT plus
                  DECL_FIELD_BIT_OFFSET.  */
1061           tree off = component_ref_field_offset (ref);
1062           off = size_binop (PLUS_EXPR,
1063                             size_binop (MULT_EXPR,
1064                                         fold_convert (bitsizetype, off),
1065                                         bitsize_int (BITS_PER_UNIT)),
1066                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1067           access_fns.safe_push (off);
1068         }
1069       else
1070         /* If we have an unhandled component we could not translate
1071            to an access function stop analyzing.  We have determined
1072            our base object in this case.  */
1073         break;
1074
           /* Step inwards to the object this component was applied to.  */
1075       ref = TREE_OPERAND (ref, 0);
1076     }
1077
1078   /* If the address operand of a MEM_REF base has an evolution in the
1079      analyzed nest, add it as an additional independent access-function.  */
1080   if (TREE_CODE (ref) == MEM_REF)
1081     {
1082       op = TREE_OPERAND (ref, 0);
1083       access_fn = analyze_scalar_evolution (loop, op);
1084       access_fn = instantiate_scev (nest, loop, access_fn);
1085       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1086         {
1087           tree orig_type;
1088           tree memoff = TREE_OPERAND (ref, 1);
               /* Split the initial value of the pointer evolution into a
                  variable BASE and a constant byte offset OFF.  */
1089           base = initial_condition (access_fn);
1090           orig_type = TREE_TYPE (base);
1091           STRIP_USELESS_TYPE_CONVERSION (base);
1092           split_constant_offset (base, &base, &off);
1093           STRIP_USELESS_TYPE_CONVERSION (base);
1094           /* Fold the MEM_REF offset into the evolutions initial
1095              value to make more bases comparable.  */
1096           if (!integer_zerop (memoff))
1097             {
1098               off = size_binop (PLUS_EXPR, off,
1099                                 fold_convert (ssizetype, memoff));
1100               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1101             }
1102           /* Adjust the offset so it is a multiple of the access type
1103              size and thus we separate bases that can possibly be used
1104              to produce partial overlaps (which the access_fn machinery
1105              cannot handle).  */
               /* REM is the part of OFF that is not a multiple of the access
                  size.  It stays behind as the offset of the new MEM_REF
                  base, while OFF - REM becomes the initial condition of the
                  access function.  */
1106           wide_int rem;
1107           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1108               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1109               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1110             rem = wi::mod_trunc
1111               (wi::to_wide (off),
1112                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1113                SIGNED);
1114           else
1115             /* If we can't compute the remainder simply force the initial
1116                condition to zero.  */
1117             rem = wi::to_wide (off);
1118           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1119           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1120           /* And finally replace the initial condition.  */
1121           access_fn = chrec_replace_initial_condition
1122               (access_fn, fold_convert (orig_type, off));
1123           /* ???  This is still not a suitable base object for
1124              dr_may_alias_p - the base object needs to be an
1125              access that covers the object as whole.  With
1126              an evolution in the pointer this cannot be
1127              guaranteed.
1128              As a band-aid, mark the access so we can special-case
1129              it in dr_may_alias_p.  */
               /* The rebuilt MEM_REF inherits the dependence clique and base
                  of the reference it replaces.  */
1130           tree old = ref;
1131           ref = fold_build2_loc (EXPR_LOCATION (ref),
1132                                  MEM_REF, TREE_TYPE (ref),
1133                                  base, memoff);
1134           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1135           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1136           DR_UNCONSTRAINED_BASE (dr) = true;
1137           access_fns.safe_push (access_fn);
1138         }
1139     }
1140   else if (DECL_P (ref))
1141     {
1142       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1143       ref = build2 (MEM_REF, TREE_TYPE (ref),
1144                     build_fold_addr_expr (ref),
1145                     build_int_cst (reference_alias_ptr_type (ref), 0));
1146     }
1147
       /* Whatever is left of the reference is the base object.  */
1148   DR_BASE_OBJECT (dr) = ref;
1149   DR_ACCESS_FNS (dr) = access_fns;
1150 }
1151
1152 /* Copy pointer information for the address that DR dereferences into DR.  */
1153
1154 static void
1155 dr_analyze_alias (struct data_reference *dr)
1156 {
1157   tree base = get_base_address (DR_REF (dr));
1158
1159   /* Nothing to record unless the base is an INDIRECT_REF or a MEM_REF.  */
1160   if (!INDIRECT_REF_P (base) && TREE_CODE (base) != MEM_REF)
1161     return;
1162
1163   /* Only an SSA name address carries SSA_NAME_PTR_INFO.  */
1164   tree addr = TREE_OPERAND (base, 0);
1165   if (TREE_CODE (addr) == SSA_NAME)
1166     DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1167 }
1168
1169 /* Frees data reference DR: releases the storage of its vector of access
        functions and then the data_reference structure itself.  DR must not
        be used after this call.  */
1170
1171 void
1172 free_data_ref (data_reference_p dr)
1173 {
       /* Release the vector first; it is reached through DR.  */
1174   DR_ACCESS_FNS (dr).release ();
1175   free (dr);
1176 }
1177
1178 /* Analyze memory reference MEMREF, which is accessed in STMT.
1179    The reference is a read if IS_READ is true, otherwise it is a write.
1180    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1181    within STMT, i.e. that it might not occur even if STMT is executed
1182    and runs to completion.
1183
1184    Return the data_reference description of MEMREF.  NEST is the outermost
1185    loop in which the reference should be instantiated, LOOP is the loop
1186    in which the data reference should be analyzed.
        The result is allocated here with XCNEW; free_data_ref releases such
        an object.  When NEST is null, dr_analyze_innermost is given a null
        loop and dr_analyze_indices records no access functions.  The
        analysis results are also printed to the dump file when detailed
        dumping is enabled.  */
1187
1188 struct data_reference *
1189 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1190                  bool is_read, bool is_conditional_in_stmt)
1191 {
1192   struct data_reference *dr;
1193
1194   if (dump_file && (dump_flags & TDF_DETAILS))
1195     {
1196       fprintf (dump_file, "Creating dr for ");
1197       print_generic_expr (dump_file, memref, TDF_SLIM);
1198       fprintf (dump_file, "\n");
1199     }
1200
       /* Allocate the description and record what the caller told us.  */
1201   dr = XCNEW (struct data_reference);
1202   DR_STMT (dr) = stmt;
1203   DR_REF (dr) = memref;
1204   DR_IS_READ (dr) = is_read;
1205   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1206
       /* Fill in the innermost behavior, then the base object and access
          functions, then the pointer information.  */
1207   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1208                         nest != NULL ? loop : NULL);
1209   dr_analyze_indices (dr, nest, loop);
1210   dr_analyze_alias (dr);
1211
       /* Dump every field computed above, one per line, followed by the
          access function of each dimension.  */
1212   if (dump_file && (dump_flags & TDF_DETAILS))
1213     {
1214       unsigned i;
1215       fprintf (dump_file, "\tbase_address: ");
1216       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1217       fprintf (dump_file, "\n\toffset from base address: ");
1218       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1219       fprintf (dump_file, "\n\tconstant offset from base address: ");
1220       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1221       fprintf (dump_file, "\n\tstep: ");
1222       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1223       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1224       fprintf (dump_file, "\n\tbase misalignment: %d",
1225                DR_BASE_MISALIGNMENT (dr));
1226       fprintf (dump_file, "\n\toffset alignment: %d",
1227                DR_OFFSET_ALIGNMENT (dr));
1228       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1229       fprintf (dump_file, "\n\tbase_object: ");
1230       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1231       fprintf (dump_file, "\n");
1232       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1233         {
1234           fprintf (dump_file, "\tAccess function %d: ", i);
1235           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1236         }
1237     }
1238
1239   return dr;
1240 }
1241
1242 /*  A helper function computes order between two tree expressions T1 and T2.
1243     This is used in comparator functions sorting objects based on the order
1244     of tree expressions.  The function returns -1, 0, or 1.
         A null tree sorts before any non-null tree.  Trees with different
         codes are ordered by tree code, except that two conversions are
         compared through their operands.
         NOTE(review): the STRING_CST case returns the result of memcmp
         directly, which is only guaranteed to have the right sign rather
         than to be exactly -1 or 1.  */
1245
1246 int
1247 data_ref_compare_tree (tree t1, tree t2)
1248 {
1249   int i, cmp;
1250   enum tree_code code;
1251   char tclass;
1252
1253   if (t1 == t2)
1254     return 0;
1255   if (t1 == NULL)
1256     return -1;
1257   if (t2 == NULL)
1258     return 1;
1259
       /* Look through useless conversions; the trees may turn out to be
          identical once these are stripped.  */
1260   STRIP_USELESS_TYPE_CONVERSION (t1);
1261   STRIP_USELESS_TYPE_CONVERSION (t2);
1262   if (t1 == t2)
1263     return 0;
1264
       /* Different codes decide the order unless both are conversions.  */
1265   if (TREE_CODE (t1) != TREE_CODE (t2)
1266       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1267     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1268
1269   code = TREE_CODE (t1);
1270   switch (code)
1271     {
1272     case INTEGER_CST:
1273       return tree_int_cst_compare (t1, t2);
1274
1275     case STRING_CST:
           /* Shorter strings first, then byte-wise comparison.  */
1276       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1277         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1278       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1279                      TREE_STRING_LENGTH (t1));
1280
1281     case SSA_NAME:
           /* SSA names are ordered by version number.  */
1282       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1283         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1284       break;
1285
1286     default:
1287       if (POLY_INT_CST_P (t1))
1288         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1289                                        wi::to_poly_widest (t2));
1290
1291       tclass = TREE_CODE_CLASS (code);
1292
1293       /* For decls, compare their UIDs.  */
1294       if (tclass == tcc_declaration)
1295         {
1296           if (DECL_UID (t1) != DECL_UID (t2))
1297             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1298           break;
1299         }
1300       /* For expressions, compare their operands recursively.  */
1301       else if (IS_EXPR_CODE_CLASS (tclass))
1302         {
               /* Operands are visited from last to first; the first
                  difference found decides the result.  */
1303           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1304             {
1305               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1306                                            TREE_OPERAND (t2, i));
1307               if (cmp != 0)
1308                 return cmp;
1309             }
1310         }
1311       else
             /* Any other class of tree is not expected here.  */
1312         gcc_unreachable ();
1313     }
1314
       /* No difference was found.  */
1315   return 0;
1316 }
1317
1318 /* Return true if data dependence DDR can be resolved by a run-time alias
1319    check.  Refuse when SPEED_P is false or when LOOP has an inner loop.  */
1320
1321 bool
1322 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1323 {
1324   if (dump_enabled_p ())
1325     dump_printf (MSG_NOTE,
1326                  "consider run-time aliasing test between %T and %T\n",
1327                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1328
1329   /* FORNOW: neither vectorization nor loop distribution supports
1330      versioning with an outer loop.  */
1331   const bool is_outer_loop = loop != NULL && loop->inner != NULL;
1332
1333   if (speed_p && !is_outer_loop)
1334     return true;
1335
1336   /* Explain the refusal; optimizing for size is reported in preference.  */
1337   if (dump_enabled_p ())
1338     {
1339       if (!speed_p)
1340         dump_printf (MSG_MISSED_OPTIMIZATION,
1341                      "runtime alias check not supported when optimizing "
1342                      "for size.\n");
1343       else
1344         dump_printf (MSG_MISSED_OPTIMIZATION,
1345                      "runtime alias check not supported for outer loop.\n");
1346     }
1347
1348   return false;
1349 }
1350
1351 /* Equality of two dr_with_seg_len objects.  D1 and D2 compare equal when
1352    they agree on base address, offset, init, segment length, access size
1353    and alignment, in which case the aliasing checks that involve them are
1354    candidates for being combined.  */
1355
1356 static bool
1357 operator == (const dr_with_seg_len& d1, const dr_with_seg_len& d2)
1358 {
1359   if (!operand_equal_p (DR_BASE_ADDRESS (d1.dr), DR_BASE_ADDRESS (d2.dr), 0))
1360     return false;
1361   if (data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) != 0)
1362     return false;
1363   if (data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) != 0)
1364     return false;
1365   if (data_ref_compare_tree (d1.seg_len, d2.seg_len) != 0)
1366     return false;
1367
1368   return known_eq (d1.access_size, d2.access_size) && d1.align == d2.align;
1369 }
1370
1371 /* Comparison function for sorting dr_with_seg_len_pair_t objects with
1372    qsort, so that aliasing checks can be combined in a single scan.  PA_
1373    and PB_ point to the two pairs being compared.  */
1374
1375 static int
1376 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1377 {
1378   const dr_with_seg_len_pair_t *lhs = (const dr_with_seg_len_pair_t *) pa_;
1379   const dr_with_seg_len_pair_t *rhs = (const dr_with_seg_len_pair_t *) pb_;
1380   const dr_with_seg_len &l1 = lhs->first, &l2 = lhs->second;
1381   const dr_with_seg_len &r1 = rhs->first, &r2 = rhs->second;
1382   int order;
1383
1384   /* Alias checks for DR pairs (a, b) and (c, d) are only considered for
1385      merging when a and c have the same base address and step, and b and
1386      d have the same base address and step.  The relative order of pairs
1387      that differ in those respects therefore does not matter.  The keys
1388      are compared one after another until one of them differs.  */
1389
1390   /* Base addresses of the first and of the second references.  */
1391   order = data_ref_compare_tree (DR_BASE_ADDRESS (l1.dr),
1392                                  DR_BASE_ADDRESS (r1.dr));
1393   if (order == 0)
1394     order = data_ref_compare_tree (DR_BASE_ADDRESS (l2.dr),
1395                                    DR_BASE_ADDRESS (r2.dr));
1396
1397   /* Steps of the first and of the second references.  */
1398   if (order == 0)
1399     order = data_ref_compare_tree (DR_STEP (l1.dr), DR_STEP (r1.dr));
1400   if (order == 0)
1401     order = data_ref_compare_tree (DR_STEP (l2.dr), DR_STEP (r2.dr));
1402
1403   /* Offset and init of the first, then of the second references.  */
1404   if (order == 0)
1405     order = data_ref_compare_tree (DR_OFFSET (l1.dr), DR_OFFSET (r1.dr));
1406   if (order == 0)
1407     order = data_ref_compare_tree (DR_INIT (l1.dr), DR_INIT (r1.dr));
1408   if (order == 0)
1409     order = data_ref_compare_tree (DR_OFFSET (l2.dr), DR_OFFSET (r2.dr));
1410   if (order == 0)
1411     order = data_ref_compare_tree (DR_INIT (l2.dr), DR_INIT (r2.dr));
1412
1413   return order;
1414 }
1415
1416 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1417    The second parameter is unnamed and is not used by this function.
1418
1419    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1420    we create an expression:
1421
1422    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1423    || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
1424
1425    for aliasing checks.  However, in some cases we can decrease the number
1426    of checks by combining two checks into one.  For example, suppose we have
1427    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1428    condition is satisfied:
1429
1430    load_ptr_0 < load_ptr_1  &&
1431    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1432
1433    (this condition means, in each iteration of vectorized loop, the accessed
1434    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1435    load_ptr_1.)
1436
1437    we then can use only the following expression to finish the aliasing checks
1438    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1439
1440    ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1441    || ((load_ptr_1 + load_segment_length_1) <= store_ptr_0)
1442
1443    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1444    basic address.
        ALIAS_PAIRS is sorted and modified in place: merged or duplicate
        entries are removed and the surviving entry of a merge has its
        segment length, access size and alignment updated.  */
1445
1446 void
1447 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1448                                poly_uint64)
1449 {
1450   /* Sort the collected data ref pairs so that we can scan them once to
1451      combine all possible aliasing checks.  */
1452   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1453
1454   /* Scan the sorted dr pairs and check if we can combine alias checks
1455      of two neighboring dr pairs.  */
1456   for (size_t i = 1; i < alias_pairs->length (); ++i)
1457     {
1458       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1459       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1460                       *dr_b1 = &(*alias_pairs)[i-1].second,
1461                       *dr_a2 = &(*alias_pairs)[i].first,
1462                       *dr_b2 = &(*alias_pairs)[i].second;
1463
1464       /* Remove duplicate data ref pairs.  */
1465       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1466         {
1467           if (dump_enabled_p ())
1468             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1469                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1470                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* Decrementing I makes the next iteration compare entry I - 1
                  with the entry that moves into slot I.  */
1471           alias_pairs->ordered_remove (i--);
1472           continue;
1473         }
1474
1475       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1476         {
1477           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1478              and DR_A1 and DR_A2 are two consecutive memrefs.  */
               /* Only the local pointers are exchanged here, so that the
                  references that differ are always called DR_A1 and
                  DR_A2 below.  */
1479           if (*dr_a1 == *dr_a2)
1480             {
1481               std::swap (dr_a1, dr_b1);
1482               std::swap (dr_a2, dr_b2);
1483             }
1484
1485           poly_int64 init_a1, init_a2;
1486           /* Only consider cases in which the distance between the initial
1487              DR_A1 and the initial DR_A2 is known at compile time.  */
1488           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1489                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1490               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1491                                    DR_OFFSET (dr_a2->dr), 0)
1492               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1493               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1494             continue;
1495
1496           /* Don't combine if we can't tell which one comes first.  */
1497           if (!ordered_p (init_a1, init_a2))
1498             continue;
1499
1500           /* Make sure dr_a1 starts left of dr_a2.  */
               /* NOTE(review): unlike the pointer swap above, this exchanges
                  the objects stored in ALIAS_PAIRS, and the exchange remains
                  in effect if one of the `continue' statements below
                  abandons the merge.  */
1501           if (maybe_gt (init_a1, init_a2))
1502             {
1503               std::swap (*dr_a1, *dr_a2);
1504               std::swap (init_a1, init_a2);
1505             }
1506
1507           /* Work out what the segment length would be if we did combine
1508              DR_A1 and DR_A2:
1509
1510              - If DR_A1 and DR_A2 have equal lengths, that length is
1511                also the combined length.
1512
1513              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1514                length is the lower bound on those lengths.
1515
1516              - If DR_A1 and DR_A2 both have positive lengths, the combined
1517                length is the upper bound on those lengths.
1518
1519              Other cases are unlikely to give a useful combination.
1520
1521              The lengths both have sizetype, so the sign is taken from
1522              the step instead.  */
1523           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1524             {
1525               poly_uint64 seg_len_a1, seg_len_a2;
1526               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1527                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1528                 continue;
1529
                   /* The direction of each reference must be a compile-time
                      constant for its sign to be usable.  */
1530               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1531               if (TREE_CODE (indicator_a) != INTEGER_CST)
1532                 continue;
1533
1534               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1535               if (TREE_CODE (indicator_b) != INTEGER_CST)
1536                 continue;
1537
1538               int sign_a = tree_int_cst_sgn (indicator_a);
1539               int sign_b = tree_int_cst_sgn (indicator_b);
1540
                   /* A zero sign is accepted by both tests; the non-positive
                      case is tried first.  */
1541               poly_uint64 new_seg_len;
1542               if (sign_a <= 0 && sign_b <= 0)
1543                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1544               else if (sign_a >= 0 && sign_b >= 0)
1545                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1546               else
1547                 continue;
1548
1549               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1550                                               new_seg_len);
1551               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1552             }
1553
1554           /* This is always positive due to the swap above.  */
1555           poly_uint64 diff = init_a2 - init_a1;
1556
1557           /* The new check will start at DR_A1.  Make sure that its access
1558              size encompasses the initial DR_A2.  */
1559           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1560             {
1561               dr_a1->access_size = upper_bound (dr_a1->access_size,
1562                                                 diff + dr_a2->access_size);
1563               unsigned int new_align = known_alignment (dr_a1->access_size);
1564               dr_a1->align = MIN (dr_a1->align, new_align);
1565             }
1566           if (dump_enabled_p ())
1567             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1568                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1569                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* Entry I has been folded into entry I - 1.  Drop it and step
                  back so that the merged entry is compared with its new
                  neighbor.  */
1570           alias_pairs->ordered_remove (i);
1571           i--;
1572         }
1573     }
1574 }
1575
1576 /* Given LOOP's two data references and segment lengths described by DR_A
1577    and DR_B, create expression checking if the two addresses ranges intersect
1578    with each other based on index of the two addresses.  This can only be
1579    done if DR_A and DR_B referring to the same (array) object and the index
1580    is the only difference.  For example:
1581
1582                        DR_A                           DR_B
1583       data-ref         arr[i]                         arr[j]
1584       base_object      arr                            arr
1585       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1586
1587    The addresses and their index are like:
1588
1589         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1590      ------------------------------------------------------->
1591         |   |   |   |   |          |   |   |   |   |
1592      ------------------------------------------------------->
1593         i_0 ...         i_0+4      j_0 ...         j_0+4
1594
1595    We can create expression based on index rather than address:
1596
1597      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1598
1599    Note evolution step of index needs to be considered in comparison.
        Return true on success, in which case the condition for every
        dimension that evolves in LOOP has been ANDed into *COND_EXPR (or
        stored there if *COND_EXPR was null).  Return false if no index
        based check can be built; *COND_EXPR may then already have been
        updated for earlier dimensions.  */
1600
1601 static bool
1602 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1603                                      const dr_with_seg_len& dr_a,
1604                                      const dr_with_seg_len& dr_b)
1605 {
       /* Both references must advance and have the same number of
          dimensions.  */
1606   if (integer_zerop (DR_STEP (dr_a.dr))
1607       || integer_zerop (DR_STEP (dr_b.dr))
1608       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1609     return false;
1610
       /* The segment lengths must be compile-time (poly) constants.  */
1611   poly_uint64 seg_len1, seg_len2;
1612   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1613       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1614     return false;
1615
1616   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1617     return false;
1618
1619   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1620     return false;
1621
1622   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1623     return false;
1624
1625   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1626
       /* Work with the magnitude of the (common) step; for a negative step
          the segment lengths are negated as well.  */
1627   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1628   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1629   if (neg_step)
1630     {
1631       abs_step = -abs_step;
1632       seg_len1 = -seg_len1;
1633       seg_len2 = -seg_len2;
1634     }
1635   else
1636     {
1637       /* Include the access size in the length, so that we only have one
1638          tree addition below.  */
1639       seg_len1 += dr_a.access_size;
1640       seg_len2 += dr_b.access_size;
1641     }
1642
1643   /* Infer the number of iterations with which the memory segment is accessed
1644      by DR.  In other words, alias is checked if memory segment accessed by
1645      DR_A in some iterations intersect with memory segment accessed by DR_B
1646      in the same amount iterations.
1647      Note segment length is a linear function of number of iterations with
1648      DR_STEP as the coefficient.  */
1649   poly_uint64 niter_len1, niter_len2;
1650   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1651       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1652     return false;
1653
1654   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1655   if (neg_step)
1656     {
1657       /* Divide each access size by the byte step, rounding up.  */
           /* Rounding up means adding ABS_STEP - 1 before the truncating
              division, for both references alike.  */
1658       if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1659                             abs_step, &niter_access1)
1660           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1661                                abs_step, &niter_access2))
1662         return false;
1663     }
1664
1665   unsigned int i;
1666   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1667     {
1668       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1669       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1670       /* Two indices must be the same if they are not scev, or not scev with
1671          respect to the current loop being vectorized.  */
1672       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1673           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1674           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1675           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1676         {
1677           if (operand_equal_p (access1, access2, 0))
1678             continue;
1679
1680           return false;
1681         }
1682       /* The two indices must have the same step.  */
1683       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1684         return false;
1685
1686       tree idx_step = CHREC_RIGHT (access1);
1687       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1688       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1689       /* Index must evaluate in the same direction as DR.  */
1690       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1691
           /* The initial index values bound the ranges on one side.  */
1692       tree min1 = CHREC_LEFT (access1);
1693       tree min2 = CHREC_LEFT (access2);
1694       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1695         return false;
1696
1697       /* Ideally, alias can be checked against loop's control IV, but we
1698          need to prove linear mapping between control IV and reference
1699          index.  Although that should be true, we check against (array)
1700          index of data reference.  Like segment length, index length is
1701          linear function of the number of iterations with index_step as
1702          the coefficient, i.e, niter_len * idx_step.  */
1703       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1704                                    build_int_cst (TREE_TYPE (min1),
1705                                                   niter_len1));
1706       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1707                                    build_int_cst (TREE_TYPE (min2),
1708                                                   niter_len2));
1709       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1710       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1711       /* Adjust ranges for negative step.  */
1712       if (neg_step)
1713         {
1714           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1715           std::swap (min1, max1);
1716           std::swap (min2, max2);
1717
1718           /* As with the lengths just calculated, we've measured the access
1719              sizes in iterations, so multiply them by the index step.  */
1720           tree idx_access1
1721             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1722                            build_int_cst (TREE_TYPE (min1), niter_access1));
1723           tree idx_access2
1724             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1725                            build_int_cst (TREE_TYPE (min2), niter_access2));
1726
1727           /* MINUS_EXPR because the above values are negative.  */
1728           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1729           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1730         }
           /* The index ranges do not intersect if one of them ends no later
              than the other one starts.  */
1731       tree part_cond_expr
1732         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1733             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1734             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1735       if (*cond_expr)
1736         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1737                                   *cond_expr, part_cond_expr);
1738       else
1739         *cond_expr = part_cond_expr;
1740     }
1741   return true;
1742 }
1743
1744 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1745    every address ADDR accessed by D:
1746
1747      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1748
1749    In this case, every element accessed by D is aligned to at least
1750    ALIGN bytes.
1751
1752    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1753
1754      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1755
1756 static void
1757 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1758                      tree *seg_max_out, HOST_WIDE_INT align)
1759 {
1760   /* Each access has the following pattern:
1761
1762           <- |seg_len| ->
1763           <--- A: -ve step --->
1764           +-----+-------+-----+-------+-----+
1765           | n-1 | ,.... |  0  | ..... | n-1 |
1766           +-----+-------+-----+-------+-----+
1767                         <--- B: +ve step --->
1768                         <- |seg_len| ->
1769                         |
1770                    base address
1771
1772      where "n" is the number of scalar iterations covered by the segment.
1773      (This should be VF for a particular pair if we know that both steps
1774      are the same, otherwise it will be the full number of scalar loop
1775      iterations.)
1776
1777      A is the range of bytes accessed when the step is negative,
1778      B is the range when the step is positive.
1779
1780      If the access size is "access_size" bytes, the lowest addressed byte is:
1781
1782          base + (step < 0 ? seg_len : 0)   [LB]
1783
1784      and the highest addressed byte is always below:
1785
1786          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1787
1788      Thus:
1789
1790          LB <= ADDR < UB
1791
1792      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1793      bytes, so:
1794
1795          LB <= ADDR <= UB - ALIGN
1796
1797      where "- ALIGN" folds naturally with the "+ access_size" and often
1798      cancels it out.
1799
1800      We don't try to simplify LB and UB beyond this (e.g. by using
1801      MIN and MAX based on whether seg_len rather than the stride is
1802      negative) because it is possible for the absolute size of the
1803      segment to overflow the range of a ssize_t.
1804
1805      Keeping the pointer_plus outside of the cond_expr should allow
1806      the cond_exprs to be shared with other alias checks.  */
1807   tree indicator = dr_direction_indicator (d.dr);
1808   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1809                                fold_convert (ssizetype, indicator),
1810                                ssize_int (0));
1811   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1812                                             DR_OFFSET (d.dr));
1813   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1814   tree seg_len
1815     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1816
1817   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1818                                 seg_len, size_zero_node);
1819   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1820                                 size_zero_node, seg_len);
1821   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1822                            size_int (d.access_size - align));
1823
1824   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1825   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1826 }
1827
1828 /* Given two data references and segment lengths described by DR_A and DR_B,
1829    create expression checking if the two addresses ranges intersect with
1830    each other:
1831
1832      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1833      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1834
1835 static void
1836 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1837                                const dr_with_seg_len& dr_a,
1838                                const dr_with_seg_len& dr_b)
1839 {
1840   *cond_expr = NULL_TREE;
1841   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1842     return;
1843
1844   unsigned HOST_WIDE_INT min_align;
1845   tree_code cmp_code;
1846   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1847       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1848     {
1849       /* In this case adding access_size to seg_len is likely to give
1850          a simple X * step, where X is either the number of scalar
1851          iterations or the vectorization factor.  We're better off
1852          keeping that, rather than subtracting an alignment from it.
1853
1854          In this case the maximum values are exclusive and so there is
1855          no alias if the maximum of one segment equals the minimum
1856          of another.  */
1857       min_align = 0;
1858       cmp_code = LE_EXPR;
1859     }
1860   else
1861     {
1862       /* Calculate the minimum alignment shared by all four pointers,
1863          then arrange for this alignment to be subtracted from the
1864          exclusive maximum values to get inclusive maximum values.
1865          This "- min_align" is cumulative with a "+ access_size"
1866          in the calculation of the maximum values.  In the best
1867          (and common) case, the two cancel each other out, leaving
1868          us with an inclusive bound based only on seg_len.  In the
1869          worst case we're simply adding a smaller number than before.
1870
1871          Because the maximum values are inclusive, there is an alias
1872          if the maximum value of one segment is equal to the minimum
1873          value of the other.  */
1874       min_align = MIN (dr_a.align, dr_b.align);
1875       cmp_code = LT_EXPR;
1876     }
1877
1878   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1879   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1880   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1881
1882   *cond_expr
1883     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1884         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1885         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1886 }
1887
1888 /* Create a conditional expression that represents the run-time checks for
1889    overlapping of address ranges represented by a list of data references
1890    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1891    COND_EXPR is the conditional expression to be used in the if statement
1892    that controls which version of the loop gets executed at runtime.  */
1893
1894 void
1895 create_runtime_alias_checks (struct loop *loop,
1896                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1897                              tree * cond_expr)
1898 {
1899   tree part_cond_expr;
1900
1901   fold_defer_overflow_warnings ();
1902   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1903     {
1904       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1905       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1906
1907       if (dump_enabled_p ())
1908         dump_printf (MSG_NOTE,
1909                      "create runtime check for data references %T and %T\n",
1910                      DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1911
1912       /* Create condition expression for each pair data references.  */
1913       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1914       if (*cond_expr)
1915         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1916                                   *cond_expr, part_cond_expr);
1917       else
1918         *cond_expr = part_cond_expr;
1919     }
1920   fold_undefer_and_ignore_overflow_warnings ();
1921 }
1922
1923 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1924    expressions.  */
1925 static bool
1926 dr_equal_offsets_p1 (tree offset1, tree offset2)
1927 {
1928   bool res;
1929
1930   STRIP_NOPS (offset1);
1931   STRIP_NOPS (offset2);
1932
1933   if (offset1 == offset2)
1934     return true;
1935
1936   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1937       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1938     return false;
1939
1940   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1941                              TREE_OPERAND (offset2, 0));
1942
1943   if (!res || !BINARY_CLASS_P (offset1))
1944     return res;
1945
1946   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1947                              TREE_OPERAND (offset2, 1));
1948
1949   return res;
1950 }
1951
1952 /* Check if DRA and DRB have equal offsets.  */
1953 bool
1954 dr_equal_offsets_p (struct data_reference *dra,
1955                     struct data_reference *drb)
1956 {
1957   tree offset1, offset2;
1958
1959   offset1 = DR_OFFSET (dra);
1960   offset2 = DR_OFFSET (drb);
1961
1962   return dr_equal_offsets_p1 (offset1, offset2);
1963 }
1964
1965 /* Returns true if FNA == FNB.  */
1966
1967 static bool
1968 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1969 {
1970   unsigned i, n = fna.length ();
1971
1972   if (n != fnb.length ())
1973     return false;
1974
1975   for (i = 0; i < n; i++)
1976     if (!operand_equal_p (fna[i], fnb[i], 0))
1977       return false;
1978
1979   return true;
1980 }
1981
1982 /* If all the functions in CF are the same, returns one of them,
1983    otherwise returns NULL.  */
1984
1985 static affine_fn
1986 common_affine_function (conflict_function *cf)
1987 {
1988   unsigned i;
1989   affine_fn comm;
1990
1991   if (!CF_NONTRIVIAL_P (cf))
1992     return affine_fn ();
1993
1994   comm = cf->fns[0];
1995
1996   for (i = 1; i < cf->n; i++)
1997     if (!affine_function_equal_p (comm, cf->fns[i]))
1998       return affine_fn ();
1999
2000   return comm;
2001 }
2002
2003 /* Returns the base of the affine function FN.  */
2004
2005 static tree
2006 affine_function_base (affine_fn fn)
2007 {
2008   return fn[0];
2009 }
2010
2011 /* Returns true if FN is a constant.  */
2012
2013 static bool
2014 affine_function_constant_p (affine_fn fn)
2015 {
2016   unsigned i;
2017   tree coef;
2018
2019   for (i = 1; fn.iterate (i, &coef); i++)
2020     if (!integer_zerop (coef))
2021       return false;
2022
2023   return true;
2024 }
2025
2026 /* Returns true if FN is the zero constant function.  */
2027
2028 static bool
2029 affine_function_zero_p (affine_fn fn)
2030 {
2031   return (integer_zerop (affine_function_base (fn))
2032           && affine_function_constant_p (fn));
2033 }
2034
2035 /* Returns a signed integer type with the largest precision from TA
2036    and TB.  */
2037
2038 static tree
2039 signed_type_for_types (tree ta, tree tb)
2040 {
2041   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2042     return signed_type_for (ta);
2043   else
2044     return signed_type_for (tb);
2045 }
2046
2047 /* Applies operation OP on affine functions FNA and FNB, and returns the
2048    result.  */
2049
2050 static affine_fn
2051 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2052 {
2053   unsigned i, n, m;
2054   affine_fn ret;
2055   tree coef;
2056
2057   if (fnb.length () > fna.length ())
2058     {
2059       n = fna.length ();
2060       m = fnb.length ();
2061     }
2062   else
2063     {
2064       n = fnb.length ();
2065       m = fna.length ();
2066     }
2067
2068   ret.create (m);
2069   for (i = 0; i < n; i++)
2070     {
2071       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2072                                          TREE_TYPE (fnb[i]));
2073       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2074     }
2075
2076   for (; fna.iterate (i, &coef); i++)
2077     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2078                                  coef, integer_zero_node));
2079   for (; fnb.iterate (i, &coef); i++)
2080     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2081                                  integer_zero_node, coef));
2082
2083   return ret;
2084 }
2085
2086 /* Returns the sum of affine functions FNA and FNB.  */
2087
2088 static affine_fn
2089 affine_fn_plus (affine_fn fna, affine_fn fnb)
2090 {
2091   return affine_fn_op (PLUS_EXPR, fna, fnb);
2092 }
2093
2094 /* Returns the difference of affine functions FNA and FNB.  */
2095
2096 static affine_fn
2097 affine_fn_minus (affine_fn fna, affine_fn fnb)
2098 {
2099   return affine_fn_op (MINUS_EXPR, fna, fnb);
2100 }
2101
2102 /* Frees affine function FN.  */
2103
2104 static void
2105 affine_fn_free (affine_fn fn)
2106 {
2107   fn.release ();
2108 }
2109
2110 /* Determine for each subscript in the data dependence relation DDR
2111    the distance.  */
2112
2113 static void
2114 compute_subscript_distance (struct data_dependence_relation *ddr)
2115 {
2116   conflict_function *cf_a, *cf_b;
2117   affine_fn fn_a, fn_b, diff;
2118
2119   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2120     {
2121       unsigned int i;
2122
2123       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2124         {
2125           struct subscript *subscript;
2126
2127           subscript = DDR_SUBSCRIPT (ddr, i);
2128           cf_a = SUB_CONFLICTS_IN_A (subscript);
2129           cf_b = SUB_CONFLICTS_IN_B (subscript);
2130
2131           fn_a = common_affine_function (cf_a);
2132           fn_b = common_affine_function (cf_b);
2133           if (!fn_a.exists () || !fn_b.exists ())
2134             {
2135               SUB_DISTANCE (subscript) = chrec_dont_know;
2136               return;
2137             }
2138           diff = affine_fn_minus (fn_a, fn_b);
2139
2140           if (affine_function_constant_p (diff))
2141             SUB_DISTANCE (subscript) = affine_function_base (diff);
2142           else
2143             SUB_DISTANCE (subscript) = chrec_dont_know;
2144
2145           affine_fn_free (diff);
2146         }
2147     }
2148 }
2149
2150 /* Returns the conflict function for "unknown".  */
2151
2152 static conflict_function *
2153 conflict_fn_not_known (void)
2154 {
2155   conflict_function *fn = XCNEW (conflict_function);
2156   fn->n = NOT_KNOWN;
2157
2158   return fn;
2159 }
2160
2161 /* Returns the conflict function for "independent".  */
2162
2163 static conflict_function *
2164 conflict_fn_no_dependence (void)
2165 {
2166   conflict_function *fn = XCNEW (conflict_function);
2167   fn->n = NO_DEPENDENCE;
2168
2169   return fn;
2170 }
2171
2172 /* Returns true if the address of OBJ is invariant in LOOP.  */
2173
2174 static bool
2175 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2176 {
2177   while (handled_component_p (obj))
2178     {
2179       if (TREE_CODE (obj) == ARRAY_REF)
2180         {
2181           for (int i = 1; i < 4; ++i)
2182             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2183                                                         loop->num))
2184               return false;
2185         }
2186       else if (TREE_CODE (obj) == COMPONENT_REF)
2187         {
2188           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2189                                                       loop->num))
2190             return false;
2191         }
2192       obj = TREE_OPERAND (obj, 0);
2193     }
2194
2195   if (!INDIRECT_REF_P (obj)
2196       && TREE_CODE (obj) != MEM_REF)
2197     return true;
2198
2199   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2200                                                   loop->num);
2201 }
2202
2203 /* Returns false if we can prove that data references A and B do not alias,
2204    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2205    considered.  */
2206
2207 bool
2208 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2209                 bool loop_nest)
2210 {
2211   tree addr_a = DR_BASE_OBJECT (a);
2212   tree addr_b = DR_BASE_OBJECT (b);
2213
2214   /* If we are not processing a loop nest but scalar code we
2215      do not need to care about possible cross-iteration dependences
2216      and thus can process the full original reference.  Do so,
2217      similar to how loop invariant motion applies extra offset-based
2218      disambiguation.  */
2219   if (!loop_nest)
2220     {
2221       aff_tree off1, off2;
2222       poly_widest_int size1, size2;
2223       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2224       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2225       aff_combination_scale (&off1, -1);
2226       aff_combination_add (&off2, &off1);
2227       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2228         return false;
2229     }
2230
2231   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2232       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2233       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2234       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2235     return false;
2236
2237   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2238      do not know the size of the base-object.  So we cannot do any
2239      offset/overlap based analysis but have to rely on points-to
2240      information only.  */
2241   if (TREE_CODE (addr_a) == MEM_REF
2242       && (DR_UNCONSTRAINED_BASE (a)
2243           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2244     {
2245       /* For true dependences we can apply TBAA.  */
2246       if (flag_strict_aliasing
2247           && DR_IS_WRITE (a) && DR_IS_READ (b)
2248           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2249                                      get_alias_set (DR_REF (b))))
2250         return false;
2251       if (TREE_CODE (addr_b) == MEM_REF)
2252         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2253                                        TREE_OPERAND (addr_b, 0));
2254       else
2255         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2256                                        build_fold_addr_expr (addr_b));
2257     }
2258   else if (TREE_CODE (addr_b) == MEM_REF
2259            && (DR_UNCONSTRAINED_BASE (b)
2260                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2261     {
2262       /* For true dependences we can apply TBAA.  */
2263       if (flag_strict_aliasing
2264           && DR_IS_WRITE (a) && DR_IS_READ (b)
2265           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2266                                      get_alias_set (DR_REF (b))))
2267         return false;
2268       if (TREE_CODE (addr_a) == MEM_REF)
2269         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2270                                        TREE_OPERAND (addr_b, 0));
2271       else
2272         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2273                                        TREE_OPERAND (addr_b, 0));
2274     }
2275
2276   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2277      that is being subsetted in the loop nest.  */
2278   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2279     return refs_output_dependent_p (addr_a, addr_b);
2280   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2281     return refs_anti_dependent_p (addr_a, addr_b);
2282   return refs_may_alias_p (addr_a, addr_b);
2283 }
2284
2285 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2286    if it is meaningful to compare their associated access functions
2287    when checking for dependencies.  */
2288
2289 static bool
2290 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2291 {
2292   /* Allow pairs of component refs from the following sets:
2293
2294        { REALPART_EXPR, IMAGPART_EXPR }
2295        { COMPONENT_REF }
2296        { ARRAY_REF }.  */
2297   tree_code code_a = TREE_CODE (ref_a);
2298   tree_code code_b = TREE_CODE (ref_b);
2299   if (code_a == IMAGPART_EXPR)
2300     code_a = REALPART_EXPR;
2301   if (code_b == IMAGPART_EXPR)
2302     code_b = REALPART_EXPR;
2303   if (code_a != code_b)
2304     return false;
2305
2306   if (TREE_CODE (ref_a) == COMPONENT_REF)
2307     /* ??? We cannot simply use the type of operand #0 of the refs here as
2308        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2309        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2310     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2311             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2312
2313   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2314                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2315 }
2316
2317 /* Initialize a data dependence relation between data accesses A and
2318    B.  NB_LOOPS is the number of loops surrounding the references: the
2319    size of the classic distance/direction vectors.  */
2320
2321 struct data_dependence_relation *
2322 initialize_data_dependence_relation (struct data_reference *a,
2323                                      struct data_reference *b,
2324                                      vec<loop_p> loop_nest)
2325 {
2326   struct data_dependence_relation *res;
2327   unsigned int i;
2328
2329   res = XCNEW (struct data_dependence_relation);
2330   DDR_A (res) = a;
2331   DDR_B (res) = b;
2332   DDR_LOOP_NEST (res).create (0);
2333   DDR_SUBSCRIPTS (res).create (0);
2334   DDR_DIR_VECTS (res).create (0);
2335   DDR_DIST_VECTS (res).create (0);
2336
2337   if (a == NULL || b == NULL)
2338     {
2339       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2340       return res;
2341     }
2342
2343   /* If the data references do not alias, then they are independent.  */
2344   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2345     {
2346       DDR_ARE_DEPENDENT (res) = chrec_known;
2347       return res;
2348     }
2349
2350   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2351   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2352   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2353     {
2354       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2355       return res;
2356     }
2357
2358   /* For unconstrained bases, the root (highest-indexed) subscript
2359      describes a variation in the base of the original DR_REF rather
2360      than a component access.  We have no type that accurately describes
2361      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2362      applying this subscript) so limit the search to the last real
2363      component access.
2364
2365      E.g. for:
2366
2367         void
2368         f (int a[][8], int b[][8])
2369         {
2370           for (int i = 0; i < 8; ++i)
2371             a[i * 2][0] = b[i][0];
2372         }
2373
2374      the a and b accesses have a single ARRAY_REF component reference [0]
2375      but have two subscripts.  */
2376   if (DR_UNCONSTRAINED_BASE (a))
2377     num_dimensions_a -= 1;
2378   if (DR_UNCONSTRAINED_BASE (b))
2379     num_dimensions_b -= 1;
2380
2381   /* These structures describe sequences of component references in
2382      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2383      specific access function.  */
2384   struct {
2385     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2386        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2387        indices.  In C notation, these are the indices of the rightmost
2388        component references; e.g. for a sequence .b.c.d, the start
2389        index is for .d.  */
2390     unsigned int start_a;
2391     unsigned int start_b;
2392
2393     /* The sequence contains LENGTH consecutive access functions from
2394        each DR.  */
2395     unsigned int length;
2396
2397     /* The enclosing objects for the A and B sequences respectively,
2398        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2399        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2400     tree object_a;
2401     tree object_b;
2402   } full_seq = {}, struct_seq = {};
2403
2404   /* Before each iteration of the loop:
2405
2406      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2407      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2408   unsigned int index_a = 0;
2409   unsigned int index_b = 0;
2410   tree ref_a = DR_REF (a);
2411   tree ref_b = DR_REF (b);
2412
2413   /* Now walk the component references from the final DR_REFs back up to
2414      the enclosing base objects.  Each component reference corresponds
2415      to one access function in the DR, with access function 0 being for
2416      the final DR_REF and the highest-indexed access function being the
2417      one that is applied to the base of the DR.
2418
2419      Look for a sequence of component references whose access functions
2420      are comparable (see access_fn_components_comparable_p).  If more
2421      than one such sequence exists, pick the one nearest the base
2422      (which is the leftmost sequence in C notation).  Store this sequence
2423      in FULL_SEQ.
2424
2425      For example, if we have:
2426
2427         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2428
2429         A: a[0][i].s.c.d
2430         B: __real b[0][i].s.e[i].f
2431
2432      (where d is the same type as the real component of f) then the access
2433      functions would be:
2434
2435                          0   1   2   3
2436         A:              .d  .c  .s [i]
2437
2438                  0   1   2   3   4   5
2439         B:  __real  .f [i]  .e  .s [i]
2440
2441      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2442      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2443      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2444      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2445      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2446      index foo[10] arrays, so is again comparable.  The sequence is
2447      therefore:
2448
2449         A: [1, 3]  (i.e. [i].s.c)
2450         B: [3, 5]  (i.e. [i].s.e)
2451
2452      Also look for sequences of component references whose access
2453      functions are comparable and whose enclosing objects have the same
2454      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2455      example, STRUCT_SEQ would be:
2456
2457         A: [1, 2]  (i.e. s.c)
2458         B: [3, 4]  (i.e. s.e)  */
2459   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2460     {
2461       /* REF_A and REF_B must be one of the component access types
2462          allowed by dr_analyze_indices.  */
2463       gcc_checking_assert (access_fn_component_p (ref_a));
2464       gcc_checking_assert (access_fn_component_p (ref_b));
2465
2466       /* Get the immediately-enclosing objects for REF_A and REF_B,
2467          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2468          and DR_ACCESS_FN (B, INDEX_B).  */
2469       tree object_a = TREE_OPERAND (ref_a, 0);
2470       tree object_b = TREE_OPERAND (ref_b, 0);
2471
2472       tree type_a = TREE_TYPE (object_a);
2473       tree type_b = TREE_TYPE (object_b);
2474       if (access_fn_components_comparable_p (ref_a, ref_b))
2475         {
2476           /* This pair of component accesses is comparable for dependence
2477              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2478              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2479           if (full_seq.start_a + full_seq.length != index_a
2480               || full_seq.start_b + full_seq.length != index_b)
2481             {
2482               /* The accesses don't extend the current sequence,
2483                  so start a new one here.  */
2484               full_seq.start_a = index_a;
2485               full_seq.start_b = index_b;
2486               full_seq.length = 0;
2487             }
2488
2489           /* Add this pair of references to the sequence.  */
2490           full_seq.length += 1;
2491           full_seq.object_a = object_a;
2492           full_seq.object_b = object_b;
2493
2494           /* If the enclosing objects are structures (and thus have the
2495              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2496           if (TREE_CODE (type_a) == RECORD_TYPE)
2497             struct_seq = full_seq;
2498
2499           /* Move to the next containing reference for both A and B.  */
2500           ref_a = object_a;
2501           ref_b = object_b;
2502           index_a += 1;
2503           index_b += 1;
2504           continue;
2505         }
2506
2507       /* Try to approach equal type sizes.  */
2508       if (!COMPLETE_TYPE_P (type_a)
2509           || !COMPLETE_TYPE_P (type_b)
2510           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2511           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2512         break;
2513
2514       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2515       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2516       if (size_a <= size_b)
2517         {
2518           index_a += 1;
2519           ref_a = object_a;
2520         }
2521       if (size_b <= size_a)
2522         {
2523           index_b += 1;
2524           ref_b = object_b;
2525         }
2526     }
2527
2528   /* See whether FULL_SEQ ends at the base and whether the two bases
2529      are equal.  We do not care about TBAA or alignment info so we can
2530      use OEP_ADDRESS_OF to avoid false negatives.  */
2531   tree base_a = DR_BASE_OBJECT (a);
2532   tree base_b = DR_BASE_OBJECT (b);
2533   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2534                       && full_seq.start_b + full_seq.length == num_dimensions_b
2535                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2536                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2537                       && types_compatible_p (TREE_TYPE (base_a),
2538                                              TREE_TYPE (base_b))
2539                       && (!loop_nest.exists ()
2540                           || (object_address_invariant_in_loop_p
2541                               (loop_nest[0], base_a))));
2542
2543   /* If the bases are the same, we can include the base variation too.
2544      E.g. the b accesses in:
2545
2546        for (int i = 0; i < n; ++i)
2547          b[i + 4][0] = b[i][0];
2548
2549      have a definite dependence distance of 4, while for:
2550
2551        for (int i = 0; i < n; ++i)
2552          a[i + 4][0] = b[i][0];
2553
2554      the dependence distance depends on the gap between a and b.
2555
2556      If the bases are different then we can only rely on the sequence
2557      rooted at a structure access, since arrays are allowed to overlap
2558      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2559      valid code:
2560
2561        int a[256];
2562        ...
2563        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2564
2565      where two lvalues with the same int[4][3] type overlap, and where
2566      both lvalues are distinct from the object's declared type.  */
2567   if (same_base_p)
2568     {
2569       if (DR_UNCONSTRAINED_BASE (a))
2570         full_seq.length += 1;
2571     }
2572   else
2573     full_seq = struct_seq;
2574
2575   /* Punt if we didn't find a suitable sequence.  */
2576   if (full_seq.length == 0)
2577     {
2578       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2579       return res;
2580     }
2581
2582   if (!same_base_p)
2583     {
2584       /* Partial overlap is possible for different bases when strict aliasing
2585          is not in effect.  It's also possible if either base involves a union
2586          access; e.g. for:
2587
2588            struct s1 { int a[2]; };
2589            struct s2 { struct s1 b; int c; };
2590            struct s3 { int d; struct s1 e; };
2591            union u { struct s2 f; struct s3 g; } *p, *q;
2592
2593          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2594          "p->g.e" (base "p->g") and might partially overlap the s1 at
2595          "q->g.e" (base "q->g").  */
2596       if (!flag_strict_aliasing
2597           || ref_contains_union_access_p (full_seq.object_a)
2598           || ref_contains_union_access_p (full_seq.object_b))
2599         {
2600           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2601           return res;
2602         }
2603
2604       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2605       if (!loop_nest.exists ()
2606           || (object_address_invariant_in_loop_p (loop_nest[0],
2607                                                   full_seq.object_a)
2608               && object_address_invariant_in_loop_p (loop_nest[0],
2609                                                      full_seq.object_b)))
2610         {
2611           DDR_OBJECT_A (res) = full_seq.object_a;
2612           DDR_OBJECT_B (res) = full_seq.object_b;
2613         }
2614     }
2615
2616   DDR_AFFINE_P (res) = true;
2617   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2618   DDR_SUBSCRIPTS (res).create (full_seq.length);
2619   DDR_LOOP_NEST (res) = loop_nest;
2620   DDR_INNER_LOOP (res) = 0;
2621   DDR_SELF_REFERENCE (res) = false;
2622
2623   for (i = 0; i < full_seq.length; ++i)
2624     {
2625       struct subscript *subscript;
2626
2627       subscript = XNEW (struct subscript);
2628       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2629       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2630       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2631       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2632       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2633       SUB_DISTANCE (subscript) = chrec_dont_know;
2634       DDR_SUBSCRIPTS (res).safe_push (subscript);
2635     }
2636
2637   return res;
2638 }
2639
2640 /* Frees memory used by the conflict function F.  */
2641
2642 static void
2643 free_conflict_function (conflict_function *f)
2644 {
2645   unsigned i;
2646
2647   if (CF_NONTRIVIAL_P (f))
2648     {
2649       for (i = 0; i < f->n; i++)
2650         affine_fn_free (f->fns[i]);
2651     }
2652   free (f);
2653 }
2654
2655 /* Frees memory used by SUBSCRIPTS.  */
2656
2657 static void
2658 free_subscripts (vec<subscript_p> subscripts)
2659 {
2660   unsigned i;
2661   subscript_p s;
2662
2663   FOR_EACH_VEC_ELT (subscripts, i, s)
2664     {
2665       free_conflict_function (s->conflicting_iterations_in_a);
2666       free_conflict_function (s->conflicting_iterations_in_b);
2667       free (s);
2668     }
2669   subscripts.release ();
2670 }
2671
2672 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2673    description.  */
2674
2675 static inline void
2676 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2677                         tree chrec)
2678 {
2679   DDR_ARE_DEPENDENT (ddr) = chrec;
2680   free_subscripts (DDR_SUBSCRIPTS (ddr));
2681   DDR_SUBSCRIPTS (ddr).create (0);
2682 }
2683
2684 /* The dependence relation DDR cannot be represented by a distance
2685    vector.  */
2686
2687 static inline void
2688 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2689 {
2690   if (dump_file && (dump_flags & TDF_DETAILS))
2691     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2692
2693   DDR_AFFINE_P (ddr) = false;
2694 }
2695
2696 \f
2697
2698 /* This section contains the classic Banerjee tests.  */
2699
2700 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2701    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2702
2703 static inline bool
2704 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2705 {
2706   return (evolution_function_is_constant_p (chrec_a)
2707           && evolution_function_is_constant_p (chrec_b));
2708 }
2709
2710 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2711    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2712
2713 static bool
2714 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2715 {
2716   if ((evolution_function_is_constant_p (chrec_a)
2717        && evolution_function_is_univariate_p (chrec_b))
2718       || (evolution_function_is_constant_p (chrec_b)
2719           && evolution_function_is_univariate_p (chrec_a)))
2720     return true;
2721
2722   if (evolution_function_is_univariate_p (chrec_a)
2723       && evolution_function_is_univariate_p (chrec_b))
2724     {
2725       switch (TREE_CODE (chrec_a))
2726         {
2727         case POLYNOMIAL_CHREC:
2728           switch (TREE_CODE (chrec_b))
2729             {
2730             case POLYNOMIAL_CHREC:
2731               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2732                 return false;
2733               /* FALLTHRU */
2734
2735             default:
2736               return true;
2737             }
2738
2739         default:
2740           return true;
2741         }
2742     }
2743
2744   return false;
2745 }
2746
2747 /* Creates a conflict function with N dimensions.  The affine functions
2748    in each dimension follow.  */
2749
2750 static conflict_function *
2751 conflict_fn (unsigned n, ...)
2752 {
2753   unsigned i;
2754   conflict_function *ret = XCNEW (conflict_function);
2755   va_list ap;
2756
2757   gcc_assert (n > 0 && n <= MAX_DIM);
2758   va_start (ap, n);
2759
2760   ret->n = n;
2761   for (i = 0; i < n; i++)
2762     ret->fns[i] = va_arg (ap, affine_fn);
2763   va_end (ap);
2764
2765   return ret;
2766 }
2767
2768 /* Returns constant affine function with value CST.  */
2769
2770 static affine_fn
2771 affine_fn_cst (tree cst)
2772 {
2773   affine_fn fn;
2774   fn.create (1);
2775   fn.quick_push (cst);
2776   return fn;
2777 }
2778
2779 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2780
2781 static affine_fn
2782 affine_fn_univar (tree cst, unsigned dim, tree coef)
2783 {
2784   affine_fn fn;
2785   fn.create (dim + 1);
2786   unsigned i;
2787
2788   gcc_assert (dim > 0);
2789   fn.quick_push (cst);
2790   for (i = 1; i < dim; i++)
2791     fn.quick_push (integer_zero_node);
2792   fn.quick_push (coef);
2793   return fn;
2794 }
2795
2796 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2797    *OVERLAPS_B are initialized to the functions that describe the
2798    relation between the elements accessed twice by CHREC_A and
2799    CHREC_B.  For k >= 0, the following property is verified:
2800
2801    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2802
2803 static void
2804 analyze_ziv_subscript (tree chrec_a,
2805                        tree chrec_b,
2806                        conflict_function **overlaps_a,
2807                        conflict_function **overlaps_b,
2808                        tree *last_conflicts)
2809 {
2810   tree type, difference;
2811   dependence_stats.num_ziv++;
2812
2813   if (dump_file && (dump_flags & TDF_DETAILS))
2814     fprintf (dump_file, "(analyze_ziv_subscript \n");
2815
2816   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2817   chrec_a = chrec_convert (type, chrec_a, NULL);
2818   chrec_b = chrec_convert (type, chrec_b, NULL);
2819   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2820
2821   switch (TREE_CODE (difference))
2822     {
2823     case INTEGER_CST:
2824       if (integer_zerop (difference))
2825         {
2826           /* The difference is equal to zero: the accessed index
2827              overlaps for each iteration in the loop.  */
2828           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2829           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2830           *last_conflicts = chrec_dont_know;
2831           dependence_stats.num_ziv_dependent++;
2832         }
2833       else
2834         {
2835           /* The accesses do not overlap.  */
2836           *overlaps_a = conflict_fn_no_dependence ();
2837           *overlaps_b = conflict_fn_no_dependence ();
2838           *last_conflicts = integer_zero_node;
2839           dependence_stats.num_ziv_independent++;
2840         }
2841       break;
2842
2843     default:
2844       /* We're not sure whether the indexes overlap.  For the moment,
2845          conservatively answer "don't know".  */
2846       if (dump_file && (dump_flags & TDF_DETAILS))
2847         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2848
2849       *overlaps_a = conflict_fn_not_known ();
2850       *overlaps_b = conflict_fn_not_known ();
2851       *last_conflicts = chrec_dont_know;
2852       dependence_stats.num_ziv_unimplemented++;
2853       break;
2854     }
2855
2856   if (dump_file && (dump_flags & TDF_DETAILS))
2857     fprintf (dump_file, ")\n");
2858 }
2859
2860 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2861    and only if it fits to the int type.  If this is not the case, or the
2862    bound  on the number of iterations of LOOP could not be derived, returns
2863    chrec_dont_know.  */
2864
2865 static tree
2866 max_stmt_executions_tree (struct loop *loop)
2867 {
2868   widest_int nit;
2869
2870   if (!max_stmt_executions (loop, &nit))
2871     return chrec_dont_know;
2872
2873   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2874     return chrec_dont_know;
2875
2876   return wide_int_to_tree (unsigned_type_node, nit);
2877 }
2878
2879 /* Determine whether the CHREC is always positive/negative.  If the expression
2880    cannot be statically analyzed, return false, otherwise set the answer into
2881    VALUE.  */
2882
2883 static bool
2884 chrec_is_positive (tree chrec, bool *value)
2885 {
2886   bool value0, value1, value2;
2887   tree end_value, nb_iter;
2888
2889   switch (TREE_CODE (chrec))
2890     {
2891     case POLYNOMIAL_CHREC:
2892       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2893           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2894         return false;
2895
2896       /* FIXME -- overflows.  */
2897       if (value0 == value1)
2898         {
2899           *value = value0;
2900           return true;
2901         }
2902
2903       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2904          and the proof consists in showing that the sign never
2905          changes during the execution of the loop, from 0 to
2906          loop->nb_iterations.  */
2907       if (!evolution_function_is_affine_p (chrec))
2908         return false;
2909
2910       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2911       if (chrec_contains_undetermined (nb_iter))
2912         return false;
2913
2914 #if 0
2915       /* TODO -- If the test is after the exit, we may decrease the number of
2916          iterations by one.  */
2917       if (after_exit)
2918         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2919 #endif
2920
2921       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2922
2923       if (!chrec_is_positive (end_value, &value2))
2924         return false;
2925
2926       *value = value0;
2927       return value0 == value1;
2928
2929     case INTEGER_CST:
2930       switch (tree_int_cst_sgn (chrec))
2931         {
2932         case -1:
2933           *value = false;
2934           break;
2935         case 1:
2936           *value = true;
2937           break;
2938         default:
2939           return false;
2940         }
2941       return true;
2942
2943     default:
2944       return false;
2945     }
2946 }
2947
2948
2949 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2950    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2951    *OVERLAPS_B are initialized to the functions that describe the
2952    relation between the elements accessed twice by CHREC_A and
2953    CHREC_B.  For k >= 0, the following property is verified:
2954
2955    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2956
2957 static void
2958 analyze_siv_subscript_cst_affine (tree chrec_a,
2959                                   tree chrec_b,
2960                                   conflict_function **overlaps_a,
2961                                   conflict_function **overlaps_b,
2962                                   tree *last_conflicts)
2963 {
2964   bool value0, value1, value2;
2965   tree type, difference, tmp;
2966
2967   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2968   chrec_a = chrec_convert (type, chrec_a, NULL);
2969   chrec_b = chrec_convert (type, chrec_b, NULL);
2970   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2971
2972   /* Special case overlap in the first iteration.  */
2973   if (integer_zerop (difference))
2974     {
2975       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2976       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2977       *last_conflicts = integer_one_node;
2978       return;
2979     }
2980
2981   if (!chrec_is_positive (initial_condition (difference), &value0))
2982     {
2983       if (dump_file && (dump_flags & TDF_DETAILS))
2984         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2985
2986       dependence_stats.num_siv_unimplemented++;
2987       *overlaps_a = conflict_fn_not_known ();
2988       *overlaps_b = conflict_fn_not_known ();
2989       *last_conflicts = chrec_dont_know;
2990       return;
2991     }
2992   else
2993     {
2994       if (value0 == false)
2995         {
2996           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
2997               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2998             {
2999               if (dump_file && (dump_flags & TDF_DETAILS))
3000                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3001
3002               *overlaps_a = conflict_fn_not_known ();
3003               *overlaps_b = conflict_fn_not_known ();
3004               *last_conflicts = chrec_dont_know;
3005               dependence_stats.num_siv_unimplemented++;
3006               return;
3007             }
3008           else
3009             {
3010               if (value1 == true)
3011                 {
3012                   /* Example:
3013                      chrec_a = 12
3014                      chrec_b = {10, +, 1}
3015                   */
3016
3017                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3018                     {
3019                       HOST_WIDE_INT numiter;
3020                       struct loop *loop = get_chrec_loop (chrec_b);
3021
3022                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3023                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3024                                          fold_build1 (ABS_EXPR, type, difference),
3025                                          CHREC_RIGHT (chrec_b));
3026                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3027                       *last_conflicts = integer_one_node;
3028
3029
3030                       /* Perform weak-zero siv test to see if overlap is
3031                          outside the loop bounds.  */
3032                       numiter = max_stmt_executions_int (loop);
3033
3034                       if (numiter >= 0
3035                           && compare_tree_int (tmp, numiter) > 0)
3036                         {
3037                           free_conflict_function (*overlaps_a);
3038                           free_conflict_function (*overlaps_b);
3039                           *overlaps_a = conflict_fn_no_dependence ();
3040                           *overlaps_b = conflict_fn_no_dependence ();
3041                           *last_conflicts = integer_zero_node;
3042                           dependence_stats.num_siv_independent++;
3043                           return;
3044                         }
3045                       dependence_stats.num_siv_dependent++;
3046                       return;
3047                     }
3048
3049                   /* When the step does not divide the difference, there are
3050                      no overlaps.  */
3051                   else
3052                     {
3053                       *overlaps_a = conflict_fn_no_dependence ();
3054                       *overlaps_b = conflict_fn_no_dependence ();
3055                       *last_conflicts = integer_zero_node;
3056                       dependence_stats.num_siv_independent++;
3057                       return;
3058                     }
3059                 }
3060
3061               else
3062                 {
3063                   /* Example:
3064                      chrec_a = 12
3065                      chrec_b = {10, +, -1}
3066
3067                      In this case, chrec_a will not overlap with chrec_b.  */
3068                   *overlaps_a = conflict_fn_no_dependence ();
3069                   *overlaps_b = conflict_fn_no_dependence ();
3070                   *last_conflicts = integer_zero_node;
3071                   dependence_stats.num_siv_independent++;
3072                   return;
3073                 }
3074             }
3075         }
3076       else
3077         {
3078           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3079               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3080             {
3081               if (dump_file && (dump_flags & TDF_DETAILS))
3082                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3083
3084               *overlaps_a = conflict_fn_not_known ();
3085               *overlaps_b = conflict_fn_not_known ();
3086               *last_conflicts = chrec_dont_know;
3087               dependence_stats.num_siv_unimplemented++;
3088               return;
3089             }
3090           else
3091             {
3092               if (value2 == false)
3093                 {
3094                   /* Example:
3095                      chrec_a = 3
3096                      chrec_b = {10, +, -1}
3097                   */
3098                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3099                     {
3100                       HOST_WIDE_INT numiter;
3101                       struct loop *loop = get_chrec_loop (chrec_b);
3102
3103                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3104                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3105                                          CHREC_RIGHT (chrec_b));
3106                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3107                       *last_conflicts = integer_one_node;
3108
3109                       /* Perform weak-zero siv test to see if overlap is
3110                          outside the loop bounds.  */
3111                       numiter = max_stmt_executions_int (loop);
3112
3113                       if (numiter >= 0
3114                           && compare_tree_int (tmp, numiter) > 0)
3115                         {
3116                           free_conflict_function (*overlaps_a);
3117                           free_conflict_function (*overlaps_b);
3118                           *overlaps_a = conflict_fn_no_dependence ();
3119                           *overlaps_b = conflict_fn_no_dependence ();
3120                           *last_conflicts = integer_zero_node;
3121                           dependence_stats.num_siv_independent++;
3122                           return;
3123                         }
3124                       dependence_stats.num_siv_dependent++;
3125                       return;
3126                     }
3127
3128                   /* When the step does not divide the difference, there
3129                      are no overlaps.  */
3130                   else
3131                     {
3132                       *overlaps_a = conflict_fn_no_dependence ();
3133                       *overlaps_b = conflict_fn_no_dependence ();
3134                       *last_conflicts = integer_zero_node;
3135                       dependence_stats.num_siv_independent++;
3136                       return;
3137                     }
3138                 }
3139               else
3140                 {
3141                   /* Example:
3142                      chrec_a = 3
3143                      chrec_b = {4, +, 1}
3144
3145                      In this case, chrec_a will not overlap with chrec_b.  */
3146                   *overlaps_a = conflict_fn_no_dependence ();
3147                   *overlaps_b = conflict_fn_no_dependence ();
3148                   *last_conflicts = integer_zero_node;
3149                   dependence_stats.num_siv_independent++;
3150                   return;
3151                 }
3152             }
3153         }
3154     }
3155 }
3156
3157 /* Helper recursive function for initializing the matrix A.  Returns
3158    the initial value of CHREC.  */
3159
3160 static tree
3161 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3162 {
3163   gcc_assert (chrec);
3164
3165   switch (TREE_CODE (chrec))
3166     {
3167     case POLYNOMIAL_CHREC:
3168       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3169       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3170
3171     case PLUS_EXPR:
3172     case MULT_EXPR:
3173     case MINUS_EXPR:
3174       {
3175         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3176         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3177
3178         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3179       }
3180
3181     CASE_CONVERT:
3182       {
3183         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3184         return chrec_convert (chrec_type (chrec), op, NULL);
3185       }
3186
3187     case BIT_NOT_EXPR:
3188       {
3189         /* Handle ~X as -1 - X.  */
3190         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3191         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3192                               build_int_cst (TREE_TYPE (chrec), -1), op);
3193       }
3194
3195     case INTEGER_CST:
3196       return chrec;
3197
3198     default:
3199       gcc_unreachable ();
3200       return NULL_TREE;
3201     }
3202 }
3203
/* Quotient of X by Y using the language's built-in integer division.
   NOTE(review): despite the name, `/' truncates toward zero, so this
   only equals a mathematical floor when the quotient is non-negative --
   confirm that callers never pass operands of opposite sign.  */
#define FLOOR_DIV(x,y) ((x) / (y))
3205
3206 /* Solves the special case of the Diophantine equation:
3207    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3208
3209    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3210    number of iterations that loops X and Y run.  The overlaps will be
3211    constructed as evolutions in dimension DIM.  */
3212
3213 static void
3214 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3215                                          HOST_WIDE_INT step_a,
3216                                          HOST_WIDE_INT step_b,
3217                                          affine_fn *overlaps_a,
3218                                          affine_fn *overlaps_b,
3219                                          tree *last_conflicts, int dim)
3220 {
3221   if (((step_a > 0 && step_b > 0)
3222        || (step_a < 0 && step_b < 0)))
3223     {
3224       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3225       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3226
3227       gcd_steps_a_b = gcd (step_a, step_b);
3228       step_overlaps_a = step_b / gcd_steps_a_b;
3229       step_overlaps_b = step_a / gcd_steps_a_b;
3230
3231       if (niter > 0)
3232         {
3233           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3234           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3235           last_conflict = tau2;
3236           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3237         }
3238       else
3239         *last_conflicts = chrec_dont_know;
3240
3241       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3242                                       build_int_cst (NULL_TREE,
3243                                                      step_overlaps_a));
3244       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3245                                       build_int_cst (NULL_TREE,
3246                                                      step_overlaps_b));
3247     }
3248
3249   else
3250     {
3251       *overlaps_a = affine_fn_cst (integer_zero_node);
3252       *overlaps_b = affine_fn_cst (integer_zero_node);
3253       *last_conflicts = integer_zero_node;
3254     }
3255 }
3256
3257 /* Solves the special case of a Diophantine equation where CHREC_A is
3258    an affine bivariate function, and CHREC_B is an affine univariate
3259    function.  For example,
3260
3261    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3262
3263    has the following overlapping functions:
3264
3265    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3266    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3267    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3268
3269    FORNOW: This is a specialized implementation for a case occurring in
3270    a common benchmark.  Implement the general algorithm.  */
3271
3272 static void
3273 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3274                                       conflict_function **overlaps_a,
3275                                       conflict_function **overlaps_b,
3276                                       tree *last_conflicts)
3277 {
3278   bool xz_p, yz_p, xyz_p;
3279   HOST_WIDE_INT step_x, step_y, step_z;
3280   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3281   affine_fn overlaps_a_xz, overlaps_b_xz;
3282   affine_fn overlaps_a_yz, overlaps_b_yz;
3283   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3284   affine_fn ova1, ova2, ovb;
3285   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3286
3287   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3288   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3289   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3290
3291   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3292   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3293   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3294
3295   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3296     {
3297       if (dump_file && (dump_flags & TDF_DETAILS))
3298         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3299
3300       *overlaps_a = conflict_fn_not_known ();
3301       *overlaps_b = conflict_fn_not_known ();
3302       *last_conflicts = chrec_dont_know;
3303       return;
3304     }
3305
3306   niter = MIN (niter_x, niter_z);
3307   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3308                                            &overlaps_a_xz,
3309                                            &overlaps_b_xz,
3310                                            &last_conflicts_xz, 1);
3311   niter = MIN (niter_y, niter_z);
3312   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3313                                            &overlaps_a_yz,
3314                                            &overlaps_b_yz,
3315                                            &last_conflicts_yz, 2);
3316   niter = MIN (niter_x, niter_z);
3317   niter = MIN (niter_y, niter);
3318   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3319                                            &overlaps_a_xyz,
3320                                            &overlaps_b_xyz,
3321                                            &last_conflicts_xyz, 3);
3322
3323   xz_p = !integer_zerop (last_conflicts_xz);
3324   yz_p = !integer_zerop (last_conflicts_yz);
3325   xyz_p = !integer_zerop (last_conflicts_xyz);
3326
3327   if (xz_p || yz_p || xyz_p)
3328     {
3329       ova1 = affine_fn_cst (integer_zero_node);
3330       ova2 = affine_fn_cst (integer_zero_node);
3331       ovb = affine_fn_cst (integer_zero_node);
3332       if (xz_p)
3333         {
3334           affine_fn t0 = ova1;
3335           affine_fn t2 = ovb;
3336
3337           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3338           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3339           affine_fn_free (t0);
3340           affine_fn_free (t2);
3341           *last_conflicts = last_conflicts_xz;
3342         }
3343       if (yz_p)
3344         {
3345           affine_fn t0 = ova2;
3346           affine_fn t2 = ovb;
3347
3348           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3349           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3350           affine_fn_free (t0);
3351           affine_fn_free (t2);
3352           *last_conflicts = last_conflicts_yz;
3353         }
3354       if (xyz_p)
3355         {
3356           affine_fn t0 = ova1;
3357           affine_fn t2 = ova2;
3358           affine_fn t4 = ovb;
3359
3360           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3361           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3362           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3363           affine_fn_free (t0);
3364           affine_fn_free (t2);
3365           affine_fn_free (t4);
3366           *last_conflicts = last_conflicts_xyz;
3367         }
3368       *overlaps_a = conflict_fn (2, ova1, ova2);
3369       *overlaps_b = conflict_fn (1, ovb);
3370     }
3371   else
3372     {
3373       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3374       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3375       *last_conflicts = integer_zero_node;
3376     }
3377
3378   affine_fn_free (overlaps_a_xz);
3379   affine_fn_free (overlaps_b_xz);
3380   affine_fn_free (overlaps_a_yz);
3381   affine_fn_free (overlaps_b_yz);
3382   affine_fn_free (overlaps_a_xyz);
3383   affine_fn_free (overlaps_b_xyz);
3384 }
3385
3386 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3387
3388 static void
3389 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3390                     int size)
3391 {
3392   memcpy (vec2, vec1, size * sizeof (*vec1));
3393 }
3394
3395 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3396
3397 static void
3398 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3399                     int m, int n)
3400 {
3401   int i;
3402
3403   for (i = 0; i < m; i++)
3404     lambda_vector_copy (mat1[i], mat2[i], n);
3405 }
3406
3407 /* Store the N x N identity matrix in MAT.  */
3408
3409 static void
3410 lambda_matrix_id (lambda_matrix mat, int size)
3411 {
3412   int i, j;
3413
3414   for (i = 0; i < size; i++)
3415     for (j = 0; j < size; j++)
3416       mat[i][j] = (i == j) ? 1 : 0;
3417 }
3418
3419 /* Return the first nonzero element of vector VEC1 between START and N.
3420    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
3421
3422 static int
3423 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3424 {
3425   int j = start;
3426   while (j < n && vec1[j] == 0)
3427     j++;
3428   return j;
3429 }
3430
3431 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3432    R2 = R2 + CONST1 * R1.  */
3433
3434 static void
3435 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
3436 {
3437   int i;
3438
3439   if (const1 == 0)
3440     return;
3441
3442   for (i = 0; i < n; i++)
3443     mat[r2][i] += const1 * mat[r1][i];
3444 }
3445
3446 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3447    and store the result in VEC2.  */
3448
3449 static void
3450 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3451                           int size, int const1)
3452 {
3453   int i;
3454
3455   if (const1 == 0)
3456     lambda_vector_clear (vec2, size);
3457   else
3458     for (i = 0; i < size; i++)
3459       vec2[i] = const1 * vec1[i];
3460 }
3461
3462 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3463
3464 static void
3465 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3466                       int size)
3467 {
3468   lambda_vector_mult_const (vec1, vec2, size, -1);
3469 }
3470
3471 /* Negate row R1 of matrix MAT which has N columns.  */
3472
3473 static void
3474 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3475 {
3476   lambda_vector_negate (mat[r1], mat[r1], n);
3477 }
3478
3479 /* Return true if two vectors are equal.  */
3480
3481 static bool
3482 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3483 {
3484   int i;
3485   for (i = 0; i < size; i++)
3486     if (vec1[i] != vec2[i])
3487       return false;
3488   return true;
3489 }
3490
3491 /* Given an M x N integer matrix A, this function determines an M x
3492    M unimodular matrix U, and an M x N echelon matrix S such that
3493    "U.A = S".  This decomposition is also known as "right Hermite".
3494
3495    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3496    Restructuring Compilers" Utpal Banerjee.  */
3497
3498 static void
3499 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3500                              lambda_matrix S, lambda_matrix U)
3501 {
3502   int i, j, i0 = 0;
3503
3504   lambda_matrix_copy (A, S, m, n);
3505   lambda_matrix_id (U, m);
3506
3507   for (j = 0; j < n; j++)
3508     {
3509       if (lambda_vector_first_nz (S[j], m, i0) < m)
3510         {
3511           ++i0;
3512           for (i = m - 1; i >= i0; i--)
3513             {
3514               while (S[i][j] != 0)
3515                 {
3516                   int sigma, factor, a, b;
3517
3518                   a = S[i-1][j];
3519                   b = S[i][j];
3520                   sigma = (a * b < 0) ? -1: 1;
3521                   a = abs (a);
3522                   b = abs (b);
3523                   factor = sigma * (a / b);
3524
3525                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3526                   std::swap (S[i], S[i-1]);
3527
3528                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3529                   std::swap (U[i], U[i-1]);
3530                 }
3531             }
3532         }
3533     }
3534 }
3535
3536 /* Determines the overlapping elements due to accesses CHREC_A and
3537    CHREC_B, that are affine functions.  This function cannot handle
3538    symbolic evolution functions, ie. when initial conditions are
3539    parameters, because it uses lambda matrices of integers.  */
3540
3541 static void
3542 analyze_subscript_affine_affine (tree chrec_a,
3543                                  tree chrec_b,
3544                                  conflict_function **overlaps_a,
3545                                  conflict_function **overlaps_b,
3546                                  tree *last_conflicts)
3547 {
3548   unsigned nb_vars_a, nb_vars_b, dim;
3549   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
3550   lambda_matrix A, U, S;
3551   struct obstack scratch_obstack;
3552
3553   if (eq_evolutions_p (chrec_a, chrec_b))
3554     {
3555       /* The accessed index overlaps for each iteration in the
3556          loop.  */
3557       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3558       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3559       *last_conflicts = chrec_dont_know;
3560       return;
3561     }
3562   if (dump_file && (dump_flags & TDF_DETAILS))
3563     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
3564
3565   /* For determining the initial intersection, we have to solve a
3566      Diophantine equation.  This is the most time consuming part.
3567
3568      For answering to the question: "Is there a dependence?" we have
3569      to prove that there exists a solution to the Diophantine
3570      equation, and that the solution is in the iteration domain,
3571      i.e. the solution is positive or zero, and that the solution
3572      happens before the upper bound loop.nb_iterations.  Otherwise
3573      there is no dependence.  This function outputs a description of
3574      the iterations that hold the intersections.  */
3575
3576   nb_vars_a = nb_vars_in_chrec (chrec_a);
3577   nb_vars_b = nb_vars_in_chrec (chrec_b);
3578
3579   gcc_obstack_init (&scratch_obstack);
3580
3581   dim = nb_vars_a + nb_vars_b;
3582   U = lambda_matrix_new (dim, dim, &scratch_obstack);
3583   A = lambda_matrix_new (dim, 1, &scratch_obstack);
3584   S = lambda_matrix_new (dim, 1, &scratch_obstack);
3585
3586   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
3587   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
3588   gamma = init_b - init_a;
3589
3590   /* Don't do all the hard work of solving the Diophantine equation
3591      when we already know the solution: for example,
3592      | {3, +, 1}_1
3593      | {3, +, 4}_2
3594      | gamma = 3 - 3 = 0.
3595      Then the first overlap occurs during the first iterations:
3596      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
3597   */
3598   if (gamma == 0)
3599     {
3600       if (nb_vars_a == 1 && nb_vars_b == 1)
3601         {
3602           HOST_WIDE_INT step_a, step_b;
3603           HOST_WIDE_INT niter, niter_a, niter_b;
3604           affine_fn ova, ovb;
3605
3606           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
3607           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
3608           niter = MIN (niter_a, niter_b);
3609           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
3610           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
3611
3612           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
3613                                                    &ova, &ovb,
3614                                                    last_conflicts, 1);
3615           *overlaps_a = conflict_fn (1, ova);
3616           *overlaps_b = conflict_fn (1, ovb);
3617         }
3618
3619       else if (nb_vars_a == 2 && nb_vars_b == 1)
3620         compute_overlap_steps_for_affine_1_2
3621           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
3622
3623       else if (nb_vars_a == 1 && nb_vars_b == 2)
3624         compute_overlap_steps_for_affine_1_2
3625           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
3626
3627       else
3628         {
3629           if (dump_file && (dump_flags & TDF_DETAILS))
3630             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
3631           *overlaps_a = conflict_fn_not_known ();
3632           *overlaps_b = conflict_fn_not_known ();
3633           *last_conflicts = chrec_dont_know;
3634         }
3635       goto end_analyze_subs_aa;
3636     }
3637
3638   /* U.A = S */
3639   lambda_matrix_right_hermite (A, dim, 1, S, U);
3640
3641   if (S[0][0] < 0)
3642     {
3643       S[0][0] *= -1;
3644       lambda_matrix_row_negate (U, dim, 0);
3645     }
3646   gcd_alpha_beta = S[0][0];
3647
3648   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
3649      but that is a quite strange case.  Instead of ICEing, answer
3650      don't know.  */
3651   if (gcd_alpha_beta == 0)
3652     {
3653       *overlaps_a = conflict_fn_not_known ();
3654       *overlaps_b = conflict_fn_not_known ();
3655       *last_conflicts = chrec_dont_know;
3656       goto end_analyze_subs_aa;
3657     }
3658
3659   /* The classic "gcd-test".  */
3660   if (!int_divides_p (gcd_alpha_beta, gamma))
3661     {
3662       /* The "gcd-test" has determined that there is no integer
3663          solution, i.e. there is no dependence.  */
3664       *overlaps_a = conflict_fn_no_dependence ();
3665       *overlaps_b = conflict_fn_no_dependence ();
3666       *last_conflicts = integer_zero_node;
3667     }
3668
3669   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
3670   else if (nb_vars_a == 1 && nb_vars_b == 1)
3671     {
3672       /* Both functions should have the same evolution sign.  */
3673       if (((A[0][0] > 0 && -A[1][0] > 0)
3674            || (A[0][0] < 0 && -A[1][0] < 0)))
3675         {
3676           /* The solutions are given by:
3677              |
3678              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
3679              |                           [u21 u22]    [y0]
3680
3681              For a given integer t.  Using the following variables,
3682
3683              | i0 = u11 * gamma / gcd_alpha_beta
3684              | j0 = u12 * gamma / gcd_alpha_beta
3685              | i1 = u21
3686              | j1 = u22
3687
3688              the solutions are:
3689
3690              | x0 = i0 + i1 * t,
3691              | y0 = j0 + j1 * t.  */
3692           HOST_WIDE_INT i0, j0, i1, j1;
3693
3694           i0 = U[0][0] * gamma / gcd_alpha_beta;
3695           j0 = U[0][1] * gamma / gcd_alpha_beta;
3696           i1 = U[1][0];
3697           j1 = U[1][1];
3698
3699           if ((i1 == 0 && i0 < 0)
3700               || (j1 == 0 && j0 < 0))
3701             {
3702               /* There is no solution.
3703                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
3704                  falls in here, but for the moment we don't look at the
3705                  upper bound of the iteration domain.  */
3706               *overlaps_a = conflict_fn_no_dependence ();
3707               *overlaps_b = conflict_fn_no_dependence ();
3708               *last_conflicts = integer_zero_node;
3709               goto end_analyze_subs_aa;
3710             }
3711
3712           if (i1 > 0 && j1 > 0)
3713             {
3714               HOST_WIDE_INT niter_a
3715                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
3716               HOST_WIDE_INT niter_b
3717                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
3718               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
3719
3720               /* (X0, Y0) is a solution of the Diophantine equation:
3721                  "chrec_a (X0) = chrec_b (Y0)".  */
3722               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
3723                                         CEIL (-j0, j1));
3724               HOST_WIDE_INT x0 = i1 * tau1 + i0;
3725               HOST_WIDE_INT y0 = j1 * tau1 + j0;
3726
3727               /* (X1, Y1) is the smallest positive solution of the eq
3728                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
3729                  first conflict occurs.  */
3730               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
3731               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
3732               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
3733
3734               if (niter > 0)
3735                 {
3736                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
3737                                             FLOOR_DIV (niter_b - j0, j1));
3738                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
3739
3740                   /* If the overlap occurs outside of the bounds of the
3741                      loop, there is no dependence.  */
3742                   if (x1 >= niter_a || y1 >= niter_b)
3743                     {
3744                       *overlaps_a = conflict_fn_no_dependence ();
3745                       *overlaps_b = conflict_fn_no_dependence ();
3746                       *last_conflicts = integer_zero_node;
3747                       goto end_analyze_subs_aa;
3748                     }
3749                   else
3750                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3751                 }
3752               else
3753                 *last_conflicts = chrec_dont_know;
3754
3755               *overlaps_a
3756                 = conflict_fn (1,
3757                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
3758                                                  1,
3759                                                  build_int_cst (NULL_TREE, i1)));
3760               *overlaps_b
3761                 = conflict_fn (1,
3762                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
3763                                                  1,
3764                                                  build_int_cst (NULL_TREE, j1)));
3765             }
3766           else
3767             {
3768               /* FIXME: For the moment, the upper bound of the
3769                  iteration domain for i and j is not checked.  */
3770               if (dump_file && (dump_flags & TDF_DETAILS))
3771                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3772               *overlaps_a = conflict_fn_not_known ();
3773               *overlaps_b = conflict_fn_not_known ();
3774               *last_conflicts = chrec_dont_know;
3775             }
3776         }
3777       else
3778         {
3779           if (dump_file && (dump_flags & TDF_DETAILS))
3780             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3781           *overlaps_a = conflict_fn_not_known ();
3782           *overlaps_b = conflict_fn_not_known ();
3783           *last_conflicts = chrec_dont_know;
3784         }
3785     }
3786   else
3787     {
3788       if (dump_file && (dump_flags & TDF_DETAILS))
3789         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
3790       *overlaps_a = conflict_fn_not_known ();
3791       *overlaps_b = conflict_fn_not_known ();
3792       *last_conflicts = chrec_dont_know;
3793     }
3794
3795 end_analyze_subs_aa:
3796   obstack_free (&scratch_obstack, NULL);
3797   if (dump_file && (dump_flags & TDF_DETAILS))
3798     {
3799       fprintf (dump_file, "  (overlaps_a = ");
3800       dump_conflict_function (dump_file, *overlaps_a);
3801       fprintf (dump_file, ")\n  (overlaps_b = ");
3802       dump_conflict_function (dump_file, *overlaps_b);
3803       fprintf (dump_file, "))\n");
3804     }
3805 }
3806
3807 /* Returns true when analyze_subscript_affine_affine can be used for
3808    determining the dependence relation between chrec_a and chrec_b,
3809    that contain symbols.  This function modifies chrec_a and chrec_b
3810    such that the analysis result is the same, and such that they don't
3811    contain symbols, and then can safely be passed to the analyzer.
3812
3813    Example: The analysis of the following tuples of evolutions produce
3814    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3815    vs. {0, +, 1}_1
3816
3817    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3818    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3819 */
3820
3821 static bool
3822 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3823 {
3824   tree diff, type, left_a, left_b, right_b;
3825
3826   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3827       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3828     /* FIXME: For the moment not handled.  Might be refined later.  */
3829     return false;
3830
3831   type = chrec_type (*chrec_a);
3832   left_a = CHREC_LEFT (*chrec_a);
3833   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3834   diff = chrec_fold_minus (type, left_a, left_b);
3835
3836   if (!evolution_function_is_constant_p (diff))
3837     return false;
3838
3839   if (dump_file && (dump_flags & TDF_DETAILS))
3840     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3841
3842   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3843                                      diff, CHREC_RIGHT (*chrec_a));
3844   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3845   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3846                                      build_int_cst (type, 0),
3847                                      right_b);
3848   return true;
3849 }
3850
3851 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3852    *OVERLAPS_B are initialized to the functions that describe the
3853    relation between the elements accessed twice by CHREC_A and
3854    CHREC_B.  For k >= 0, the following property is verified:
3855
3856    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3857
3858 static void
3859 analyze_siv_subscript (tree chrec_a,
3860                        tree chrec_b,
3861                        conflict_function **overlaps_a,
3862                        conflict_function **overlaps_b,
3863                        tree *last_conflicts,
3864                        int loop_nest_num)
3865 {
3866   dependence_stats.num_siv++;
3867
3868   if (dump_file && (dump_flags & TDF_DETAILS))
3869     fprintf (dump_file, "(analyze_siv_subscript \n");
3870
3871   if (evolution_function_is_constant_p (chrec_a)
3872       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3873     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3874                                       overlaps_a, overlaps_b, last_conflicts);
3875
3876   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3877            && evolution_function_is_constant_p (chrec_b))
3878     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3879                                       overlaps_b, overlaps_a, last_conflicts);
3880
3881   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3882            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3883     {
3884       if (!chrec_contains_symbols (chrec_a)
3885           && !chrec_contains_symbols (chrec_b))
3886         {
3887           analyze_subscript_affine_affine (chrec_a, chrec_b,
3888                                            overlaps_a, overlaps_b,
3889                                            last_conflicts);
3890
3891           if (CF_NOT_KNOWN_P (*overlaps_a)
3892               || CF_NOT_KNOWN_P (*overlaps_b))
3893             dependence_stats.num_siv_unimplemented++;
3894           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3895                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3896             dependence_stats.num_siv_independent++;
3897           else
3898             dependence_stats.num_siv_dependent++;
3899         }
3900       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3901                                                         &chrec_b))
3902         {
3903           analyze_subscript_affine_affine (chrec_a, chrec_b,
3904                                            overlaps_a, overlaps_b,
3905                                            last_conflicts);
3906
3907           if (CF_NOT_KNOWN_P (*overlaps_a)
3908               || CF_NOT_KNOWN_P (*overlaps_b))
3909             dependence_stats.num_siv_unimplemented++;
3910           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3911                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3912             dependence_stats.num_siv_independent++;
3913           else
3914             dependence_stats.num_siv_dependent++;
3915         }
3916       else
3917         goto siv_subscript_dontknow;
3918     }
3919
3920   else
3921     {
3922     siv_subscript_dontknow:;
3923       if (dump_file && (dump_flags & TDF_DETAILS))
3924         fprintf (dump_file, "  siv test failed: unimplemented");
3925       *overlaps_a = conflict_fn_not_known ();
3926       *overlaps_b = conflict_fn_not_known ();
3927       *last_conflicts = chrec_dont_know;
3928       dependence_stats.num_siv_unimplemented++;
3929     }
3930
3931   if (dump_file && (dump_flags & TDF_DETAILS))
3932     fprintf (dump_file, ")\n");
3933 }
3934
3935 /* Returns false if we can prove that the greatest common divisor of the steps
3936    of CHREC does not divide CST, false otherwise.  */
3937
3938 static bool
3939 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3940 {
3941   HOST_WIDE_INT cd = 0, val;
3942   tree step;
3943
3944   if (!tree_fits_shwi_p (cst))
3945     return true;
3946   val = tree_to_shwi (cst);
3947
3948   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3949     {
3950       step = CHREC_RIGHT (chrec);
3951       if (!tree_fits_shwi_p (step))
3952         return true;
3953       cd = gcd (cd, tree_to_shwi (step));
3954       chrec = CHREC_LEFT (chrec);
3955     }
3956
3957   return val % cd == 0;
3958 }
3959
3960 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3961    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3962    functions that describe the relation between the elements accessed
3963    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3964    is verified:
3965
3966    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3967
3968 static void
3969 analyze_miv_subscript (tree chrec_a,
3970                        tree chrec_b,
3971                        conflict_function **overlaps_a,
3972                        conflict_function **overlaps_b,
3973                        tree *last_conflicts,
3974                        struct loop *loop_nest)
3975 {
3976   tree type, difference;
3977
3978   dependence_stats.num_miv++;
3979   if (dump_file && (dump_flags & TDF_DETAILS))
3980     fprintf (dump_file, "(analyze_miv_subscript \n");
3981
3982   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3983   chrec_a = chrec_convert (type, chrec_a, NULL);
3984   chrec_b = chrec_convert (type, chrec_b, NULL);
3985   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3986
3987   if (eq_evolutions_p (chrec_a, chrec_b))
3988     {
3989       /* Access functions are the same: all the elements are accessed
3990          in the same order.  */
3991       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3992       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3993       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3994       dependence_stats.num_miv_dependent++;
3995     }
3996
3997   else if (evolution_function_is_constant_p (difference)
3998            && evolution_function_is_affine_multivariate_p (chrec_a,
3999                                                            loop_nest->num)
4000            && !gcd_of_steps_may_divide_p (chrec_a, difference))
4001     {
4002       /* testsuite/.../ssa-chrec-33.c
4003          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4004
4005          The difference is 1, and all the evolution steps are multiples
4006          of 2, consequently there are no overlapping elements.  */
4007       *overlaps_a = conflict_fn_no_dependence ();
4008       *overlaps_b = conflict_fn_no_dependence ();
4009       *last_conflicts = integer_zero_node;
4010       dependence_stats.num_miv_independent++;
4011     }
4012
4013   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
4014            && !chrec_contains_symbols (chrec_a)
4015            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4016            && !chrec_contains_symbols (chrec_b))
4017     {
4018       /* testsuite/.../ssa-chrec-35.c
4019          {0, +, 1}_2  vs.  {0, +, 1}_3
4020          the overlapping elements are respectively located at iterations:
4021          {0, +, 1}_x and {0, +, 1}_x,
4022          in other words, we have the equality:
4023          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4024
4025          Other examples:
4026          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4027          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4028
4029          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4030          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4031       */
4032       analyze_subscript_affine_affine (chrec_a, chrec_b,
4033                                        overlaps_a, overlaps_b, last_conflicts);
4034
4035       if (CF_NOT_KNOWN_P (*overlaps_a)
4036           || CF_NOT_KNOWN_P (*overlaps_b))
4037         dependence_stats.num_miv_unimplemented++;
4038       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4039                || CF_NO_DEPENDENCE_P (*overlaps_b))
4040         dependence_stats.num_miv_independent++;
4041       else
4042         dependence_stats.num_miv_dependent++;
4043     }
4044
4045   else
4046     {
4047       /* When the analysis is too difficult, answer "don't know".  */
4048       if (dump_file && (dump_flags & TDF_DETAILS))
4049         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4050
4051       *overlaps_a = conflict_fn_not_known ();
4052       *overlaps_b = conflict_fn_not_known ();
4053       *last_conflicts = chrec_dont_know;
4054       dependence_stats.num_miv_unimplemented++;
4055     }
4056
4057   if (dump_file && (dump_flags & TDF_DETAILS))
4058     fprintf (dump_file, ")\n");
4059 }
4060
4061 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4062    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4063    OVERLAP_ITERATIONS_B are initialized with two functions that
4064    describe the iterations that contain conflicting elements.
4065
4066    Remark: For an integer k >= 0, the following equality is true:
4067
4068    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4069 */
4070
4071 static void
4072 analyze_overlapping_iterations (tree chrec_a,
4073                                 tree chrec_b,
4074                                 conflict_function **overlap_iterations_a,
4075                                 conflict_function **overlap_iterations_b,
4076                                 tree *last_conflicts, struct loop *loop_nest)
4077 {
4078   unsigned int lnn = loop_nest->num;
4079
4080   dependence_stats.num_subscript_tests++;
4081
4082   if (dump_file && (dump_flags & TDF_DETAILS))
4083     {
4084       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4085       fprintf (dump_file, "  (chrec_a = ");
4086       print_generic_expr (dump_file, chrec_a);
4087       fprintf (dump_file, ")\n  (chrec_b = ");
4088       print_generic_expr (dump_file, chrec_b);
4089       fprintf (dump_file, ")\n");
4090     }
4091
4092   if (chrec_a == NULL_TREE
4093       || chrec_b == NULL_TREE
4094       || chrec_contains_undetermined (chrec_a)
4095       || chrec_contains_undetermined (chrec_b))
4096     {
4097       dependence_stats.num_subscript_undetermined++;
4098
4099       *overlap_iterations_a = conflict_fn_not_known ();
4100       *overlap_iterations_b = conflict_fn_not_known ();
4101     }
4102
4103   /* If they are the same chrec, and are affine, they overlap
4104      on every iteration.  */
4105   else if (eq_evolutions_p (chrec_a, chrec_b)
4106            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4107                || operand_equal_p (chrec_a, chrec_b, 0)))
4108     {
4109       dependence_stats.num_same_subscript_function++;
4110       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4111       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4112       *last_conflicts = chrec_dont_know;
4113     }
4114
4115   /* If they aren't the same, and aren't affine, we can't do anything
4116      yet.  */
4117   else if ((chrec_contains_symbols (chrec_a)
4118             || chrec_contains_symbols (chrec_b))
4119            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4120                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4121     {
4122       dependence_stats.num_subscript_undetermined++;
4123       *overlap_iterations_a = conflict_fn_not_known ();
4124       *overlap_iterations_b = conflict_fn_not_known ();
4125     }
4126
4127   else if (ziv_subscript_p (chrec_a, chrec_b))
4128     analyze_ziv_subscript (chrec_a, chrec_b,
4129                            overlap_iterations_a, overlap_iterations_b,
4130                            last_conflicts);
4131
4132   else if (siv_subscript_p (chrec_a, chrec_b))
4133     analyze_siv_subscript (chrec_a, chrec_b,
4134                            overlap_iterations_a, overlap_iterations_b,
4135                            last_conflicts, lnn);
4136
4137   else
4138     analyze_miv_subscript (chrec_a, chrec_b,
4139                            overlap_iterations_a, overlap_iterations_b,
4140                            last_conflicts, loop_nest);
4141
4142   if (dump_file && (dump_flags & TDF_DETAILS))
4143     {
4144       fprintf (dump_file, "  (overlap_iterations_a = ");
4145       dump_conflict_function (dump_file, *overlap_iterations_a);
4146       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4147       dump_conflict_function (dump_file, *overlap_iterations_b);
4148       fprintf (dump_file, "))\n");
4149     }
4150 }
4151
4152 /* Helper function for uniquely inserting distance vectors.  */
4153
4154 static void
4155 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4156 {
4157   unsigned i;
4158   lambda_vector v;
4159
4160   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4161     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4162       return;
4163
4164   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4165 }
4166
4167 /* Helper function for uniquely inserting direction vectors.  */
4168
4169 static void
4170 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4171 {
4172   unsigned i;
4173   lambda_vector v;
4174
4175   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4176     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4177       return;
4178
4179   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4180 }
4181
4182 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4183    haven't yet determined a distance for this outer loop, push a new
4184    distance vector composed of the previous distance, and a distance
4185    of 1 for this outer loop.  Example:
4186
4187    | loop_1
4188    |   loop_2
4189    |     A[10]
4190    |   endloop_2
4191    | endloop_1
4192
4193    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4194    save (0, 1), then we have to save (1, 0).  */
4195
4196 static void
4197 add_outer_distances (struct data_dependence_relation *ddr,
4198                      lambda_vector dist_v, int index)
4199 {
4200   /* For each outer loop where init_v is not set, the accesses are
4201      in dependence of distance 1 in the loop.  */
4202   while (--index >= 0)
4203     {
4204       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4205       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4206       save_v[index] = 1;
4207       save_dist_v (ddr, save_v);
4208     }
4209 }
4210
4211 /* Return false when fail to represent the data dependence as a
4212    distance vector.  A_INDEX is the index of the first reference
4213    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4214    second reference.  INIT_B is set to true when a component has been
4215    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4216    the index in DIST_V that carries the dependence.  */
4217
4218 static bool
4219 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4220                              unsigned int a_index, unsigned int b_index,
4221                              lambda_vector dist_v, bool *init_b,
4222                              int *index_carry)
4223 {
4224   unsigned i;
4225   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4226
4227   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4228     {
4229       tree access_fn_a, access_fn_b;
4230       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4231
4232       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4233         {
4234           non_affine_dependence_relation (ddr);
4235           return false;
4236         }
4237
4238       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4239       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4240
4241       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4242           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4243         {
4244           HOST_WIDE_INT dist;
4245           int index;
4246           int var_a = CHREC_VARIABLE (access_fn_a);
4247           int var_b = CHREC_VARIABLE (access_fn_b);
4248
4249           if (var_a != var_b
4250               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4251             {
4252               non_affine_dependence_relation (ddr);
4253               return false;
4254             }
4255
4256           dist = int_cst_value (SUB_DISTANCE (subscript));
4257           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4258           *index_carry = MIN (index, *index_carry);
4259
4260           /* This is the subscript coupling test.  If we have already
4261              recorded a distance for this loop (a distance coming from
4262              another subscript), it should be the same.  For example,
4263              in the following code, there is no dependence:
4264
4265              | loop i = 0, N, 1
4266              |   T[i+1][i] = ...
4267              |   ... = T[i][i]
4268              | endloop
4269           */
4270           if (init_v[index] != 0 && dist_v[index] != dist)
4271             {
4272               finalize_ddr_dependent (ddr, chrec_known);
4273               return false;
4274             }
4275
4276           dist_v[index] = dist;
4277           init_v[index] = 1;
4278           *init_b = true;
4279         }
4280       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4281         {
4282           /* This can be for example an affine vs. constant dependence
4283              (T[i] vs. T[3]) that is not an affine dependence and is
4284              not representable as a distance vector.  */
4285           non_affine_dependence_relation (ddr);
4286           return false;
4287         }
4288     }
4289
4290   return true;
4291 }
4292
4293 /* Return true when the DDR contains only constant access functions.  */
4294
4295 static bool
4296 constant_access_functions (const struct data_dependence_relation *ddr)
4297 {
4298   unsigned i;
4299   subscript *sub;
4300
4301   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4302     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4303         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4304       return false;
4305
4306   return true;
4307 }
4308
4309 /* Helper function for the case where DDR_A and DDR_B are the same
4310    multivariate access function with a constant step.  For an example
4311    see pr34635-1.c.  */
4312
4313 static void
4314 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4315 {
4316   int x_1, x_2;
4317   tree c_1 = CHREC_LEFT (c_2);
4318   tree c_0 = CHREC_LEFT (c_1);
4319   lambda_vector dist_v;
4320   HOST_WIDE_INT v1, v2, cd;
4321
4322   /* Polynomials with more than 2 variables are not handled yet.  When
4323      the evolution steps are parameters, it is not possible to
4324      represent the dependence using classical distance vectors.  */
4325   if (TREE_CODE (c_0) != INTEGER_CST
4326       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4327       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4328     {
4329       DDR_AFFINE_P (ddr) = false;
4330       return;
4331     }
4332
4333   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4334   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4335
4336   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4337   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4338   v1 = int_cst_value (CHREC_RIGHT (c_1));
4339   v2 = int_cst_value (CHREC_RIGHT (c_2));
4340   cd = gcd (v1, v2);
4341   v1 /= cd;
4342   v2 /= cd;
4343
4344   if (v2 < 0)
4345     {
4346       v2 = -v2;
4347       v1 = -v1;
4348     }
4349
4350   dist_v[x_1] = v2;
4351   dist_v[x_2] = -v1;
4352   save_dist_v (ddr, dist_v);
4353
4354   add_outer_distances (ddr, dist_v, x_1);
4355 }
4356
4357 /* Helper function for the case where DDR_A and DDR_B are the same
4358    access functions.  */
4359
4360 static void
4361 add_other_self_distances (struct data_dependence_relation *ddr)
4362 {
4363   lambda_vector dist_v;
4364   unsigned i;
4365   int index_carry = DDR_NB_LOOPS (ddr);
4366   subscript *sub;
4367
4368   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4369     {
4370       tree access_fun = SUB_ACCESS_FN (sub, 0);
4371
4372       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4373         {
4374           if (!evolution_function_is_univariate_p (access_fun))
4375             {
4376               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4377                 {
4378                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4379                   return;
4380                 }
4381
4382               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4383
4384               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4385                 add_multivariate_self_dist (ddr, access_fun);
4386               else
4387                 /* The evolution step is not constant: it varies in
4388                    the outer loop, so this cannot be represented by a
4389                    distance vector.  For example in pr34635.c the
4390                    evolution is {0, +, {0, +, 4}_1}_2.  */
4391                 DDR_AFFINE_P (ddr) = false;
4392
4393               return;
4394             }
4395
4396           index_carry = MIN (index_carry,
4397                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4398                                                  DDR_LOOP_NEST (ddr)));
4399         }
4400     }
4401
4402   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4403   add_outer_distances (ddr, dist_v, index_carry);
4404 }
4405
4406 static void
4407 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4408 {
4409   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4410
4411   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4412   save_dist_v (ddr, dist_v);
4413 }
4414
4415 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4416    is the case for example when access functions are the same and
4417    equal to a constant, as in:
4418
4419    | loop_1
4420    |   A[3] = ...
4421    |   ... = A[3]
4422    | endloop_1
4423
4424    in which case the distance vectors are (0) and (1).  */
4425
4426 static void
4427 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4428 {
4429   unsigned i, j;
4430
4431   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4432     {
4433       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4434       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4435       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4436
4437       for (j = 0; j < ca->n; j++)
4438         if (affine_function_zero_p (ca->fns[j]))
4439           {
4440             insert_innermost_unit_dist_vector (ddr);
4441             return;
4442           }
4443
4444       for (j = 0; j < cb->n; j++)
4445         if (affine_function_zero_p (cb->fns[j]))
4446           {
4447             insert_innermost_unit_dist_vector (ddr);
4448             return;
4449           }
4450     }
4451 }
4452
4453 /* Return true when the DDR contains two data references that have the
4454    same access functions.  */
4455
4456 static inline bool
4457 same_access_functions (const struct data_dependence_relation *ddr)
4458 {
4459   unsigned i;
4460   subscript *sub;
4461
4462   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4463     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4464                           SUB_ACCESS_FN (sub, 1)))
4465       return false;
4466
4467   return true;
4468 }
4469
/* Compute the classic per loop distance vectors of DDR and save them
   in DDR_DIST_VECTS (ddr).  LOOP_NEST is handed to
   subscript_dependence_tester_1 when the subscripts have to be
   analyzed again with the two references swapped.  Return false when
   DDR_ARE_DEPENDENT (ddr) is already set on entry, or when building
   the vectors fails (the data dependence is not representable as a
   distance vector, or the re-analysis ends the computation); return
   true otherwise.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
                           struct loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  /* Nothing to build when the relation has already been decided.  */
  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  /* Both references have the same access functions: the vectors are
     built by the self-distance helpers.  */
  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (constant_access_functions (ddr))
        add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
        add_other_self_distances (ddr);

      return true;
    }

  /* First attempt: distance from reference 0 to reference 1.  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
         always be lexicographically positive.

         Data references are collected in the order of execution of
         the program, thus for the following loop

         | for (i = 1; i < 100; i++)
         |   for (j = 1; j < 100; j++)
         |     {
         |       t = T[j+1][i-1];  // A
         |       T[j][i] = t + 2;  // B
         |     }

         references are collected following the direction of the wind:
         A then B.  The data dependence tests are performed also
         following this order, such that we're looking at the distance
         separating the elements accessed by A from the elements later
         accessed by B.  But in this example, the distance returned by
         test_dep (A, B) is lexicographically negative (-1, 1), that
         means that the access A occurs later than B with respect to
         the outer loop, i.e. we're actually looking upwind.  In this
         case we solve test_dep (B, A) looking downwind to the
         lexicographically positive solution, that returns the
         distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
        {
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          /* Analyze the subscripts again with the references swapped
             (1 then 0) and rebuild the distance into SAVE_V.  */
          if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
            return false;
          compute_subscript_distance (ddr);
          if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
                                            &index_carry))
            return false;
          save_dist_v (ddr, save_v);
          /* Remember that the saved vector comes from the swapped
             analysis.  */
          DDR_REVERSED_P (ddr) = true;

          /* In this case there is a dependence forward for all the
             outer loops:

             | for (k = 1; k < 100; k++)
             |  for (i = 1; i < 100; i++)
             |   for (j = 1; j < 100; j++)
             |     {
             |       t = T[j+1][i-1];  // A
             |       T[j][i] = t + 2;  // B
             |     }

             the vectors are:
             (0,  1, -1)
             (1,  1, -1)
             (1, -1,  1)
          */
          if (DDR_NB_LOOPS (ddr) > 1)
            {
              add_outer_distances (ddr, save_v, index_carry);
              add_outer_distances (ddr, dist_v, index_carry);
            }
        }
      else
        {
          /* DIST_V is lexicographically positive: a copy of it is the
             vector that gets saved.  */
          lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
          lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

          if (DDR_NB_LOOPS (ddr) > 1)
            {
              lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

              /* Also compute the distance with the references swapped;
                 it is only used for the outer loop distances below.  */
              if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
                return false;
              compute_subscript_distance (ddr);
              if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
                                                &index_carry))
                return false;

              save_dist_v (ddr, save_v);
              add_outer_distances (ddr, dist_v, index_carry);
              add_outer_distances (ddr, opposite_v, index_carry);
            }
          else
            save_dist_v (ddr, save_v);
        }
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
         there is a dependence of distance 1 on loop_1 for the array A.

         | loop_1
         |   A[5] = ...
         | endloop
      */
      add_outer_distances (ddr, dist_v,
                           lambda_vector_first_nz (dist_v,
                                                   DDR_NB_LOOPS (ddr), 0));
    }

  /* Dump every distance vector saved in DDR.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "(build_classic_dist_vector\n");
      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
        {
          fprintf (dump_file, "  dist_vector = (");
          print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
                               DDR_NB_LOOPS (ddr));
          fprintf (dump_file, "  )\n");
        }
      fprintf (dump_file, ")\n");
    }

  return true;
}
4620
4621 /* Return the direction for a given distance.
4622    FIXME: Computing dir this way is suboptimal, since dir can catch
4623    cases that dist is unable to represent.  */
4624
4625 static inline enum data_dependence_direction
4626 dir_from_dist (int dist)
4627 {
4628   if (dist > 0)
4629     return dir_positive;
4630   else if (dist < 0)
4631     return dir_negative;
4632   else
4633     return dir_equal;
4634 }
4635
4636 /* Compute the classic per loop direction vector.  DDR is the data
4637    dependence relation to build a vector from.  */
4638
4639 static void
4640 build_classic_dir_vector (struct data_dependence_relation *ddr)
4641 {
4642   unsigned i, j;
4643   lambda_vector dist_v;
4644
4645   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4646     {
4647       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4648
4649       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4650         dir_v[j] = dir_from_dist (dist_v[j]);
4651
4652       save_dir_v (ddr, dir_v);
4653     }
4654 }
4655
4656 /* Helper function.  Returns true when there is a dependence between the
4657    data references.  A_INDEX is the index of the first reference (0 for
4658    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4659
4660 static bool
4661 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4662                                unsigned int a_index, unsigned int b_index,
4663                                struct loop *loop_nest)
4664 {
4665   unsigned int i;
4666   tree last_conflicts;
4667   struct subscript *subscript;
4668   tree res = NULL_TREE;
4669
4670   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4671     {
4672       conflict_function *overlaps_a, *overlaps_b;
4673
4674       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4675                                       SUB_ACCESS_FN (subscript, b_index),
4676                                       &overlaps_a, &overlaps_b,
4677                                       &last_conflicts, loop_nest);
4678
4679       if (SUB_CONFLICTS_IN_A (subscript))
4680         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4681       if (SUB_CONFLICTS_IN_B (subscript))
4682         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4683
4684       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4685       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4686       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4687
4688       /* If there is any undetermined conflict function we have to
4689          give a conservative answer in case we cannot prove that
4690          no dependence exists when analyzing another subscript.  */
4691       if (CF_NOT_KNOWN_P (overlaps_a)
4692           || CF_NOT_KNOWN_P (overlaps_b))
4693         {
4694           res = chrec_dont_know;
4695           continue;
4696         }
4697
4698       /* When there is a subscript with no dependence we can stop.  */
4699       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4700                || CF_NO_DEPENDENCE_P (overlaps_b))
4701         {
4702           res = chrec_known;
4703           break;
4704         }
4705     }
4706
4707   if (res == NULL_TREE)
4708     return true;
4709
4710   if (res == chrec_known)
4711     dependence_stats.num_dependence_independent++;
4712   else
4713     dependence_stats.num_dependence_undetermined++;
4714   finalize_ddr_dependent (ddr, res);
4715   return false;
4716 }
4717
4718 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4719
4720 static void
4721 subscript_dependence_tester (struct data_dependence_relation *ddr,
4722                              struct loop *loop_nest)
4723 {
4724   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4725     dependence_stats.num_dependence_dependent++;
4726
4727   compute_subscript_distance (ddr);
4728   if (build_classic_dist_vector (ddr, loop_nest))
4729     build_classic_dir_vector (ddr);
4730 }
4731
4732 /* Returns true when all the access functions of A are affine or
4733    constant with respect to LOOP_NEST.  */
4734
4735 static bool
4736 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4737                                            const struct loop *loop_nest)
4738 {
4739   unsigned int i;
4740   vec<tree> fns = DR_ACCESS_FNS (a);
4741   tree t;
4742
4743   FOR_EACH_VEC_ELT (fns, i, t)
4744     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4745         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4746       return false;
4747
4748   return true;
4749 }
4750
4751 /* This computes the affine dependence relation between A and B with
4752    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4753    independence between two accesses, while CHREC_DONT_KNOW is used
4754    for representing the unknown relation.
4755
4756    Note that it is possible to stop the computation of the dependence
4757    relation the first time we detect a CHREC_KNOWN element for a given
4758    subscript.  */
4759
4760 void
4761 compute_affine_dependence (struct data_dependence_relation *ddr,
4762                            struct loop *loop_nest)
4763 {
4764   struct data_reference *dra = DDR_A (ddr);
4765   struct data_reference *drb = DDR_B (ddr);
4766
4767   if (dump_file && (dump_flags & TDF_DETAILS))
4768     {
4769       fprintf (dump_file, "(compute_affine_dependence\n");
4770       fprintf (dump_file, "  stmt_a: ");
4771       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4772       fprintf (dump_file, "  stmt_b: ");
4773       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4774     }
4775
4776   /* Analyze only when the dependence relation is not yet known.  */
4777   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4778     {
4779       dependence_stats.num_dependence_tests++;
4780
4781       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4782           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4783         subscript_dependence_tester (ddr, loop_nest);
4784
4785       /* As a last case, if the dependence cannot be determined, or if
4786          the dependence is considered too difficult to determine, answer
4787          "don't know".  */
4788       else
4789         {
4790           dependence_stats.num_dependence_undetermined++;
4791
4792           if (dump_file && (dump_flags & TDF_DETAILS))
4793             {
4794               fprintf (dump_file, "Data ref a:\n");
4795               dump_data_reference (dump_file, dra);
4796               fprintf (dump_file, "Data ref b:\n");
4797               dump_data_reference (dump_file, drb);
4798               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4799             }
4800           finalize_ddr_dependent (ddr, chrec_dont_know);
4801         }
4802     }
4803
4804   if (dump_file && (dump_flags & TDF_DETAILS))
4805     {
4806       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4807         fprintf (dump_file, ") -> no dependence\n");
4808       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4809         fprintf (dump_file, ") -> dependence analysis failed\n");
4810       else
4811         fprintf (dump_file, ")\n");
4812     }
4813 }
4814
4815 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4816    the data references in DATAREFS, in the LOOP_NEST.  When
4817    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4818    relations.  Return true when successful, i.e. data references number
4819    is small enough to be handled.  */
4820
4821 bool
4822 compute_all_dependences (vec<data_reference_p> datarefs,
4823                          vec<ddr_p> *dependence_relations,
4824                          vec<loop_p> loop_nest,
4825                          bool compute_self_and_rr)
4826 {
4827   struct data_dependence_relation *ddr;
4828   struct data_reference *a, *b;
4829   unsigned int i, j;
4830
4831   if ((int) datarefs.length ()
4832       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4833     {
4834       struct data_dependence_relation *ddr;
4835
4836       /* Insert a single relation into dependence_relations:
4837          chrec_dont_know.  */
4838       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4839       dependence_relations->safe_push (ddr);
4840       return false;
4841     }
4842
4843   FOR_EACH_VEC_ELT (datarefs, i, a)
4844     for (j = i + 1; datarefs.iterate (j, &b); j++)
4845       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4846         {
4847           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4848           dependence_relations->safe_push (ddr);
4849           if (loop_nest.exists ())
4850             compute_affine_dependence (ddr, loop_nest[0]);
4851         }
4852
4853   if (compute_self_and_rr)
4854     FOR_EACH_VEC_ELT (datarefs, i, a)
4855       {
4856         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4857         dependence_relations->safe_push (ddr);
4858         if (loop_nest.exists ())
4859           compute_affine_dependence (ddr, loop_nest[0]);
4860       }
4861
4862   return true;
4863 }
4864
/* Describes a location of a memory reference found in a statement, as
   filled in by get_references_in_stmt.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read, false if it is written.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
4880
4881
/* Stores the locations of memory references in STMT to REFERENCES.
   Only assignments and calls are scanned for references, and only when
   STMT has a virtual use.  Returns true if STMT clobbers memory, false
   otherwise; a masked load or store whose reference has been recorded
   always yields false.  */

static bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_GOMP_SIMD_LANE:
            {
              /* Not a clobber only when the simduid of the containing
                 loop matches the variable of the call's first
                 argument.  */
              struct loop *loop = gimple_bb (stmt)->loop_father;
              tree uid = gimple_call_arg (stmt, 0);
              gcc_assert (TREE_CODE (uid) == SSA_NAME);
              if (loop == NULL
                  || loop->simduid != SSA_NAME_VAR (uid))
                clobbers_memory = true;
              break;
            }
          /* Masked loads and stores are turned into explicit
             references further down.  */
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
            break;
          }
      else
        clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
           && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
               || gimple_vuse (stmt)))
    clobbers_memory = true;

  /* Without a virtual use no reference is collected from STMT.  */
  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      /* Record the right-hand side as a read when it is a declaration,
         or a reference whose base address is neither an SSA name nor
         an invariant.  */
      if (DECL_P (op1)
          || (REFERENCE_CLASS_P (op1)
              && (base = get_base_address (op1))
              && TREE_CODE (base) != SSA_NAME
              && !is_gimple_min_invariant (base)))
        {
          ref.ref = op1;
          ref.is_read = true;
          ref.is_conditional_in_stmt = false;
          references->safe_push (ref);
        }
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
            /* A masked load without a left-hand side is handled like
               any other call below.  */
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
            /* Build a MEM_REF on the call's first argument with a zero
               offset of the type of the second argument.  The value of
               the second argument is used as the alignment of the
               accessed type.  */
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
              type = TREE_TYPE (gimple_call_lhs (stmt));
            else
              type = TREE_TYPE (gimple_call_arg (stmt, 3));
            if (TYPE_ALIGN (type) != align)
              type = build_aligned_type (type, align);
            ref.is_conditional_in_stmt = true;
            ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
                                   ptr);
            references->safe_push (ref);
            return false;
          default:
            break;
          }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      /* Every argument that is a declaration or a reference with a
         base address is recorded as a read.  */
      for (i = 0; i < n; i++)
        {
          op1 = gimple_call_arg (stmt, i);

          if (DECL_P (op1)
              || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
            {
              ref.ref = op1;
              ref.is_read = true;
              ref.is_conditional_in_stmt = false;
              references->safe_push (ref);
            }
        }
    }
  else
    return clobbers_memory;

  /* Record the left-hand side of the assignment or call as a write.  */
  if (op0
      && (DECL_P (op0)
          || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
5012
5013
5014 /* Returns true if the loop-nest has any data reference.  */
5015
5016 bool
5017 loop_nest_has_data_refs (loop_p loop)
5018 {
5019   basic_block *bbs = get_loop_body (loop);
5020   auto_vec<data_ref_loc, 3> references;
5021
5022   for (unsigned i = 0; i < loop->num_nodes; i++)
5023     {
5024       basic_block bb = bbs[i];
5025       gimple_stmt_iterator bsi;
5026
5027       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5028         {
5029           gimple *stmt = gsi_stmt (bsi);
5030           get_references_in_stmt (stmt, &references);
5031           if (references.length ())
5032             {
5033               free (bbs);
5034               return true;
5035             }
5036         }
5037     }
5038   free (bbs);
5039   return false;
5040 }
5041
5042 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5043    reference, returns false, otherwise returns true.  NEST is the outermost
5044    loop of the loop nest in which the references should be analyzed.  */
5045
5046 bool
5047 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5048                               vec<data_reference_p> *datarefs)
5049 {
5050   unsigned i;
5051   auto_vec<data_ref_loc, 2> references;
5052   data_ref_loc *ref;
5053   bool ret = true;
5054   data_reference_p dr;
5055
5056   if (get_references_in_stmt (stmt, &references))
5057     return false;
5058
5059   FOR_EACH_VEC_ELT (references, i, ref)
5060     {
5061       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5062                             loop_containing_stmt (stmt), ref->ref,
5063                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5064       gcc_assert (dr != NULL);
5065       datarefs->safe_push (dr);
5066     }
5067
5068   return ret;
5069 }
5070
5071 /* Stores the data references in STMT to DATAREFS.  If there is an
5072    unanalyzable reference, returns false, otherwise returns true.
5073    NEST is the outermost loop of the loop nest in which the references
5074    should be instantiated, LOOP is the loop in which the references
5075    should be analyzed.  */
5076
5077 bool
5078 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5079                                        vec<data_reference_p> *datarefs)
5080 {
5081   unsigned i;
5082   auto_vec<data_ref_loc, 2> references;
5083   data_ref_loc *ref;
5084   bool ret = true;
5085   data_reference_p dr;
5086
5087   if (get_references_in_stmt (stmt, &references))
5088     return false;
5089
5090   FOR_EACH_VEC_ELT (references, i, ref)
5091     {
5092       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5093                             ref->is_conditional_in_stmt);
5094       gcc_assert (dr != NULL);
5095       datarefs->safe_push (dr);
5096     }
5097
5098   return ret;
5099 }
5100
5101 /* Search the data references in LOOP, and record the information into
5102    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5103    difficult case, returns NULL_TREE otherwise.  */
5104
5105 tree
5106 find_data_references_in_bb (struct loop *loop, basic_block bb,
5107                             vec<data_reference_p> *datarefs)
5108 {
5109   gimple_stmt_iterator bsi;
5110
5111   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5112     {
5113       gimple *stmt = gsi_stmt (bsi);
5114
5115       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5116         {
5117           struct data_reference *res;
5118           res = XCNEW (struct data_reference);
5119           datarefs->safe_push (res);
5120
5121           return chrec_dont_know;
5122         }
5123     }
5124
5125   return NULL_TREE;
5126 }
5127
5128 /* Search the data references in LOOP, and record the information into
5129    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5130    difficult case, returns NULL_TREE otherwise.
5131
5132    TODO: This function should be made smarter so that it can handle address
5133    arithmetic as if they were array accesses, etc.  */
5134
5135 tree
5136 find_data_references_in_loop (struct loop *loop,
5137                               vec<data_reference_p> *datarefs)
5138 {
5139   basic_block bb, *bbs;
5140   unsigned int i;
5141
5142   bbs = get_loop_body_in_dom_order (loop);
5143
5144   for (i = 0; i < loop->num_nodes; i++)
5145     {
5146       bb = bbs[i];
5147
5148       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5149         {
5150           free (bbs);
5151           return chrec_dont_know;
5152         }
5153     }
5154   free (bbs);
5155
5156   return NULL_TREE;
5157 }
5158
5159 /* Return the alignment in bytes that DRB is guaranteed to have at all
5160    times.  */
5161
5162 unsigned int
5163 dr_alignment (innermost_loop_behavior *drb)
5164 {
5165   /* Get the alignment of BASE_ADDRESS + INIT.  */
5166   unsigned int alignment = drb->base_alignment;
5167   unsigned int misalignment = (drb->base_misalignment
5168                                + TREE_INT_CST_LOW (drb->init));
5169   if (misalignment != 0)
5170     alignment = MIN (alignment, misalignment & -misalignment);
5171
5172   /* Cap it to the alignment of OFFSET.  */
5173   if (!integer_zerop (drb->offset))
5174     alignment = MIN (alignment, drb->offset_alignment);
5175
5176   /* Cap it to the alignment of STEP.  */
5177   if (!integer_zerop (drb->step))
5178     alignment = MIN (alignment, drb->step_alignment);
5179
5180   return alignment;
5181 }
5182
5183 /* If BASE is a pointer-typed SSA name, try to find the object that it
5184    is based on.  Return this object X on success and store the alignment
5185    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5186
5187 static tree
5188 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5189 {
5190   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5191     return NULL_TREE;
5192
5193   gimple *def = SSA_NAME_DEF_STMT (base);
5194   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5195
5196   /* Peel chrecs and record the minimum alignment preserved by
5197      all steps.  */
5198   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5199   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5200     {
5201       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5202       alignment = MIN (alignment, step_alignment);
5203       base = CHREC_LEFT (base);
5204     }
5205
5206   /* Punt if the expression is too complicated to handle.  */
5207   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5208     return NULL_TREE;
5209
5210   /* The only useful cases are those for which a dereference folds to something
5211      other than an INDIRECT_REF.  */
5212   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5213   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5214   if (!ref)
5215     return NULL_TREE;
5216
5217   /* Analyze the base to which the steps we peeled were applied.  */
5218   poly_int64 bitsize, bitpos, bytepos;
5219   machine_mode mode;
5220   int unsignedp, reversep, volatilep;
5221   tree offset;
5222   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5223                               &unsignedp, &reversep, &volatilep);
5224   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5225     return NULL_TREE;
5226
5227   /* Restrict the alignment to that guaranteed by the offsets.  */
5228   unsigned int bytepos_alignment = known_alignment (bytepos);
5229   if (bytepos_alignment != 0)
5230     alignment = MIN (alignment, bytepos_alignment);
5231   if (offset)
5232     {
5233       unsigned int offset_alignment = highest_pow2_factor (offset);
5234       alignment = MIN (alignment, offset_alignment);
5235     }
5236
5237   *alignment_out = alignment;
5238   return base;
5239 }
5240
5241 /* Return the object whose alignment would need to be changed in order
5242    to increase the alignment of ADDR.  Store the maximum achievable
5243    alignment in *MAX_ALIGNMENT.  */
5244
5245 tree
5246 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5247 {
5248   tree base = get_base_for_alignment_1 (addr, max_alignment);
5249   if (base)
5250     return base;
5251
5252   if (TREE_CODE (addr) == ADDR_EXPR)
5253     addr = TREE_OPERAND (addr, 0);
5254   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5255   return addr;
5256 }
5257
5258 /* Recursive helper function.  */
5259
5260 static bool
5261 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5262 {
5263   /* Inner loops of the nest should not contain siblings.  Example:
5264      when there are two consecutive loops,
5265
5266      | loop_0
5267      |   loop_1
5268      |     A[{0, +, 1}_1]
5269      |   endloop_1
5270      |   loop_2
5271      |     A[{0, +, 1}_2]
5272      |   endloop_2
5273      | endloop_0
5274
5275      the dependence relation cannot be captured by the distance
5276      abstraction.  */
5277   if (loop->next)
5278     return false;
5279
5280   loop_nest->safe_push (loop);
5281   if (loop->inner)
5282     return find_loop_nest_1 (loop->inner, loop_nest);
5283   return true;
5284 }
5285
5286 /* Return false when the LOOP is not well nested.  Otherwise return
5287    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5288    contain the loops from the outermost to the innermost, as they will
5289    appear in the classic distance vector.  */
5290
5291 bool
5292 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5293 {
5294   loop_nest->safe_push (loop);
5295   if (loop->inner)
5296     return find_loop_nest_1 (loop->inner, loop_nest);
5297   return true;
5298 }
5299
5300 /* Returns true when the data dependences have been computed, false otherwise.
5301    Given a loop nest LOOP, the following vectors are returned:
5302    DATAREFS is initialized to all the array elements contained in this loop,
5303    DEPENDENCE_RELATIONS contains the relations between the data references.
5304    Compute read-read and self relations if
5305    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5306
5307 bool
5308 compute_data_dependences_for_loop (struct loop *loop,
5309                                    bool compute_self_and_read_read_dependences,
5310                                    vec<loop_p> *loop_nest,
5311                                    vec<data_reference_p> *datarefs,
5312                                    vec<ddr_p> *dependence_relations)
5313 {
5314   bool res = true;
5315
5316   memset (&dependence_stats, 0, sizeof (dependence_stats));
5317
5318   /* If the loop nest is not well formed, or one of the data references
5319      is not computable, give up without spending time to compute other
5320      dependences.  */
5321   if (!loop
5322       || !find_loop_nest (loop, loop_nest)
5323       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5324       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5325                                    compute_self_and_read_read_dependences))
5326     res = false;
5327
5328   if (dump_file && (dump_flags & TDF_STATS))
5329     {
5330       fprintf (dump_file, "Dependence tester statistics:\n");
5331
5332       fprintf (dump_file, "Number of dependence tests: %d\n",
5333                dependence_stats.num_dependence_tests);
5334       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5335                dependence_stats.num_dependence_dependent);
5336       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5337                dependence_stats.num_dependence_independent);
5338       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5339                dependence_stats.num_dependence_undetermined);
5340
5341       fprintf (dump_file, "Number of subscript tests: %d\n",
5342                dependence_stats.num_subscript_tests);
5343       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5344                dependence_stats.num_subscript_undetermined);
5345       fprintf (dump_file, "Number of same subscript function: %d\n",
5346                dependence_stats.num_same_subscript_function);
5347
5348       fprintf (dump_file, "Number of ziv tests: %d\n",
5349                dependence_stats.num_ziv);
5350       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5351                dependence_stats.num_ziv_dependent);
5352       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5353                dependence_stats.num_ziv_independent);
5354       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5355                dependence_stats.num_ziv_unimplemented);
5356
5357       fprintf (dump_file, "Number of siv tests: %d\n",
5358                dependence_stats.num_siv);
5359       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5360                dependence_stats.num_siv_dependent);
5361       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5362                dependence_stats.num_siv_independent);
5363       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5364                dependence_stats.num_siv_unimplemented);
5365
5366       fprintf (dump_file, "Number of miv tests: %d\n",
5367                dependence_stats.num_miv);
5368       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5369                dependence_stats.num_miv_dependent);
5370       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5371                dependence_stats.num_miv_independent);
5372       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5373                dependence_stats.num_miv_unimplemented);
5374     }
5375
5376   return res;
5377 }
5378
5379 /* Free the memory used by a data dependence relation DDR.  */
5380
5381 void
5382 free_dependence_relation (struct data_dependence_relation *ddr)
5383 {
5384   if (ddr == NULL)
5385     return;
5386
5387   if (DDR_SUBSCRIPTS (ddr).exists ())
5388     free_subscripts (DDR_SUBSCRIPTS (ddr));
5389   DDR_DIST_VECTS (ddr).release ();
5390   DDR_DIR_VECTS (ddr).release ();
5391
5392   free (ddr);
5393 }
5394
5395 /* Free the memory used by the data dependence relations from
5396    DEPENDENCE_RELATIONS.  */
5397
5398 void
5399 free_dependence_relations (vec<ddr_p> dependence_relations)
5400 {
5401   unsigned int i;
5402   struct data_dependence_relation *ddr;
5403
5404   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5405     if (ddr)
5406       free_dependence_relation (ddr);
5407
5408   dependence_relations.release ();
5409 }
5410
5411 /* Free the memory used by the data references from DATAREFS.  */
5412
5413 void
5414 free_data_refs (vec<data_reference_p> datarefs)
5415 {
5416   unsigned int i;
5417   struct data_reference *dr;
5418
5419   FOR_EACH_VEC_ELT (datarefs, i, dr)
5420     free_data_ref (dr);
5421   datarefs.release ();
5422 }
5423
5424 /* Common routine implementing both dr_direction_indicator and
5425    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5426    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5427    Return the step as the indicator otherwise.  */
5428
5429 static tree
5430 dr_step_indicator (struct data_reference *dr, int useful_min)
5431 {
5432   tree step = DR_STEP (dr);
5433   if (!step)
5434     return NULL_TREE;
5435   STRIP_NOPS (step);
5436   /* Look for cases where the step is scaled by a positive constant
5437      integer, which will often be the access size.  If the multiplication
5438      doesn't change the sign (due to overflow effects) then we can
5439      test the unscaled value instead.  */
5440   if (TREE_CODE (step) == MULT_EXPR
5441       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5442       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5443     {
5444       tree factor = TREE_OPERAND (step, 1);
5445       step = TREE_OPERAND (step, 0);
5446
5447       /* Strip widening and truncating conversions as well as nops.  */
5448       if (CONVERT_EXPR_P (step)
5449           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5450         step = TREE_OPERAND (step, 0);
5451       tree type = TREE_TYPE (step);
5452
5453       /* Get the range of step values that would not cause overflow.  */
5454       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5455                          / wi::to_widest (factor));
5456       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5457                          / wi::to_widest (factor));
5458
5459       /* Get the range of values that the unconverted step actually has.  */
5460       wide_int step_min, step_max;
5461       if (TREE_CODE (step) != SSA_NAME
5462           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5463         {
5464           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5465           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5466         }
5467
5468       /* Check whether the unconverted step has an acceptable range.  */
5469       signop sgn = TYPE_SIGN (type);
5470       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5471           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5472         {
5473           if (wi::ge_p (step_min, useful_min, sgn))
5474             return ssize_int (useful_min);
5475           else if (wi::lt_p (step_max, 0, sgn))
5476             return ssize_int (-1);
5477           else
5478             return fold_convert (ssizetype, step);
5479         }
5480     }
5481   return DR_STEP (dr);
5482 }
5483
5484 /* Return a value that is negative iff DR has a negative step.  */
5485
5486 tree
5487 dr_direction_indicator (struct data_reference *dr)
5488 {
5489   return dr_step_indicator (dr, 0);
5490 }
5491
5492 /* Return a value that is zero iff DR has a zero step.  */
5493
5494 tree
5495 dr_zero_step_indicator (struct data_reference *dr)
5496 {
5497   return dr_step_indicator (dr, 1);
5498 }
5499
5500 /* Return true if DR is known to have a nonnegative (but possibly zero)
5501    step.  */
5502
5503 bool
5504 dr_known_forward_stride_p (struct data_reference *dr)
5505 {
5506   tree indicator = dr_direction_indicator (dr);
5507   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5508                                    fold_convert (ssizetype, indicator),
5509                                    ssize_int (0));
5510   return neg_step_val && integer_zerop (neg_step_val);
5511 }