gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "backend.h"
  80 #include "rtl.h"
  81 #include "tree.h"
  82 #include "gimple.h"
  83 #include "gimple-pretty-print.h"
  84 #include "alias.h"
  85 #include "fold-const.h"
  86 #include "expr.h"
  87 #include "gimple-iterator.h"
  88 #include "tree-ssa-loop-niter.h"
  89 #include "tree-ssa-loop.h"
  90 #include "tree-ssa.h"
  91 #include "cfgloop.h"
  92 #include "tree-data-ref.h"
  93 #include "tree-scalar-evolution.h"
  94 #include "dumpfile.h"
  95 #include "tree-affine.h"
  96 #include "params.h"
  97 #include "builtins.h"
  98 #include "stringpool.h"
  99 #include "tree-vrp.h"
 100 #include "tree-ssanames.h"
 101 #include "tree-eh.h"
 102
/* File-local counters describing the outcome of dependence tests.  The
   object has static storage duration, so every counter starts at zero.  */
static struct datadep_stats
{
  /* Counters named after whole dependence relations.  */
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  /* Counters named after individual subscripts.  */
  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  /* Counters for the "ziv" test and its possible answers.  */
  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  /* Counters for the "siv" test and its possible answers.  */
  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  /* Counters for the "miv" test and its possible answers.  */
  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
 129
/* Forward declaration of a file-local helper.  */
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
                                           unsigned int, unsigned int,
                                           struct loop *);
 133 /* Returns true iff A divides B.  */
 134
 135 static inline bool
 136 tree_fold_divides_p (const_tree a, const_tree b)
 137 {
 138   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 139   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 140   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 141 }
 142
 143 /* Returns true iff A divides B.  */
 144
 145 static inline bool
 146 int_divides_p (int a, int b)
 147 {
 148   return ((b % a) == 0);
 149 }
 150
 151 /* Return true if reference REF contains a union access.  */
 152
 153 static bool
 154 ref_contains_union_access_p (tree ref)
 155 {
 156   while (handled_component_p (ref))
 157     {
 158       ref = TREE_OPERAND (ref, 0);
 159       if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
 160           || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
 161         return true;
 162     }
 163   return false;
 164 }
 165
 166 \f
 167
 168 /* Dump into FILE all the data references from DATAREFS.  */
 169
 170 static void
 171 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 172 {
 173   unsigned int i;
 174   struct data_reference *dr;
 175
 176   FOR_EACH_VEC_ELT (datarefs, i, dr)
 177     dump_data_reference (file, dr);
 178 }
 179
 180 /* Unified dump into FILE all the data references from DATAREFS.  */
 181
 182 DEBUG_FUNCTION void
 183 debug (vec<data_reference_p> &ref)
 184 {
 185   dump_data_references (stderr, ref);
 186 }
 187
 188 DEBUG_FUNCTION void
 189 debug (vec<data_reference_p> *ptr)
 190 {
 191   if (ptr)
 192     debug (*ptr);
 193   else
 194     fprintf (stderr, "<nil>\n");
 195 }
 196
 197
 198 /* Dump into STDERR all the data references from DATAREFS.  */
 199
 200 DEBUG_FUNCTION void
 201 debug_data_references (vec<data_reference_p> datarefs)
 202 {
 203   dump_data_references (stderr, datarefs);
 204 }
 205
 206 /* Print to STDERR the data_reference DR.  */
 207
 208 DEBUG_FUNCTION void
 209 debug_data_reference (struct data_reference *dr)
 210 {
 211   dump_data_reference (stderr, dr);
 212 }
 213
 214 /* Dump function for a DATA_REFERENCE structure.  */
 215
 216 void
 217 dump_data_reference (FILE *outf,
 218                      struct data_reference *dr)
 219 {
 220   unsigned int i;
 221
 222   fprintf (outf, "#(Data Ref: \n");
 223   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 224   fprintf (outf, "#  stmt: ");
 225   print_gimple_stmt (outf, DR_STMT (dr), 0);
 226   fprintf (outf, "#  ref: ");
 227   print_generic_stmt (outf, DR_REF (dr));
 228   fprintf (outf, "#  base_object: ");
 229   print_generic_stmt (outf, DR_BASE_OBJECT (dr));
 230
 231   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 232     {
 233       fprintf (outf, "#  Access function %d: ", i);
 234       print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
 235     }
 236   fprintf (outf, "#)\n");
 237 }
 238
 239 /* Unified dump function for a DATA_REFERENCE structure.  */
 240
 241 DEBUG_FUNCTION void
 242 debug (data_reference &ref)
 243 {
 244   dump_data_reference (stderr, &ref);
 245 }
 246
 247 DEBUG_FUNCTION void
 248 debug (data_reference *ptr)
 249 {
 250   if (ptr)
 251     debug (*ptr);
 252   else
 253     fprintf (stderr, "<nil>\n");
 254 }
 255
 256
 257 /* Dumps the affine function described by FN to the file OUTF.  */
 258
 259 DEBUG_FUNCTION void
 260 dump_affine_function (FILE *outf, affine_fn fn)
 261 {
 262   unsigned i;
 263   tree coef;
 264
 265   print_generic_expr (outf, fn[0], TDF_SLIM);
 266   for (i = 1; fn.iterate (i, &coef); i++)
 267     {
 268       fprintf (outf, " + ");
 269       print_generic_expr (outf, coef, TDF_SLIM);
 270       fprintf (outf, " * x_%u", i);
 271     }
 272 }
 273
 274 /* Dumps the conflict function CF to the file OUTF.  */
 275
 276 DEBUG_FUNCTION void
 277 dump_conflict_function (FILE *outf, conflict_function *cf)
 278 {
 279   unsigned i;
 280
 281   if (cf->n == NO_DEPENDENCE)
 282     fprintf (outf, "no dependence");
 283   else if (cf->n == NOT_KNOWN)
 284     fprintf (outf, "not known");
 285   else
 286     {
 287       for (i = 0; i < cf->n; i++)
 288         {
 289           if (i != 0)
 290             fprintf (outf, " ");
 291           fprintf (outf, "[");
 292           dump_affine_function (outf, cf->fns[i]);
 293           fprintf (outf, "]");
 294         }
 295     }
 296 }
 297
 298 /* Dump function for a SUBSCRIPT structure.  */
 299
 300 DEBUG_FUNCTION void
 301 dump_subscript (FILE *outf, struct subscript *subscript)
 302 {
 303   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 304
 305   fprintf (outf, "\n (subscript \n");
 306   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 307   dump_conflict_function (outf, cf);
 308   if (CF_NONTRIVIAL_P (cf))
 309     {
 310       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 311       fprintf (outf, "\n  last_conflict: ");
 312       print_generic_expr (outf, last_iteration);
 313     }
 314
 315   cf = SUB_CONFLICTS_IN_B (subscript);
 316   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 317   dump_conflict_function (outf, cf);
 318   if (CF_NONTRIVIAL_P (cf))
 319     {
 320       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 321       fprintf (outf, "\n  last_conflict: ");
 322       print_generic_expr (outf, last_iteration);
 323     }
 324
 325   fprintf (outf, "\n  (Subscript distance: ");
 326   print_generic_expr (outf, SUB_DISTANCE (subscript));
 327   fprintf (outf, " ))\n");
 328 }
 329
 330 /* Print the classic direction vector DIRV to OUTF.  */
 331
 332 DEBUG_FUNCTION void
 333 print_direction_vector (FILE *outf,
 334                         lambda_vector dirv,
 335                         int length)
 336 {
 337   int eq;
 338
 339   for (eq = 0; eq < length; eq++)
 340     {
 341       enum data_dependence_direction dir = ((enum data_dependence_direction)
 342                                             dirv[eq]);
 343
 344       switch (dir)
 345         {
 346         case dir_positive:
 347           fprintf (outf, "    +");
 348           break;
 349         case dir_negative:
 350           fprintf (outf, "    -");
 351           break;
 352         case dir_equal:
 353           fprintf (outf, "    =");
 354           break;
 355         case dir_positive_or_equal:
 356           fprintf (outf, "   +=");
 357           break;
 358         case dir_positive_or_negative:
 359           fprintf (outf, "   +-");
 360           break;
 361         case dir_negative_or_equal:
 362           fprintf (outf, "   -=");
 363           break;
 364         case dir_star:
 365           fprintf (outf, "    *");
 366           break;
 367         default:
 368           fprintf (outf, "indep");
 369           break;
 370         }
 371     }
 372   fprintf (outf, "\n");
 373 }
 374
 375 /* Print a vector of direction vectors.  */
 376
 377 DEBUG_FUNCTION void
 378 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 379                    int length)
 380 {
 381   unsigned j;
 382   lambda_vector v;
 383
 384   FOR_EACH_VEC_ELT (dir_vects, j, v)
 385     print_direction_vector (outf, v, length);
 386 }
 387
 388 /* Print out a vector VEC of length N to OUTFILE.  */
 389
 390 DEBUG_FUNCTION void
 391 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 392 {
 393   int i;
 394
 395   for (i = 0; i < n; i++)
 396     fprintf (outfile, "%3d ", (int)vector[i]);
 397   fprintf (outfile, "\n");
 398 }
 399
 400 /* Print a vector of distance vectors.  */
 401
 402 DEBUG_FUNCTION void
 403 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 404                     int length)
 405 {
 406   unsigned j;
 407   lambda_vector v;
 408
 409   FOR_EACH_VEC_ELT (dist_vects, j, v)
 410     print_lambda_vector (outf, v, length);
 411 }
 412
 413 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 414
 415 DEBUG_FUNCTION void
 416 dump_data_dependence_relation (FILE *outf,
 417                                struct data_dependence_relation *ddr)
 418 {
 419   struct data_reference *dra, *drb;
 420
 421   fprintf (outf, "(Data Dep: \n");
 422
 423   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 424     {
 425       if (ddr)
 426         {
 427           dra = DDR_A (ddr);
 428           drb = DDR_B (ddr);
 429           if (dra)
 430             dump_data_reference (outf, dra);
 431           else
 432             fprintf (outf, "    (nil)\n");
 433           if (drb)
 434             dump_data_reference (outf, drb);
 435           else
 436             fprintf (outf, "    (nil)\n");
 437         }
 438       fprintf (outf, "    (don't know)\n)\n");
 439       return;
 440     }
 441
 442   dra = DDR_A (ddr);
 443   drb = DDR_B (ddr);
 444   dump_data_reference (outf, dra);
 445   dump_data_reference (outf, drb);
 446
 447   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 448     fprintf (outf, "    (no dependence)\n");
 449
 450   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 451     {
 452       unsigned int i;
 453       struct loop *loopi;
 454
 455       subscript *sub;
 456       FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
 462           dump_subscript (outf, sub);
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 DEBUG_FUNCTION void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 DEBUG_FUNCTION void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 DEBUG_FUNCTION void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
/* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
   (the type of the result is TYPE) as VAR + OFF, where OFF is a
   constant of type ssizetype, and returns true.  If we cannot do this,
   OFF and VAR are left as NULL_TREE and false is returned.

   The function and split_constant_offset are mutually recursive: the
   operands of arithmetic codes are split by split_constant_offset, and
   an SSA_NAME is handled by recursing on its defining assignment.  */

static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
                         tree *var, tree *off)
{
  tree var0, var1;
  tree off0, off1;
  /* Code used to combine the offsets; differs from CODE only for
     POINTER_PLUS_EXPR.  */
  enum tree_code ocode = code;

  /* Failure is the default result.  */
  *var = NULL_TREE;
  *off = NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant is entirely offset: VAR is zero of TYPE.  */
      *var = build_int_cst (type, 0);
      *off = fold_convert (ssizetype, op0);
      return true;

    case POINTER_PLUS_EXPR:
      /* Offsets are combined with a plain addition.  */
      ocode = PLUS_EXPR;
      /* FALLTHROUGH */
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Split both operands, then recombine the variable parts with
         CODE and the constant parts with OCODE.  */
      split_constant_offset (op0, &var0, &off0);
      split_constant_offset (op1, &var1, &off1);
      *var = fold_build2 (code, type, var0, var1);
      *off = size_binop (ocode, off0, off1);
      return true;

    case MULT_EXPR:
      /* Only multiplication by a constant is handled; both the variable
         part and the offset of OP0 are scaled by OP1.  */
      if (TREE_CODE (op1) != INTEGER_CST)
        return false;

      split_constant_offset (op0, &var0, &off0);
      *var = fold_build2 (MULT_EXPR, type, var0, op1);
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      return true;

    case ADDR_EXPR:
      {
        tree base, poffset;
        poly_int64 pbitsize, pbitpos, pbytepos;
        machine_mode pmode;
        int punsignedp, preversep, pvolatilep;

        /* Work on the object whose address is taken.  */
        op0 = TREE_OPERAND (op0, 0);
        base
          = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
                                 &punsignedp, &preversep, &pvolatilep);

        /* Give up unless the bit position is a whole number of bytes.  */
        if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
          return false;
        base = build_fold_addr_expr (base);
        off0 = ssize_int (pbytepos);

        if (poffset)
          {
            /* Move the constant part of the variable offset into OFF0
               and add what remains to the base address.  */
            split_constant_offset (poffset, &poffset, &off1);
            off0 = size_binop (PLUS_EXPR, off0, off1);
            if (POINTER_TYPE_P (TREE_TYPE (base)))
              base = fold_build_pointer_plus (base, poffset);
            else
              base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
                                  fold_convert (TREE_TYPE (base), poffset));
          }

        var0 = fold_convert (type, base);

        /* If variable length types are involved, punt, otherwise casts
           might be converted into ARRAY_REFs in gimplify_conversion.
           To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
           possibly no longer appears in current GIMPLE, might resurface.
           This perhaps could run
           if (CONVERT_EXPR_P (var0))
             {
               gimplify_conversion (&var0);
               // Attempt to fill in any within var0 found ARRAY_REF's
               // element size from corresponding op embedded ARRAY_REF,
               // if unsuccessful, just punt.
             }  */
        /* Strip all pointer levels from TYPE before checking its size.  */
        while (POINTER_TYPE_P (type))
          type = TREE_TYPE (type);
        if (int_size_in_bytes (type) < 0)
          return false;

        *var = var0;
        *off = off0;
        return true;
      }

    case SSA_NAME:
      {
        if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
          return false;

        gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
        enum tree_code subcode;

        /* Only definitions by assignment can be looked through.  */
        if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
          return false;

        var0 = gimple_assign_rhs1 (def_stmt);
        subcode = gimple_assign_rhs_code (def_stmt);
        var1 = gimple_assign_rhs2 (def_stmt);

        /* Split the right-hand side of the definition instead.  */
        return split_constant_offset_1 (type, var0, subcode, var1, var, off);
      }
    CASE_CONVERT:
      {
        /* We must not introduce undefined overflow, and we must not change the value.
           Hence we're okay if the inner type doesn't overflow to start with
           (pointer or signed), the outer type also is an integer or pointer
           and the outer precision is at least as large as the inner.  */
        tree itype = TREE_TYPE (op0);
        if ((POINTER_TYPE_P (itype)
             || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
            && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
            && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
          {
            if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
              {
                /* Split the unconverted operand and try to prove that
                   wrapping isn't a problem.  */
                tree tmp_var, tmp_off;
                split_constant_offset (op0, &tmp_var, &tmp_off);

                /* See whether we have an SSA_NAME whose range is known
                   to be [A, B].  */
                if (TREE_CODE (tmp_var) != SSA_NAME)
                  return false;
                wide_int var_min, var_max;
                value_range_kind vr_type = get_range_info (tmp_var, &var_min,
                                                           &var_max);
                wide_int var_nonzero = get_nonzero_bits (tmp_var);
                signop sgn = TYPE_SIGN (itype);
                if (intersect_range_with_nonzero_bits (vr_type, &var_min,
                                                       &var_max, var_nonzero,
                                                       sgn) != VR_RANGE)
                  return false;

                /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
                   is known to be [A + TMP_OFF, B + TMP_OFF], with all
                   operations done in ITYPE.  The addition must overflow
                   at both ends of the range or at neither.  */
                wi::overflow_type overflow[2];
                unsigned int prec = TYPE_PRECISION (itype);
                wide_int woff = wi::to_wide (tmp_off, prec);
                wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
                wi::add (var_max, woff, sgn, &overflow[1]);
                if ((overflow[0] != wi::OVF_NONE) != (overflow[1] != wi::OVF_NONE))
                  return false;

                /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
                widest_int diff = (widest_int::from (op0_min, sgn)
                                   - widest_int::from (var_min, sgn));
                var0 = tmp_var;
                *off = wide_int_to_tree (ssizetype, diff);
              }
            else
              /* No wrapping inner type: split the operand directly.  */
              split_constant_offset (op0, &var0, off);
            /* In both cases the variable part is converted to TYPE.  */
            *var = fold_convert (type, var0);
            return true;
          }
        return false;
      }

    default:
      return false;
    }
}
 763
 764 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 765    will be ssizetype.  */
 766
 767 void
 768 split_constant_offset (tree exp, tree *var, tree *off)
 769 {
 770   tree type = TREE_TYPE (exp), op0, op1, e, o;
 771   enum tree_code code;
 772
 773   *var = exp;
 774   *off = ssize_int (0);
 775
 776   if (tree_is_chrec (exp)
 777       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 778     return;
 779
 780   code = TREE_CODE (exp);
 781   extract_ops_from_tree (exp, &code, &op0, &op1);
 782   if (split_constant_offset_1 (type, op0, code, op1, &e, &o))
 783     {
 784       *var = e;
 785       *off = o;
 786     }
 787 }
 788
 789 /* Returns the address ADDR of an object in a canonical shape (without nop
 790    casts, and with type of pointer to the object).  */
 791
 792 static tree
 793 canonicalize_base_object_address (tree addr)
 794 {
 795   tree orig = addr;
 796
 797   STRIP_NOPS (addr);
 798
 799   /* The base address may be obtained by casting from integer, in that case
 800      keep the cast.  */
 801   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 802     return orig;
 803
 804   if (TREE_CODE (addr) != ADDR_EXPR)
 805     return addr;
 806
 807   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 808 }
 809
/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return opt_result::success () if the analysis succeeded, storing the
   results in DRB; otherwise return a failure located at STMT.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */

opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
                      struct loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  /* Loop analysis is used only for a non-null loop with a nonzero
     number.  */
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
                              &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  /* The bit position must be a whole number of bytes.  */
  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
                                   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
                                   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned int HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
              && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  /* Turn BASE into an address: the pointer operand of a MEM_REF, or
     the address of the object otherwise.  */
  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          /* Subtract MOFF from the base and add it to POFFSET instead.
             Adjust the misalignment to reflect the amount we subtracted.  */
          poly_offset_int moff = mem_ref_offset (base);
          base_misalignment -= moff.force_shwi ();
          tree mofft = wide_int_to_tree (sizetype, moff);
          if (!poffset)
            poffset = mofft;
          else
            poffset = size_binop (PLUS_EXPR, poffset, mofft);
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  /* Evolution of the base address: analyzed in LOOP for loop analysis,
     otherwise taken as-is with a zero step.  */
  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  /* Evolution of the variable offset, handled the same way; a missing
     offset is treated as a constant zero.  */
  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
        {
          offset_iv.base = poffset;
          offset_iv.step = ssize_int (0);
        }
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
        return opt_result::failure_at
          (stmt, "failed: evolution of offset is not affine.\n");
    }

  /* INIT starts as the constant byte position of the reference.  */
  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  /* Likewise move the constant component of the offset into INIT; the
     base misalignment is not affected by this one.  */
  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  /* The overall step is the sum of the base and offset steps.  */
  step = size_binop (PLUS_EXPR,
                     fold_convert (ssizetype, base_iv.step),
                     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned int HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
              && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  /* Record the results.  */
  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  /* If the misalignment is not a known constant for BASE_ALIGNMENT,
     fall back to the alignment known_alignment derives from it and
     record a zero misalignment.  */
  if (known_misalignment (base_misalignment, base_alignment,
                          &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
 975
 976 /* Return true if OP is a valid component reference for a DR access
 977    function.  This accepts a subset of what handled_component_p accepts.
        REALPART_EXPR, IMAGPART_EXPR and ARRAY_REF are always accepted; a
        COMPONENT_REF is accepted only when the object it selects from has
        RECORD_TYPE.  Every other tree code is rejected.  */
 978
 979 static bool
 980 access_fn_component_p (tree op)
 981 {
 982   switch (TREE_CODE (op))
 983     {
 984     case REALPART_EXPR:
 985     case IMAGPART_EXPR:
 986     case ARRAY_REF:
 987       return true;
 988
 989     case COMPONENT_REF:
           /* Only fields of records qualify; a containing object of any
              other type (e.g. a union) is rejected.  */
 990       return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
 991
 992     default:
 993       return false;
 994     }
 995 }
 996
 997 /* Determines the base object and the list of indices of memory reference
 998    DR, analyzed in LOOP and instantiated before NEST.  The results are
        stored in DR_BASE_OBJECT (DR) and DR_ACCESS_FNS (DR).  When NEST is
        null the reference itself becomes the base object and the list of
        access functions is empty.  */
 999
1000 static void
1001 dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
1002 {
1003   vec<tree> access_fns = vNULL;
1004   tree ref, op;
1005   tree base, off, access_fn;
1006
1007   /* If analyzing a basic-block there are no indices to analyze
1008      and thus no access functions.  */
1009   if (!nest)
1010     {
1011       DR_BASE_OBJECT (dr) = DR_REF (dr);
1012       DR_ACCESS_FNS (dr).create (0);
1013       return;
1014     }
1015
1016   ref = DR_REF (dr);
1017
1018   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1019      into a two element array with a constant index.  The base is
1020      then just the immediate underlying object.  */
1021   if (TREE_CODE (ref) == REALPART_EXPR)
1022     {
1023       ref = TREE_OPERAND (ref, 0);
1024       access_fns.safe_push (integer_zero_node);
1025     }
1026   else if (TREE_CODE (ref) == IMAGPART_EXPR)
1027     {
1028       ref = TREE_OPERAND (ref, 0);
1029       access_fns.safe_push (integer_one_node);
1030     }
1031
1032   /* Analyze access functions of dimensions we know to be independent.
1033      The list of component references handled here should be kept in
1034      sync with access_fn_component_p.  The walk goes from the outermost
          component towards the base, pushing one access function per
          handled component.  */
1035   while (handled_component_p (ref))
1036     {
1037       if (TREE_CODE (ref) == ARRAY_REF)
1038         {
               /* The array index is operand 1; its scalar evolution is the
                  access function for this dimension.  */
1039           op = TREE_OPERAND (ref, 1);
1040           access_fn = analyze_scalar_evolution (loop, op);
1041           access_fn = instantiate_scev (nest, loop, access_fn);
1042           access_fns.safe_push (access_fn);
1043         }
1044       else if (TREE_CODE (ref) == COMPONENT_REF
1045                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1046         {
1047           /* For COMPONENT_REFs of records (but not unions!) use the
1048              FIELD_DECL offset as constant access function so we can
1049              disambiguate a[i].f1 and a[i].f2.  The value pushed is the
                  result of component_ref_field_offset scaled by BITS_PER_UNIT
                  plus the field's DECL_FIELD_BIT_OFFSET.  Note that this OFF
                  is local to the branch and shadows the function-level OFF.  */
1050           tree off = component_ref_field_offset (ref);
1051           off = size_binop (PLUS_EXPR,
1052                             size_binop (MULT_EXPR,
1053                                         fold_convert (bitsizetype, off),
1054                                         bitsize_int (BITS_PER_UNIT)),
1055                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1056           access_fns.safe_push (off);
1057         }
1058       else
1059         /* If we have an unhandled component we could not translate
1060            to an access function stop analyzing.  We have determined
1061            our base object in this case.  */
1062         break;
1063
           /* Step down to the object this component was taken from.  */
1064       ref = TREE_OPERAND (ref, 0);
1065     }
1066
1067   /* If the address operand of a MEM_REF base has an evolution in the
1068      analyzed nest, add it as an additional independent access-function.
          If the evolution is not a POLYNOMIAL_CHREC the MEM_REF is kept
          unchanged as the base object.  */
1069   if (TREE_CODE (ref) == MEM_REF)
1070     {
1071       op = TREE_OPERAND (ref, 0);
1072       access_fn = analyze_scalar_evolution (loop, op);
1073       access_fn = instantiate_scev (nest, loop, access_fn);
1074       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1075         {
1076           tree orig_type;
1077           tree memoff = TREE_OPERAND (ref, 1);
               /* Split the initial value of the evolution into a base
                  pointer and a constant offset OFF.  ORIG_TYPE remembers the
                  type the replacement initial condition must have.  */
1078           base = initial_condition (access_fn);
1079           orig_type = TREE_TYPE (base);
1080           STRIP_USELESS_TYPE_CONVERSION (base);
1081           split_constant_offset (base, &base, &off);
1082           STRIP_USELESS_TYPE_CONVERSION (base);
1083           /* Fold the MEM_REF offset into the evolutions initial
1084              value to make more bases comparable.  */
1085           if (!integer_zerop (memoff))
1086             {
1087               off = size_binop (PLUS_EXPR, off,
1088                                 fold_convert (ssizetype, memoff));
1089               memoff = build_int_cst (TREE_TYPE (memoff), 0);
1090             }
1091           /* Adjust the offset so it is a multiple of the access type
1092              size and thus we separate bases that can possibly be used
1093              to produce partial overlaps (which the access_fn machinery
1094              cannot handle).  */
1095           wide_int rem;
1096           if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1097               && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1098               && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1099             rem = wi::mod_trunc
1100               (wi::to_wide (off),
1101                wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1102                SIGNED);
1103           else
1104             /* If we can't compute the remainder simply force the initial
1105                condition to zero.  */
1106             rem = wi::to_wide (off);
               /* OFF keeps the part that is a multiple of the access size;
                  the remainder REM goes back into the MEM_REF offset.  */
1107           off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1108           memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1109           /* And finally replace the initial condition.  */
1110           access_fn = chrec_replace_initial_condition
1111               (access_fn, fold_convert (orig_type, off));
1112           /* ???  This is still not a suitable base object for
1113              dr_may_alias_p - the base object needs to be an
1114              access that covers the object as whole.  With
1115              an evolution in the pointer this cannot be
1116              guaranteed.
1117              As a band-aid, mark the access so we can special-case
1118              it in dr_may_alias_p.  */
1119           tree old = ref;
1120           ref = fold_build2_loc (EXPR_LOCATION (ref),
1121                                  MEM_REF, TREE_TYPE (ref),
1122                                  base, memoff);
               /* The rebuilt MEM_REF inherits the dependence clique and
                  base of the reference it replaces.  */
1123           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1124           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1125           DR_UNCONSTRAINED_BASE (dr) = true;
1126           access_fns.safe_push (access_fn);
1127         }
1128     }
1129   else if (DECL_P (ref))
1130     {
1131       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1132       ref = build2 (MEM_REF, TREE_TYPE (ref),
1133                     build_fold_addr_expr (ref),
1134                     build_int_cst (reference_alias_ptr_type (ref), 0));
1135     }
1136
       /* Whatever is left of REF is the base object; the vector of access
          functions is handed over to DR.  */
1137   DR_BASE_OBJECT (dr) = ref;
1138   DR_ACCESS_FNS (dr) = access_fns;
1139 }
1140
1141 /* Extracts the alias analysis information from the memory reference DR.
        If the base address of DR_REF (DR) is an INDIRECT_REF or a MEM_REF
        whose address operand is an SSA_NAME, that name's pointer info is
        recorded in DR_PTR_INFO (DR).  In every other case DR is left
        untouched.  */
1142
1143 static void
1144 dr_analyze_alias (struct data_reference *dr)
1145 {
1146   tree ref = DR_REF (dr);
1147   tree base = get_base_address (ref), addr;
1148
1149   if (INDIRECT_REF_P (base)
1150       || TREE_CODE (base) == MEM_REF)
1151     {
           /* Operand 0 of the dereference is the address being accessed.  */
1152       addr = TREE_OPERAND (base, 0);
1153       if (TREE_CODE (addr) == SSA_NAME)
1154         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1155     }
1156 }
1157
1158 /* Frees data reference DR: first releases its vector of access
        functions, then frees the structure itself.  */
1159
1160 void
1161 free_data_ref (data_reference_p dr)
1162 {
1163   DR_ACCESS_FNS (dr).release ();
1164   free (dr);
1165 }
1166
1167 /* Analyze memory reference MEMREF, which is accessed in STMT.
1168    The reference is a read if IS_READ is true, otherwise it is a write.
1169    IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1170    within STMT, i.e. that it might not occur even if STMT is executed
1171    and runs to completion.
1172
1173    Return the data_reference description of MEMREF.  NEST is the outermost
1174    loop in which the reference should be instantiated, LOOP is the loop
1175    in which the data reference should be analyzed.  The description is
        allocated here with XCNEW and can be released with free_data_ref.
        The result of dr_analyze_innermost is not checked by this function.  */
1176
1177 struct data_reference *
1178 create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1179                  bool is_read, bool is_conditional_in_stmt)
1180 {
1181   struct data_reference *dr;
1182
1183   if (dump_file && (dump_flags & TDF_DETAILS))
1184     {
1185       fprintf (dump_file, "Creating dr for ");
1186       print_generic_expr (dump_file, memref, TDF_SLIM);
1187       fprintf (dump_file, "\n");
1188     }
1189
1190   dr = XCNEW (struct data_reference);
1191   DR_STMT (dr) = stmt;
1192   DR_REF (dr) = memref;
1193   DR_IS_READ (dr) = is_read;
1194   DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1195
       /* Without a NEST the innermost behavior is analyzed with no loop at
          all; the indices and alias information are filled in afterwards.  */
1196   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1197                         nest != NULL ? loop : NULL, stmt);
1198   dr_analyze_indices (dr, nest, loop);
1199   dr_analyze_alias (dr);
1200
       /* With detailed dumping enabled, print everything that was just
          computed for the new data reference.  */
1201   if (dump_file && (dump_flags & TDF_DETAILS))
1202     {
1203       unsigned i;
1204       fprintf (dump_file, "\tbase_address: ");
1205       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1206       fprintf (dump_file, "\n\toffset from base address: ");
1207       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1208       fprintf (dump_file, "\n\tconstant offset from base address: ");
1209       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1210       fprintf (dump_file, "\n\tstep: ");
1211       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1212       fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1213       fprintf (dump_file, "\n\tbase misalignment: %d",
1214                DR_BASE_MISALIGNMENT (dr));
1215       fprintf (dump_file, "\n\toffset alignment: %d",
1216                DR_OFFSET_ALIGNMENT (dr));
1217       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1218       fprintf (dump_file, "\n\tbase_object: ");
1219       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1220       fprintf (dump_file, "\n");
           /* NOTE(review): I is unsigned but is printed with %d below; %u
              would match the type.  Confirm before changing dump output.  */
1221       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1222         {
1223           fprintf (dump_file, "\tAccess function %d: ", i);
1224           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1225         }
1226     }
1227
1228   return dr;
1229 }
1230
1231 /*  A helper function that computes an order between two tree expressions
1232     T1 and T2.  This is used in comparator functions sorting objects based
1233     on the order of tree expressions.  The function returns -1, 0, or 1,
         except for two STRING_CSTs of equal length, where the result of
         memcmp is returned as is (negative, zero or positive).  A null tree
         sorts before any non-null tree.  */
1234
1235 int
1236 data_ref_compare_tree (tree t1, tree t2)
1237 {
1238   int i, cmp;
1239   enum tree_code code;
1240   char tclass;
1241
1242   if (t1 == t2)
1243     return 0;
1244   if (t1 == NULL)
1245     return -1;
1246   if (t2 == NULL)
1247     return 1;
1248
       /* Look through useless type conversions before comparing, and
          re-check identity afterwards.  */
1249   STRIP_USELESS_TYPE_CONVERSION (t1);
1250   STRIP_USELESS_TYPE_CONVERSION (t2);
1251   if (t1 == t2)
1252     return 0;
1253
       /* Trees with different codes are ordered by code, unless both are
          conversions, in which case their operands are compared below.  */
1254   if (TREE_CODE (t1) != TREE_CODE (t2)
1255       && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1256     return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1257
1258   code = TREE_CODE (t1);
1259   switch (code)
1260     {
1261     case INTEGER_CST:
1262       return tree_int_cst_compare (t1, t2);
1263
1264     case STRING_CST:
           /* Shorter strings sort first; equal-length strings are ordered
              by their bytes.  */
1265       if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1266         return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1267       return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1268                      TREE_STRING_LENGTH (t1));
1269
1270     case SSA_NAME:
           /* SSA names are ordered by version; equal versions compare
              equal via the final return.  */
1271       if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1272         return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1273       break;
1274
1275     default:
1276       if (POLY_INT_CST_P (t1))
1277         return compare_sizes_for_sort (wi::to_poly_widest (t1),
1278                                        wi::to_poly_widest (t2));
1279
1280       tclass = TREE_CODE_CLASS (code);
1281
1282       /* For decls, compare their UIDs.  */
1283       if (tclass == tcc_declaration)
1284         {
1285           if (DECL_UID (t1) != DECL_UID (t2))
1286             return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1287           break;
1288         }
1289       /* For expressions, compare their operands recursively, from the
              last operand to the first; the first difference decides.  */
1290       else if (IS_EXPR_CODE_CLASS (tclass))
1291         {
1292           for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1293             {
1294               cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1295                                            TREE_OPERAND (t2, i));
1296               if (cmp != 0)
1297                 return cmp;
1298             }
1299         }
1300       else
             /* Any other class of tree is not expected here.  */
1301         gcc_unreachable ();
1302     }
1303
1304   return 0;
1305 }
1306
1307 /* Return success if it's possible to resolve data dependence DDR by a
1308    runtime alias check, otherwise a failure located at the statement of
        DDR_A (DDR) that says why.  The check is rejected when SPEED_P is
        false (optimizing for size) and when LOOP is non-null and has an
        inner loop.  */
1309
1310 opt_result
1311 runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p)
1312 {
1313   if (dump_enabled_p ())
1314     dump_printf (MSG_NOTE,
1315                  "consider run-time aliasing test between %T and %T\n",
1316                  DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1317
1318   if (!speed_p)
1319     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1320                                    "runtime alias check not supported when"
1321                                    " optimizing for size.\n");
1322
1323   /* FORNOW: We don't support versioning with outer-loop in either
1324      vectorization or loop distribution.  */
1325   if (loop != NULL && loop->inner != NULL)
1326     return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1327                                    "runtime alias check not supported for"
1328                                    " outer loop.\n");
1329
1330   return opt_result::success ();
1331 }
1332
1333 /* Operator == between two dr_with_seg_len objects.
1334
1335    This equality operator is used to make sure two data refs
1336    are the same one so that we will consider to combine the
1337    aliasing checks of those two pairs of data dependent data
1338    refs.
        Two objects are equal when their base addresses are operand_equal_p,
        their offsets, inits and segment lengths compare equal under
        data_ref_compare_tree, and their access sizes and alignments
        match.  */
1339
1340 static bool
1341 operator == (const dr_with_seg_len& d1,
1342              const dr_with_seg_len& d2)
1343 {
1344   return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1345                            DR_BASE_ADDRESS (d2.dr), 0)
1346           && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1347           && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1348           && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1349           && known_eq (d1.access_size, d2.access_size)
1350           && d1.align == d2.align);
1351 }
1352
1353 /* Comparison function for sorting objects of dr_with_seg_len_pair_t
1354    so that we can combine aliasing checks in one scan.  PA_ and PB_ point
        to the two pairs being compared.  The result is the first nonzero
        data_ref_compare_tree result over, in order: the base addresses of
        the first and second members, their steps, then the offset and init
        of the first members and the offset and init of the second members;
        0 if all of these compare equal.  */
1355
1356 static int
1357 comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1358 {
1359   const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1360   const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1361   const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1362   const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1363
1364   /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1365      if a and c have the same basic address and step, and b and d have the same
1366      address and step.  Therefore, if any a&c or b&d don't have the same address
1367      and step, we don't care the order of those two pairs after sorting.  */
1368   int comp_res;
1369
1370   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1371                                          DR_BASE_ADDRESS (b1.dr))) != 0)
1372     return comp_res;
1373   if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1374                                          DR_BASE_ADDRESS (b2.dr))) != 0)
1375     return comp_res;
1376   if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1377                                          DR_STEP (b1.dr))) != 0)
1378     return comp_res;
1379   if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1380                                          DR_STEP (b2.dr))) != 0)
1381     return comp_res;
1382   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1383                                          DR_OFFSET (b1.dr))) != 0)
1384     return comp_res;
1385   if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1386                                          DR_INIT (b1.dr))) != 0)
1387     return comp_res;
1388   if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1389                                          DR_OFFSET (b2.dr))) != 0)
1390     return comp_res;
1391   if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1392                                          DR_INIT (b2.dr))) != 0)
1393     return comp_res;
1394
1395   return 0;
1396 }
1397
1398 /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1399    FACTOR is number of iterations that each data reference is accessed.
        (The corresponding parameter is unnamed below and is not used by
        this function.)  ALIAS_PAIRS is sorted and modified in place.
1400
1401    Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1402    we create an expression:
1403
1404    (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1405    || ((load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1406
1407    for aliasing checks.  However, in some cases we can decrease the number
1408    of checks by combining two checks into one.  For example, suppose we have
1409    another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1410    condition is satisfied:
1411
1412    load_ptr_0 < load_ptr_1  &&
1413    load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1414
1415    (this condition means, in each iteration of vectorized loop, the accessed
1416    memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1417    load_ptr_1.)
1418
1419    we then can use only the following expression to finish the aliasing checks
1420    between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1421
1422    (((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1423    || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1424
1425    Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1426    basic address.  */
1427
1428 void
1429 prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1430                                poly_uint64)
1431 {
1432   /* Sort the collected data ref pairs so that we can scan them once to
1433      combine all possible aliasing checks.  */
1434   alias_pairs->qsort (comp_dr_with_seg_len_pair);
1435
1436   /* Scan the sorted dr pairs and check if we can combine alias checks
1437      of two neighboring dr pairs.  Whenever entry I is removed, I is
          decremented so that the entry that slides into its place is
          compared against the same predecessor next.  */
1438   for (size_t i = 1; i < alias_pairs->length (); ++i)
1439     {
1440       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1441       dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
1442                       *dr_b1 = &(*alias_pairs)[i-1].second,
1443                       *dr_a2 = &(*alias_pairs)[i].first,
1444                       *dr_b2 = &(*alias_pairs)[i].second;
1445
1446       /* Remove duplicate data ref pairs.  */
1447       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1448         {
1449           if (dump_enabled_p ())
1450             dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1451                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1452                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1453           alias_pairs->ordered_remove (i--);
1454           continue;
1455         }
1456
1457       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1458         {
1459           /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1460              and DR_A1 and DR_A2 are two consecutive memrefs.  If it is
                  the A members that are equal instead, swap the local
                  pointers so that the code below only has to handle one
                  arrangement.  */
1461           if (*dr_a1 == *dr_a2)
1462             {
1463               std::swap (dr_a1, dr_b1);
1464               std::swap (dr_a2, dr_b2);
1465             }
1466
1467           poly_int64 init_a1, init_a2;
1468           /* Only consider cases in which the distance between the initial
1469              DR_A1 and the initial DR_A2 is known at compile time.  */
1470           if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1471                                 DR_BASE_ADDRESS (dr_a2->dr), 0)
1472               || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1473                                    DR_OFFSET (dr_a2->dr), 0)
1474               || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1475               || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1476             continue;
1477
1478           /* Don't combine if we can't tell which one comes first.  */
1479           if (!ordered_p (init_a1, init_a2))
1480             continue;
1481
1482           /* Make sure dr_a1 starts left of dr_a2.  This swaps the
                  pointed-to vector elements themselves, not just the local
                  pointers.  */
1483           if (maybe_gt (init_a1, init_a2))
1484             {
1485               std::swap (*dr_a1, *dr_a2);
1486               std::swap (init_a1, init_a2);
1487             }
1488
1489           /* Work out what the segment length would be if we did combine
1490              DR_A1 and DR_A2:
1491
1492              - If DR_A1 and DR_A2 have equal lengths, that length is
1493                also the combined length.
1494
1495              - If DR_A1 and DR_A2 both have negative "lengths", the combined
1496                length is the lower bound on those lengths.
1497
1498              - If DR_A1 and DR_A2 both have positive lengths, the combined
1499                length is the upper bound on those lengths.
1500
1501              Other cases are unlikely to give a useful combination.
1502
1503              The lengths both have sizetype, so the sign is taken from
1504              the step instead.  */
1505           if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
1506             {
1507               poly_uint64 seg_len_a1, seg_len_a2;
1508               if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1509                   || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1510                 continue;
1511
1512               tree indicator_a = dr_direction_indicator (dr_a1->dr);
1513               if (TREE_CODE (indicator_a) != INTEGER_CST)
1514                 continue;
1515
1516               tree indicator_b = dr_direction_indicator (dr_a2->dr);
1517               if (TREE_CODE (indicator_b) != INTEGER_CST)
1518                 continue;
1519
1520               int sign_a = tree_int_cst_sgn (indicator_a);
1521               int sign_b = tree_int_cst_sgn (indicator_b);
1522
                   /* A zero sign is compatible with either direction; mixed
                      strictly positive and strictly negative signs are not
                      combined.  */
1523               poly_uint64 new_seg_len;
1524               if (sign_a <= 0 && sign_b <= 0)
1525                 new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1526               else if (sign_a >= 0 && sign_b >= 0)
1527                 new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1528               else
1529                 continue;
1530
1531               dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1532                                               new_seg_len);
1533               dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1534             }
1535
1536           /* This is always positive due to the swap above.  */
1537           poly_uint64 diff = init_a2 - init_a1;
1538
1539           /* The new check will start at DR_A1.  Make sure that its access
1540              size encompasses the initial DR_A2.  */
1541           if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1542             {
1543               dr_a1->access_size = upper_bound (dr_a1->access_size,
1544                                                 diff + dr_a2->access_size);
1545               unsigned int new_align = known_alignment (dr_a1->access_size);
1546               dr_a1->align = MIN (dr_a1->align, new_align);
1547             }
1548           if (dump_enabled_p ())
1549             dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1550                          DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1551                          DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
               /* The merged check now lives in entry I - 1; drop entry I and
                  revisit the same position.  */
1552           alias_pairs->ordered_remove (i);
1553           i--;
1554         }
1555     }
1556 }
1557
1558 /* Given LOOP's two data references and segment lengths described by DR_A
1559    and DR_B, create expression checking if the two addresses ranges intersect
1560    with each other based on index of the two addresses.  This can only be
1561    done if DR_A and DR_B referring to the same (array) object and the index
1562    is the only difference.  For example:
1563
1564                        DR_A                           DR_B
1565       data-ref         arr[i]                         arr[j]
1566       base_object      arr                            arr
1567       index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
1568
1569    The addresses and their index are like:
1570
1571         |<- ADDR_A    ->|          |<- ADDR_B    ->|
1572      ------------------------------------------------------->
1573         |   |   |   |   |          |   |   |   |   |
1574      ------------------------------------------------------->
1575         i_0 ...         i_0+4      j_0 ...         j_0+4
1576
1577    We can create expression based on index rather than address:
1578
1579      (i_0 + 4 < j_0 || j_0 + 4 < i_0)
1580
1581    Note evolution step of index needs to be considered in comparison.
        Return true if the check could be expressed this way.  In that case
        the per-dimension conditions have been ANDed into *COND_EXPR (or
        stored there if *COND_EXPR was null).  Return false otherwise; note
        that *COND_EXPR may already have been extended for earlier dimensions
        when a later dimension makes the function give up.  */
1582
1583 static bool
1584 create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
1585                                      const dr_with_seg_len& dr_a,
1586                                      const dr_with_seg_len& dr_b)
1587 {
       /* Both references must advance, and they must have the same number
          of access functions so these can be compared pairwise.  */
1588   if (integer_zerop (DR_STEP (dr_a.dr))
1589       || integer_zerop (DR_STEP (dr_b.dr))
1590       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
1591     return false;
1592
       /* The segment lengths must be known at compile time.  */
1593   poly_uint64 seg_len1, seg_len2;
1594   if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
1595       || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
1596     return false;
1597
1598   if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
1599     return false;
1600
1601   if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
1602     return false;
1603
1604   if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
1605     return false;
1606
1607   gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
1608
1609   bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
1610   unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
1611   if (neg_step)
1612     {
           /* Work with magnitudes from here on; the access sizes are
              accounted for separately below for negative steps.  */
1613       abs_step = -abs_step;
1614       seg_len1 = -seg_len1;
1615       seg_len2 = -seg_len2;
1616     }
1617   else
1618     {
1619       /* Include the access size in the length, so that we only have one
1620          tree addition below.  */
1621       seg_len1 += dr_a.access_size;
1622       seg_len2 += dr_b.access_size;
1623     }
1624
1625   /* Infer the number of iterations with which the memory segment is accessed
1626      by DR.  In other words, alias is checked if memory segment accessed by
1627      DR_A in some iterations intersect with memory segment accessed by DR_B
1628      in the same amount iterations.
1629      Note segment length is a linear function of number of iterations with
1630      DR_STEP as the coefficient.  The divisions below round up.  */
1631   poly_uint64 niter_len1, niter_len2;
1632   if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
1633       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
1634     return false;
1635
1636   poly_uint64 niter_access1 = 0, niter_access2 = 0;
1637   if (neg_step)
1638     {
1639       /* Divide each access size by the byte step, rounding up.  Both
              operands use the usual (SIZE + STEP - 1) / STEP form; subtracting
              the step instead would wrap the unsigned value for small access
              sizes and round down for large ones.  */
1640       if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
1641                             abs_step, &niter_access1)
1642           || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
1643                                abs_step, &niter_access2))
1644         return false;
1645     }
1646
1647   unsigned int i;
1648   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
1649     {
1650       tree access1 = DR_ACCESS_FN (dr_a.dr, i);
1651       tree access2 = DR_ACCESS_FN (dr_b.dr, i);
1652       /* Two indices must be the same if they are not scev, or not scev with
1653          respect to the current loop being vectorized.  */
1654       if (TREE_CODE (access1) != POLYNOMIAL_CHREC
1655           || TREE_CODE (access2) != POLYNOMIAL_CHREC
1656           || CHREC_VARIABLE (access1) != (unsigned)loop->num
1657           || CHREC_VARIABLE (access2) != (unsigned)loop->num)
1658         {
1659           if (operand_equal_p (access1, access2, 0))
1660             continue;
1661
1662           return false;
1663         }
1664       /* The two indices must have the same step.  */
1665       if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
1666         return false;
1667
1668       tree idx_step = CHREC_RIGHT (access1);
1669       /* Index must have const step, otherwise DR_STEP won't be constant.  */
1670       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
1671       /* Index must evaluate in the same direction as DR.  */
1672       gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
1673
           /* The initial index values are the starting points of the two
              index ranges; they must have compatible types to be compared.  */
1674       tree min1 = CHREC_LEFT (access1);
1675       tree min2 = CHREC_LEFT (access2);
1676       if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
1677         return false;
1678
1679       /* Ideally, alias can be checked against loop's control IV, but we
1680          need to prove linear mapping between control IV and reference
1681          index.  Although that should be true, we check against (array)
1682          index of data reference.  Like segment length, index length is
1683          linear function of the number of iterations with index_step as
1684          the coefficient, i.e, niter_len * idx_step.  */
1685       tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1686                                    build_int_cst (TREE_TYPE (min1),
1687                                                   niter_len1));
1688       tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1689                                    build_int_cst (TREE_TYPE (min2),
1690                                                   niter_len2));
1691       tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
1692       tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
1693       /* Adjust ranges for negative step.  */
1694       if (neg_step)
1695         {
1696           /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
1697           std::swap (min1, max1);
1698           std::swap (min2, max2);
1699
1700           /* As with the lengths just calculated, we've measured the access
1701              sizes in iterations, so multiply them by the index step.  */
1702           tree idx_access1
1703             = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
1704                            build_int_cst (TREE_TYPE (min1), niter_access1));
1705           tree idx_access2
1706             = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
1707                            build_int_cst (TREE_TYPE (min2), niter_access2));
1708
1709           /* MINUS_EXPR because the above values are negative.  */
1710           max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
1711           max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
1712         }
           /* The index ranges do not intersect if one ends at or before the
              start of the other.  */
1713       tree part_cond_expr
1714         = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1715             fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
1716             fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
1717       if (*cond_expr)
1718         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1719                                   *cond_expr, part_cond_expr);
1720       else
1721         *cond_expr = part_cond_expr;
1722     }
1723   return true;
1724 }
1725
1726 /* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
1727    every address ADDR accessed by D:
1728
1729      *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
1730
1731    In this case, every element accessed by D is aligned to at least
1732    ALIGN bytes.
1733
1734    If ALIGN is zero then instead set *SEG_MAX_OUT so that:
1735
1736      *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
1737
1738 static void
1739 get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
1740                      tree *seg_max_out, HOST_WIDE_INT align)
1741 {
1742   /* Each access has the following pattern:
1743
1744           <- |seg_len| ->
1745           <--- A: -ve step --->
1746           +-----+-------+-----+-------+-----+
1747           | n-1 | ,.... |  0  | ..... | n-1 |
1748           +-----+-------+-----+-------+-----+
1749                         <--- B: +ve step --->
1750                         <- |seg_len| ->
1751                         |
1752                    base address
1753
1754      where "n" is the number of scalar iterations covered by the segment.
1755      (This should be VF for a particular pair if we know that both steps
1756      are the same, otherwise it will be the full number of scalar loop
1757      iterations.)
1758
1759      A is the range of bytes accessed when the step is negative,
1760      B is the range when the step is positive.
1761
1762      If the access size is "access_size" bytes, the lowest addressed byte is:
1763
1764          base + (step < 0 ? seg_len : 0)   [LB]
1765
1766      and the highest addressed byte is always below:
1767
1768          base + (step < 0 ? 0 : seg_len) + access_size   [UB]
1769
1770      Thus:
1771
1772          LB <= ADDR < UB
1773
1774      If ALIGN is nonzero, all three values are aligned to at least ALIGN
1775      bytes, so:
1776
1777          LB <= ADDR <= UB - ALIGN
1778
1779      where "- ALIGN" folds naturally with the "+ access_size" and often
1780      cancels it out.
1781
1782      We don't try to simplify LB and UB beyond this (e.g. by using
1783      MIN and MAX based on whether seg_len rather than the stride is
1784      negative) because it is possible for the absolute size of the
1785      segment to overflow the range of a ssize_t.
1786
1787      Keeping the pointer_plus outside of the cond_expr should allow
1788      the cond_exprs to be shared with other alias checks.  */
1789   tree indicator = dr_direction_indicator (d.dr);
1790   tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
1791                                fold_convert (ssizetype, indicator),
1792                                ssize_int (0));
1793   tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
1794                                             DR_OFFSET (d.dr));
1795   addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
1796   tree seg_len
1797     = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
1798
1799   tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1800                                 seg_len, size_zero_node);
1801   tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
1802                                 size_zero_node, seg_len);
1803   max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
1804                            size_int (d.access_size - align));
1805
1806   *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
1807   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
1808 }
1809
1810 /* Given two data references and segment lengths described by DR_A and DR_B,
1811    create expression checking if the two addresses ranges intersect with
1812    each other:
1813
1814      ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
1815      || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
1816
1817 static void
1818 create_intersect_range_checks (struct loop *loop, tree *cond_expr,
1819                                const dr_with_seg_len& dr_a,
1820                                const dr_with_seg_len& dr_b)
1821 {
1822   *cond_expr = NULL_TREE;
1823   if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
1824     return;
1825
1826   unsigned HOST_WIDE_INT min_align;
1827   tree_code cmp_code;
1828   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
1829       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
1830     {
1831       /* In this case adding access_size to seg_len is likely to give
1832          a simple X * step, where X is either the number of scalar
1833          iterations or the vectorization factor.  We're better off
1834          keeping that, rather than subtracting an alignment from it.
1835
1836          In this case the maximum values are exclusive and so there is
1837          no alias if the maximum of one segment equals the minimum
1838          of another.  */
1839       min_align = 0;
1840       cmp_code = LE_EXPR;
1841     }
1842   else
1843     {
1844       /* Calculate the minimum alignment shared by all four pointers,
1845          then arrange for this alignment to be subtracted from the
1846          exclusive maximum values to get inclusive maximum values.
1847          This "- min_align" is cumulative with a "+ access_size"
1848          in the calculation of the maximum values.  In the best
1849          (and common) case, the two cancel each other out, leaving
1850          us with an inclusive bound based only on seg_len.  In the
1851          worst case we're simply adding a smaller number than before.
1852
1853          Because the maximum values are inclusive, there is an alias
1854          if the maximum value of one segment is equal to the minimum
1855          value of the other.  */
1856       min_align = MIN (dr_a.align, dr_b.align);
1857       cmp_code = LT_EXPR;
1858     }
1859
1860   tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
1861   get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
1862   get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
1863
1864   *cond_expr
1865     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1866         fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
1867         fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
1868 }
1869
1870 /* Create a conditional expression that represents the run-time checks for
1871    overlapping of address ranges represented by a list of data references
1872    pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
1873    COND_EXPR is the conditional expression to be used in the if statement
1874    that controls which version of the loop gets executed at runtime.  */
1875
1876 void
1877 create_runtime_alias_checks (struct loop *loop,
1878                              vec<dr_with_seg_len_pair_t> *alias_pairs,
1879                              tree * cond_expr)
1880 {
1881   tree part_cond_expr;
1882
1883   fold_defer_overflow_warnings ();
1884   for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
1885     {
1886       const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
1887       const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
1888
1889       if (dump_enabled_p ())
1890         dump_printf (MSG_NOTE,
1891                      "create runtime check for data references %T and %T\n",
1892                      DR_REF (dr_a.dr), DR_REF (dr_b.dr));
1893
1894       /* Create condition expression for each pair data references.  */
1895       create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
1896       if (*cond_expr)
1897         *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1898                                   *cond_expr, part_cond_expr);
1899       else
1900         *cond_expr = part_cond_expr;
1901     }
1902   fold_undefer_and_ignore_overflow_warnings ();
1903 }
1904
1905 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1906    expressions.  */
1907 static bool
1908 dr_equal_offsets_p1 (tree offset1, tree offset2)
1909 {
1910   bool res;
1911
1912   STRIP_NOPS (offset1);
1913   STRIP_NOPS (offset2);
1914
1915   if (offset1 == offset2)
1916     return true;
1917
1918   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1919       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1920     return false;
1921
1922   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1923                              TREE_OPERAND (offset2, 0));
1924
1925   if (!res || !BINARY_CLASS_P (offset1))
1926     return res;
1927
1928   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1929                              TREE_OPERAND (offset2, 1));
1930
1931   return res;
1932 }
1933
1934 /* Check if DRA and DRB have equal offsets.  */
1935 bool
1936 dr_equal_offsets_p (struct data_reference *dra,
1937                     struct data_reference *drb)
1938 {
1939   tree offset1, offset2;
1940
1941   offset1 = DR_OFFSET (dra);
1942   offset2 = DR_OFFSET (drb);
1943
1944   return dr_equal_offsets_p1 (offset1, offset2);
1945 }
1946
1947 /* Returns true if FNA == FNB.  */
1948
1949 static bool
1950 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1951 {
1952   unsigned i, n = fna.length ();
1953
1954   if (n != fnb.length ())
1955     return false;
1956
1957   for (i = 0; i < n; i++)
1958     if (!operand_equal_p (fna[i], fnb[i], 0))
1959       return false;
1960
1961   return true;
1962 }
1963
1964 /* If all the functions in CF are the same, returns one of them,
1965    otherwise returns NULL.  */
1966
1967 static affine_fn
1968 common_affine_function (conflict_function *cf)
1969 {
1970   unsigned i;
1971   affine_fn comm;
1972
1973   if (!CF_NONTRIVIAL_P (cf))
1974     return affine_fn ();
1975
1976   comm = cf->fns[0];
1977
1978   for (i = 1; i < cf->n; i++)
1979     if (!affine_function_equal_p (comm, cf->fns[i]))
1980       return affine_fn ();
1981
1982   return comm;
1983 }
1984
1985 /* Returns the base of the affine function FN.  */
1986
1987 static tree
1988 affine_function_base (affine_fn fn)
1989 {
1990   return fn[0];
1991 }
1992
1993 /* Returns true if FN is a constant.  */
1994
1995 static bool
1996 affine_function_constant_p (affine_fn fn)
1997 {
1998   unsigned i;
1999   tree coef;
2000
2001   for (i = 1; fn.iterate (i, &coef); i++)
2002     if (!integer_zerop (coef))
2003       return false;
2004
2005   return true;
2006 }
2007
2008 /* Returns true if FN is the zero constant function.  */
2009
2010 static bool
2011 affine_function_zero_p (affine_fn fn)
2012 {
2013   return (integer_zerop (affine_function_base (fn))
2014           && affine_function_constant_p (fn));
2015 }
2016
2017 /* Returns a signed integer type with the largest precision from TA
2018    and TB.  */
2019
2020 static tree
2021 signed_type_for_types (tree ta, tree tb)
2022 {
2023   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2024     return signed_type_for (ta);
2025   else
2026     return signed_type_for (tb);
2027 }
2028
2029 /* Applies operation OP on affine functions FNA and FNB, and returns the
2030    result.  */
2031
2032 static affine_fn
2033 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2034 {
2035   unsigned i, n, m;
2036   affine_fn ret;
2037   tree coef;
2038
2039   if (fnb.length () > fna.length ())
2040     {
2041       n = fna.length ();
2042       m = fnb.length ();
2043     }
2044   else
2045     {
2046       n = fnb.length ();
2047       m = fna.length ();
2048     }
2049
2050   ret.create (m);
2051   for (i = 0; i < n; i++)
2052     {
2053       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2054                                          TREE_TYPE (fnb[i]));
2055       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2056     }
2057
2058   for (; fna.iterate (i, &coef); i++)
2059     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2060                                  coef, integer_zero_node));
2061   for (; fnb.iterate (i, &coef); i++)
2062     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2063                                  integer_zero_node, coef));
2064
2065   return ret;
2066 }
2067
2068 /* Returns the sum of affine functions FNA and FNB.  */
2069
2070 static affine_fn
2071 affine_fn_plus (affine_fn fna, affine_fn fnb)
2072 {
2073   return affine_fn_op (PLUS_EXPR, fna, fnb);
2074 }
2075
2076 /* Returns the difference of affine functions FNA and FNB.  */
2077
2078 static affine_fn
2079 affine_fn_minus (affine_fn fna, affine_fn fnb)
2080 {
2081   return affine_fn_op (MINUS_EXPR, fna, fnb);
2082 }
2083
2084 /* Frees affine function FN.  */
2085
2086 static void
2087 affine_fn_free (affine_fn fn)
2088 {
2089   fn.release ();
2090 }
2091
2092 /* Determine for each subscript in the data dependence relation DDR
2093    the distance.  */
2094
2095 static void
2096 compute_subscript_distance (struct data_dependence_relation *ddr)
2097 {
2098   conflict_function *cf_a, *cf_b;
2099   affine_fn fn_a, fn_b, diff;
2100
2101   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2102     {
2103       unsigned int i;
2104
2105       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2106         {
2107           struct subscript *subscript;
2108
2109           subscript = DDR_SUBSCRIPT (ddr, i);
2110           cf_a = SUB_CONFLICTS_IN_A (subscript);
2111           cf_b = SUB_CONFLICTS_IN_B (subscript);
2112
2113           fn_a = common_affine_function (cf_a);
2114           fn_b = common_affine_function (cf_b);
2115           if (!fn_a.exists () || !fn_b.exists ())
2116             {
2117               SUB_DISTANCE (subscript) = chrec_dont_know;
2118               return;
2119             }
2120           diff = affine_fn_minus (fn_a, fn_b);
2121
2122           if (affine_function_constant_p (diff))
2123             SUB_DISTANCE (subscript) = affine_function_base (diff);
2124           else
2125             SUB_DISTANCE (subscript) = chrec_dont_know;
2126
2127           affine_fn_free (diff);
2128         }
2129     }
2130 }
2131
2132 /* Returns the conflict function for "unknown".  */
2133
2134 static conflict_function *
2135 conflict_fn_not_known (void)
2136 {
2137   conflict_function *fn = XCNEW (conflict_function);
2138   fn->n = NOT_KNOWN;
2139
2140   return fn;
2141 }
2142
2143 /* Returns the conflict function for "independent".  */
2144
2145 static conflict_function *
2146 conflict_fn_no_dependence (void)
2147 {
2148   conflict_function *fn = XCNEW (conflict_function);
2149   fn->n = NO_DEPENDENCE;
2150
2151   return fn;
2152 }
2153
2154 /* Returns true if the address of OBJ is invariant in LOOP.  */
2155
2156 static bool
2157 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
2158 {
2159   while (handled_component_p (obj))
2160     {
2161       if (TREE_CODE (obj) == ARRAY_REF)
2162         {
2163           for (int i = 1; i < 4; ++i)
2164             if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2165                                                         loop->num))
2166               return false;
2167         }
2168       else if (TREE_CODE (obj) == COMPONENT_REF)
2169         {
2170           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2171                                                       loop->num))
2172             return false;
2173         }
2174       obj = TREE_OPERAND (obj, 0);
2175     }
2176
2177   if (!INDIRECT_REF_P (obj)
2178       && TREE_CODE (obj) != MEM_REF)
2179     return true;
2180
2181   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2182                                                   loop->num);
2183 }
2184
2185 /* Returns false if we can prove that data references A and B do not alias,
2186    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2187    considered.  */
2188
2189 bool
2190 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2191                 bool loop_nest)
2192 {
2193   tree addr_a = DR_BASE_OBJECT (a);
2194   tree addr_b = DR_BASE_OBJECT (b);
2195
2196   /* If we are not processing a loop nest but scalar code we
2197      do not need to care about possible cross-iteration dependences
2198      and thus can process the full original reference.  Do so,
2199      similar to how loop invariant motion applies extra offset-based
2200      disambiguation.  */
2201   if (!loop_nest)
2202     {
2203       aff_tree off1, off2;
2204       poly_widest_int size1, size2;
2205       get_inner_reference_aff (DR_REF (a), &off1, &size1);
2206       get_inner_reference_aff (DR_REF (b), &off2, &size2);
2207       aff_combination_scale (&off1, -1);
2208       aff_combination_add (&off2, &off1);
2209       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2210         return false;
2211     }
2212
2213   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2214       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2215       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2216       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2217     return false;
2218
2219   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2220      do not know the size of the base-object.  So we cannot do any
2221      offset/overlap based analysis but have to rely on points-to
2222      information only.  */
2223   if (TREE_CODE (addr_a) == MEM_REF
2224       && (DR_UNCONSTRAINED_BASE (a)
2225           || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2226     {
2227       /* For true dependences we can apply TBAA.  */
2228       if (flag_strict_aliasing
2229           && DR_IS_WRITE (a) && DR_IS_READ (b)
2230           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2231                                      get_alias_set (DR_REF (b))))
2232         return false;
2233       if (TREE_CODE (addr_b) == MEM_REF)
2234         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2235                                        TREE_OPERAND (addr_b, 0));
2236       else
2237         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2238                                        build_fold_addr_expr (addr_b));
2239     }
2240   else if (TREE_CODE (addr_b) == MEM_REF
2241            && (DR_UNCONSTRAINED_BASE (b)
2242                || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
2243     {
2244       /* For true dependences we can apply TBAA.  */
2245       if (flag_strict_aliasing
2246           && DR_IS_WRITE (a) && DR_IS_READ (b)
2247           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
2248                                      get_alias_set (DR_REF (b))))
2249         return false;
2250       if (TREE_CODE (addr_a) == MEM_REF)
2251         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
2252                                        TREE_OPERAND (addr_b, 0));
2253       else
2254         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
2255                                        TREE_OPERAND (addr_b, 0));
2256     }
2257
2258   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
2259      that is being subsetted in the loop nest.  */
2260   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
2261     return refs_output_dependent_p (addr_a, addr_b);
2262   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
2263     return refs_anti_dependent_p (addr_a, addr_b);
2264   return refs_may_alias_p (addr_a, addr_b);
2265 }
2266
2267 /* REF_A and REF_B both satisfy access_fn_component_p.  Return true
2268    if it is meaningful to compare their associated access functions
2269    when checking for dependencies.  */
2270
2271 static bool
2272 access_fn_components_comparable_p (tree ref_a, tree ref_b)
2273 {
2274   /* Allow pairs of component refs from the following sets:
2275
2276        { REALPART_EXPR, IMAGPART_EXPR }
2277        { COMPONENT_REF }
2278        { ARRAY_REF }.  */
2279   tree_code code_a = TREE_CODE (ref_a);
2280   tree_code code_b = TREE_CODE (ref_b);
2281   if (code_a == IMAGPART_EXPR)
2282     code_a = REALPART_EXPR;
2283   if (code_b == IMAGPART_EXPR)
2284     code_b = REALPART_EXPR;
2285   if (code_a != code_b)
2286     return false;
2287
2288   if (TREE_CODE (ref_a) == COMPONENT_REF)
2289     /* ??? We cannot simply use the type of operand #0 of the refs here as
2290        the Fortran compiler smuggles type punning into COMPONENT_REFs.
2291        Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
2292     return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
2293             == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
2294
2295   return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
2296                              TREE_TYPE (TREE_OPERAND (ref_b, 0)));
2297 }
2298
2299 /* Initialize a data dependence relation between data accesses A and
2300    B.  NB_LOOPS is the number of loops surrounding the references: the
2301    size of the classic distance/direction vectors.  */
2302
2303 struct data_dependence_relation *
2304 initialize_data_dependence_relation (struct data_reference *a,
2305                                      struct data_reference *b,
2306                                      vec<loop_p> loop_nest)
2307 {
2308   struct data_dependence_relation *res;
2309   unsigned int i;
2310
2311   res = XCNEW (struct data_dependence_relation);
2312   DDR_A (res) = a;
2313   DDR_B (res) = b;
2314   DDR_LOOP_NEST (res).create (0);
2315   DDR_SUBSCRIPTS (res).create (0);
2316   DDR_DIR_VECTS (res).create (0);
2317   DDR_DIST_VECTS (res).create (0);
2318
2319   if (a == NULL || b == NULL)
2320     {
2321       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2322       return res;
2323     }
2324
2325   /* If the data references do not alias, then they are independent.  */
2326   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
2327     {
2328       DDR_ARE_DEPENDENT (res) = chrec_known;
2329       return res;
2330     }
2331
2332   unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
2333   unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
2334   if (num_dimensions_a == 0 || num_dimensions_b == 0)
2335     {
2336       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2337       return res;
2338     }
2339
2340   /* For unconstrained bases, the root (highest-indexed) subscript
2341      describes a variation in the base of the original DR_REF rather
2342      than a component access.  We have no type that accurately describes
2343      the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
2344      applying this subscript) so limit the search to the last real
2345      component access.
2346
2347      E.g. for:
2348
2349         void
2350         f (int a[][8], int b[][8])
2351         {
2352           for (int i = 0; i < 8; ++i)
2353             a[i * 2][0] = b[i][0];
2354         }
2355
2356      the a and b accesses have a single ARRAY_REF component reference [0]
2357      but have two subscripts.  */
2358   if (DR_UNCONSTRAINED_BASE (a))
2359     num_dimensions_a -= 1;
2360   if (DR_UNCONSTRAINED_BASE (b))
2361     num_dimensions_b -= 1;
2362
2363   /* These structures describe sequences of component references in
2364      DR_REF (A) and DR_REF (B).  Each component reference is tied to a
2365      specific access function.  */
2366   struct {
2367     /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
2368        DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
2369        indices.  In C notation, these are the indices of the rightmost
2370        component references; e.g. for a sequence .b.c.d, the start
2371        index is for .d.  */
2372     unsigned int start_a;
2373     unsigned int start_b;
2374
2375     /* The sequence contains LENGTH consecutive access functions from
2376        each DR.  */
2377     unsigned int length;
2378
2379     /* The enclosing objects for the A and B sequences respectively,
2380        i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
2381        and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
2382     tree object_a;
2383     tree object_b;
2384   } full_seq = {}, struct_seq = {};
2385
2386   /* Before each iteration of the loop:
2387
2388      - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
2389      - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
2390   unsigned int index_a = 0;
2391   unsigned int index_b = 0;
2392   tree ref_a = DR_REF (a);
2393   tree ref_b = DR_REF (b);
2394
2395   /* Now walk the component references from the final DR_REFs back up to
2396      the enclosing base objects.  Each component reference corresponds
2397      to one access function in the DR, with access function 0 being for
2398      the final DR_REF and the highest-indexed access function being the
2399      one that is applied to the base of the DR.
2400
2401      Look for a sequence of component references whose access functions
2402      are comparable (see access_fn_components_comparable_p).  If more
2403      than one such sequence exists, pick the one nearest the base
2404      (which is the leftmost sequence in C notation).  Store this sequence
2405      in FULL_SEQ.
2406
2407      For example, if we have:
2408
2409         struct foo { struct bar s; ... } (*a)[10], (*b)[10];
2410
2411         A: a[0][i].s.c.d
2412         B: __real b[0][i].s.e[i].f
2413
2414      (where d is the same type as the real component of f) then the access
2415      functions would be:
2416
2417                          0   1   2   3
2418         A:              .d  .c  .s [i]
2419
2420                  0   1   2   3   4   5
2421         B:  __real  .f [i]  .e  .s [i]
2422
2423      The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
2424      and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
2425      COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
2426      the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
2427      so is comparable.  The A3/B5 column contains two ARRAY_REFs that
2428      index foo[10] arrays, so is again comparable.  The sequence is
2429      therefore:
2430
2431         A: [1, 3]  (i.e. [i].s.c)
2432         B: [3, 5]  (i.e. [i].s.e)
2433
2434      Also look for sequences of component references whose access
2435      functions are comparable and whose enclosing objects have the same
2436      RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
2437      example, STRUCT_SEQ would be:
2438
2439         A: [1, 2]  (i.e. s.c)
2440         B: [3, 4]  (i.e. s.e)  */
2441   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
2442     {
2443       /* REF_A and REF_B must be one of the component access types
2444          allowed by dr_analyze_indices.  */
2445       gcc_checking_assert (access_fn_component_p (ref_a));
2446       gcc_checking_assert (access_fn_component_p (ref_b));
2447
2448       /* Get the immediately-enclosing objects for REF_A and REF_B,
2449          i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
2450          and DR_ACCESS_FN (B, INDEX_B).  */
2451       tree object_a = TREE_OPERAND (ref_a, 0);
2452       tree object_b = TREE_OPERAND (ref_b, 0);
2453
2454       tree type_a = TREE_TYPE (object_a);
2455       tree type_b = TREE_TYPE (object_b);
2456       if (access_fn_components_comparable_p (ref_a, ref_b))
2457         {
2458           /* This pair of component accesses is comparable for dependence
2459              analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
2460              DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
2461           if (full_seq.start_a + full_seq.length != index_a
2462               || full_seq.start_b + full_seq.length != index_b)
2463             {
2464               /* The accesses don't extend the current sequence,
2465                  so start a new one here.  */
2466               full_seq.start_a = index_a;
2467               full_seq.start_b = index_b;
2468               full_seq.length = 0;
2469             }
2470
2471           /* Add this pair of references to the sequence.  */
2472           full_seq.length += 1;
2473           full_seq.object_a = object_a;
2474           full_seq.object_b = object_b;
2475
2476           /* If the enclosing objects are structures (and thus have the
2477              same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
2478           if (TREE_CODE (type_a) == RECORD_TYPE)
2479             struct_seq = full_seq;
2480
2481           /* Move to the next containing reference for both A and B.  */
2482           ref_a = object_a;
2483           ref_b = object_b;
2484           index_a += 1;
2485           index_b += 1;
2486           continue;
2487         }
2488
2489       /* Try to approach equal type sizes.  */
2490       if (!COMPLETE_TYPE_P (type_a)
2491           || !COMPLETE_TYPE_P (type_b)
2492           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
2493           || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
2494         break;
2495
2496       unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
2497       unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
2498       if (size_a <= size_b)
2499         {
2500           index_a += 1;
2501           ref_a = object_a;
2502         }
2503       if (size_b <= size_a)
2504         {
2505           index_b += 1;
2506           ref_b = object_b;
2507         }
2508     }
2509
2510   /* See whether FULL_SEQ ends at the base and whether the two bases
2511      are equal.  We do not care about TBAA or alignment info so we can
2512      use OEP_ADDRESS_OF to avoid false negatives.  */
2513   tree base_a = DR_BASE_OBJECT (a);
2514   tree base_b = DR_BASE_OBJECT (b);
2515   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
2516                       && full_seq.start_b + full_seq.length == num_dimensions_b
2517                       && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
2518                       && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
2519                       && types_compatible_p (TREE_TYPE (base_a),
2520                                              TREE_TYPE (base_b))
2521                       && (!loop_nest.exists ()
2522                           || (object_address_invariant_in_loop_p
2523                               (loop_nest[0], base_a))));
2524
2525   /* If the bases are the same, we can include the base variation too.
2526      E.g. the b accesses in:
2527
2528        for (int i = 0; i < n; ++i)
2529          b[i + 4][0] = b[i][0];
2530
2531      have a definite dependence distance of 4, while for:
2532
2533        for (int i = 0; i < n; ++i)
2534          a[i + 4][0] = b[i][0];
2535
2536      the dependence distance depends on the gap between a and b.
2537
2538      If the bases are different then we can only rely on the sequence
2539      rooted at a structure access, since arrays are allowed to overlap
2540      arbitrarily and change shape arbitrarily.  E.g. we treat this as
2541      valid code:
2542
2543        int a[256];
2544        ...
2545        ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
2546
2547      where two lvalues with the same int[4][3] type overlap, and where
2548      both lvalues are distinct from the object's declared type.  */
2549   if (same_base_p)
2550     {
2551       if (DR_UNCONSTRAINED_BASE (a))
2552         full_seq.length += 1;
2553     }
2554   else
2555     full_seq = struct_seq;
2556
2557   /* Punt if we didn't find a suitable sequence.  */
2558   if (full_seq.length == 0)
2559     {
2560       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2561       return res;
2562     }
2563
2564   if (!same_base_p)
2565     {
2566       /* Partial overlap is possible for different bases when strict aliasing
2567          is not in effect.  It's also possible if either base involves a union
2568          access; e.g. for:
2569
2570            struct s1 { int a[2]; };
2571            struct s2 { struct s1 b; int c; };
2572            struct s3 { int d; struct s1 e; };
2573            union u { struct s2 f; struct s3 g; } *p, *q;
2574
2575          the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
2576          "p->g.e" (base "p->g") and might partially overlap the s1 at
2577          "q->g.e" (base "q->g").  */
2578       if (!flag_strict_aliasing
2579           || ref_contains_union_access_p (full_seq.object_a)
2580           || ref_contains_union_access_p (full_seq.object_b))
2581         {
2582           DDR_ARE_DEPENDENT (res) = chrec_dont_know;
2583           return res;
2584         }
2585
2586       DDR_COULD_BE_INDEPENDENT_P (res) = true;
2587       if (!loop_nest.exists ()
2588           || (object_address_invariant_in_loop_p (loop_nest[0],
2589                                                   full_seq.object_a)
2590               && object_address_invariant_in_loop_p (loop_nest[0],
2591                                                      full_seq.object_b)))
2592         {
2593           DDR_OBJECT_A (res) = full_seq.object_a;
2594           DDR_OBJECT_B (res) = full_seq.object_b;
2595         }
2596     }
2597
2598   DDR_AFFINE_P (res) = true;
2599   DDR_ARE_DEPENDENT (res) = NULL_TREE;
2600   DDR_SUBSCRIPTS (res).create (full_seq.length);
2601   DDR_LOOP_NEST (res) = loop_nest;
2602   DDR_INNER_LOOP (res) = 0;
2603   DDR_SELF_REFERENCE (res) = false;
2604
2605   for (i = 0; i < full_seq.length; ++i)
2606     {
2607       struct subscript *subscript;
2608
2609       subscript = XNEW (struct subscript);
2610       SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
2611       SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
2612       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
2613       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
2614       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
2615       SUB_DISTANCE (subscript) = chrec_dont_know;
2616       DDR_SUBSCRIPTS (res).safe_push (subscript);
2617     }
2618
2619   return res;
2620 }
2621
2622 /* Frees memory used by the conflict function F.  */
2623
2624 static void
2625 free_conflict_function (conflict_function *f)
2626 {
2627   unsigned i;
2628
2629   if (CF_NONTRIVIAL_P (f))
2630     {
2631       for (i = 0; i < f->n; i++)
2632         affine_fn_free (f->fns[i]);
2633     }
2634   free (f);
2635 }
2636
2637 /* Frees memory used by SUBSCRIPTS.  */
2638
2639 static void
2640 free_subscripts (vec<subscript_p> subscripts)
2641 {
2642   unsigned i;
2643   subscript_p s;
2644
2645   FOR_EACH_VEC_ELT (subscripts, i, s)
2646     {
2647       free_conflict_function (s->conflicting_iterations_in_a);
2648       free_conflict_function (s->conflicting_iterations_in_b);
2649       free (s);
2650     }
2651   subscripts.release ();
2652 }
2653
2654 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
2655    description.  */
2656
2657 static inline void
2658 finalize_ddr_dependent (struct data_dependence_relation *ddr,
2659                         tree chrec)
2660 {
2661   DDR_ARE_DEPENDENT (ddr) = chrec;
2662   free_subscripts (DDR_SUBSCRIPTS (ddr));
2663   DDR_SUBSCRIPTS (ddr).create (0);
2664 }
2665
2666 /* The dependence relation DDR cannot be represented by a distance
2667    vector.  */
2668
2669 static inline void
2670 non_affine_dependence_relation (struct data_dependence_relation *ddr)
2671 {
2672   if (dump_file && (dump_flags & TDF_DETAILS))
2673     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
2674
2675   DDR_AFFINE_P (ddr) = false;
2676 }
2677
2678 \f
2679
2680 /* This section contains the classic Banerjee tests.  */
2681
2682 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
2683    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
2684
2685 static inline bool
2686 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2687 {
2688   return (evolution_function_is_constant_p (chrec_a)
2689           && evolution_function_is_constant_p (chrec_b));
2690 }
2691
2692 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
2693    variable, i.e., if the SIV (Single Index Variable) test is true.  */
2694
2695 static bool
2696 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
2697 {
2698   if ((evolution_function_is_constant_p (chrec_a)
2699        && evolution_function_is_univariate_p (chrec_b))
2700       || (evolution_function_is_constant_p (chrec_b)
2701           && evolution_function_is_univariate_p (chrec_a)))
2702     return true;
2703
2704   if (evolution_function_is_univariate_p (chrec_a)
2705       && evolution_function_is_univariate_p (chrec_b))
2706     {
2707       switch (TREE_CODE (chrec_a))
2708         {
2709         case POLYNOMIAL_CHREC:
2710           switch (TREE_CODE (chrec_b))
2711             {
2712             case POLYNOMIAL_CHREC:
2713               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
2714                 return false;
2715               /* FALLTHRU */
2716
2717             default:
2718               return true;
2719             }
2720
2721         default:
2722           return true;
2723         }
2724     }
2725
2726   return false;
2727 }
2728
2729 /* Creates a conflict function with N dimensions.  The affine functions
2730    in each dimension follow.  */
2731
2732 static conflict_function *
2733 conflict_fn (unsigned n, ...)
2734 {
2735   unsigned i;
2736   conflict_function *ret = XCNEW (conflict_function);
2737   va_list ap;
2738
2739   gcc_assert (n > 0 && n <= MAX_DIM);
2740   va_start (ap, n);
2741
2742   ret->n = n;
2743   for (i = 0; i < n; i++)
2744     ret->fns[i] = va_arg (ap, affine_fn);
2745   va_end (ap);
2746
2747   return ret;
2748 }
2749
2750 /* Returns constant affine function with value CST.  */
2751
2752 static affine_fn
2753 affine_fn_cst (tree cst)
2754 {
2755   affine_fn fn;
2756   fn.create (1);
2757   fn.quick_push (cst);
2758   return fn;
2759 }
2760
2761 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
2762
2763 static affine_fn
2764 affine_fn_univar (tree cst, unsigned dim, tree coef)
2765 {
2766   affine_fn fn;
2767   fn.create (dim + 1);
2768   unsigned i;
2769
2770   gcc_assert (dim > 0);
2771   fn.quick_push (cst);
2772   for (i = 1; i < dim; i++)
2773     fn.quick_push (integer_zero_node);
2774   fn.quick_push (coef);
2775   return fn;
2776 }
2777
2778 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
2779    *OVERLAPS_B are initialized to the functions that describe the
2780    relation between the elements accessed twice by CHREC_A and
2781    CHREC_B.  For k >= 0, the following property is verified:
2782
2783    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2784
2785 static void
2786 analyze_ziv_subscript (tree chrec_a,
2787                        tree chrec_b,
2788                        conflict_function **overlaps_a,
2789                        conflict_function **overlaps_b,
2790                        tree *last_conflicts)
2791 {
2792   tree type, difference;
2793   dependence_stats.num_ziv++;
2794
2795   if (dump_file && (dump_flags & TDF_DETAILS))
2796     fprintf (dump_file, "(analyze_ziv_subscript \n");
2797
2798   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2799   chrec_a = chrec_convert (type, chrec_a, NULL);
2800   chrec_b = chrec_convert (type, chrec_b, NULL);
2801   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2802
2803   switch (TREE_CODE (difference))
2804     {
2805     case INTEGER_CST:
2806       if (integer_zerop (difference))
2807         {
2808           /* The difference is equal to zero: the accessed index
2809              overlaps for each iteration in the loop.  */
2810           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2811           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2812           *last_conflicts = chrec_dont_know;
2813           dependence_stats.num_ziv_dependent++;
2814         }
2815       else
2816         {
2817           /* The accesses do not overlap.  */
2818           *overlaps_a = conflict_fn_no_dependence ();
2819           *overlaps_b = conflict_fn_no_dependence ();
2820           *last_conflicts = integer_zero_node;
2821           dependence_stats.num_ziv_independent++;
2822         }
2823       break;
2824
2825     default:
2826       /* We're not sure whether the indexes overlap.  For the moment,
2827          conservatively answer "don't know".  */
2828       if (dump_file && (dump_flags & TDF_DETAILS))
2829         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
2830
2831       *overlaps_a = conflict_fn_not_known ();
2832       *overlaps_b = conflict_fn_not_known ();
2833       *last_conflicts = chrec_dont_know;
2834       dependence_stats.num_ziv_unimplemented++;
2835       break;
2836     }
2837
2838   if (dump_file && (dump_flags & TDF_DETAILS))
2839     fprintf (dump_file, ")\n");
2840 }
2841
2842 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
2843    and only if it fits to the int type.  If this is not the case, or the
2844    bound  on the number of iterations of LOOP could not be derived, returns
2845    chrec_dont_know.  */
2846
2847 static tree
2848 max_stmt_executions_tree (struct loop *loop)
2849 {
2850   widest_int nit;
2851
2852   if (!max_stmt_executions (loop, &nit))
2853     return chrec_dont_know;
2854
2855   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
2856     return chrec_dont_know;
2857
2858   return wide_int_to_tree (unsigned_type_node, nit);
2859 }
2860
2861 /* Determine whether the CHREC is always positive/negative.  If the expression
2862    cannot be statically analyzed, return false, otherwise set the answer into
2863    VALUE.  */
2864
2865 static bool
2866 chrec_is_positive (tree chrec, bool *value)
2867 {
2868   bool value0, value1, value2;
2869   tree end_value, nb_iter;
2870
2871   switch (TREE_CODE (chrec))
2872     {
2873     case POLYNOMIAL_CHREC:
2874       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
2875           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
2876         return false;
2877
2878       /* FIXME -- overflows.  */
2879       if (value0 == value1)
2880         {
2881           *value = value0;
2882           return true;
2883         }
2884
2885       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
2886          and the proof consists in showing that the sign never
2887          changes during the execution of the loop, from 0 to
2888          loop->nb_iterations.  */
2889       if (!evolution_function_is_affine_p (chrec))
2890         return false;
2891
2892       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
2893       if (chrec_contains_undetermined (nb_iter))
2894         return false;
2895
2896 #if 0
2897       /* TODO -- If the test is after the exit, we may decrease the number of
2898          iterations by one.  */
2899       if (after_exit)
2900         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
2901 #endif
2902
2903       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
2904
2905       if (!chrec_is_positive (end_value, &value2))
2906         return false;
2907
2908       *value = value0;
2909       return value0 == value1;
2910
2911     case INTEGER_CST:
2912       switch (tree_int_cst_sgn (chrec))
2913         {
2914         case -1:
2915           *value = false;
2916           break;
2917         case 1:
2918           *value = true;
2919           break;
2920         default:
2921           return false;
2922         }
2923       return true;
2924
2925     default:
2926       return false;
2927     }
2928 }
2929
2930
2931 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
2932    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
2933    *OVERLAPS_B are initialized to the functions that describe the
2934    relation between the elements accessed twice by CHREC_A and
2935    CHREC_B.  For k >= 0, the following property is verified:
2936
2937    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2938
2939 static void
2940 analyze_siv_subscript_cst_affine (tree chrec_a,
2941                                   tree chrec_b,
2942                                   conflict_function **overlaps_a,
2943                                   conflict_function **overlaps_b,
2944                                   tree *last_conflicts)
2945 {
2946   bool value0, value1, value2;
2947   tree type, difference, tmp;
2948
2949   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2950   chrec_a = chrec_convert (type, chrec_a, NULL);
2951   chrec_b = chrec_convert (type, chrec_b, NULL);
2952   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
2953
2954   /* Special case overlap in the first iteration.  */
2955   if (integer_zerop (difference))
2956     {
2957       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2958       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2959       *last_conflicts = integer_one_node;
2960       return;
2961     }
2962
2963   if (!chrec_is_positive (initial_condition (difference), &value0))
2964     {
2965       if (dump_file && (dump_flags & TDF_DETAILS))
2966         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
2967
2968       dependence_stats.num_siv_unimplemented++;
2969       *overlaps_a = conflict_fn_not_known ();
2970       *overlaps_b = conflict_fn_not_known ();
2971       *last_conflicts = chrec_dont_know;
2972       return;
2973     }
2974   else
2975     {
2976       if (value0 == false)
2977         {
2978           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
2979               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
2980             {
2981               if (dump_file && (dump_flags & TDF_DETAILS))
2982                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2983
2984               *overlaps_a = conflict_fn_not_known ();
2985               *overlaps_b = conflict_fn_not_known ();
2986               *last_conflicts = chrec_dont_know;
2987               dependence_stats.num_siv_unimplemented++;
2988               return;
2989             }
2990           else
2991             {
2992               if (value1 == true)
2993                 {
2994                   /* Example:
2995                      chrec_a = 12
2996                      chrec_b = {10, +, 1}
2997                   */
2998
2999                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3000                     {
3001                       HOST_WIDE_INT numiter;
3002                       struct loop *loop = get_chrec_loop (chrec_b);
3003
3004                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3005                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
3006                                          fold_build1 (ABS_EXPR, type, difference),
3007                                          CHREC_RIGHT (chrec_b));
3008                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3009                       *last_conflicts = integer_one_node;
3010
3011
3012                       /* Perform weak-zero siv test to see if overlap is
3013                          outside the loop bounds.  */
3014                       numiter = max_stmt_executions_int (loop);
3015
3016                       if (numiter >= 0
3017                           && compare_tree_int (tmp, numiter) > 0)
3018                         {
3019                           free_conflict_function (*overlaps_a);
3020                           free_conflict_function (*overlaps_b);
3021                           *overlaps_a = conflict_fn_no_dependence ();
3022                           *overlaps_b = conflict_fn_no_dependence ();
3023                           *last_conflicts = integer_zero_node;
3024                           dependence_stats.num_siv_independent++;
3025                           return;
3026                         }
3027                       dependence_stats.num_siv_dependent++;
3028                       return;
3029                     }
3030
3031                   /* When the step does not divide the difference, there are
3032                      no overlaps.  */
3033                   else
3034                     {
3035                       *overlaps_a = conflict_fn_no_dependence ();
3036                       *overlaps_b = conflict_fn_no_dependence ();
3037                       *last_conflicts = integer_zero_node;
3038                       dependence_stats.num_siv_independent++;
3039                       return;
3040                     }
3041                 }
3042
3043               else
3044                 {
3045                   /* Example:
3046                      chrec_a = 12
3047                      chrec_b = {10, +, -1}
3048
3049                      In this case, chrec_a will not overlap with chrec_b.  */
3050                   *overlaps_a = conflict_fn_no_dependence ();
3051                   *overlaps_b = conflict_fn_no_dependence ();
3052                   *last_conflicts = integer_zero_node;
3053                   dependence_stats.num_siv_independent++;
3054                   return;
3055                 }
3056             }
3057         }
3058       else
3059         {
3060           if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3061               || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3062             {
3063               if (dump_file && (dump_flags & TDF_DETAILS))
3064                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
3065
3066               *overlaps_a = conflict_fn_not_known ();
3067               *overlaps_b = conflict_fn_not_known ();
3068               *last_conflicts = chrec_dont_know;
3069               dependence_stats.num_siv_unimplemented++;
3070               return;
3071             }
3072           else
3073             {
3074               if (value2 == false)
3075                 {
3076                   /* Example:
3077                      chrec_a = 3
3078                      chrec_b = {10, +, -1}
3079                   */
3080                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3081                     {
3082                       HOST_WIDE_INT numiter;
3083                       struct loop *loop = get_chrec_loop (chrec_b);
3084
3085                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3086                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3087                                          CHREC_RIGHT (chrec_b));
3088                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3089                       *last_conflicts = integer_one_node;
3090
3091                       /* Perform weak-zero siv test to see if overlap is
3092                          outside the loop bounds.  */
3093                       numiter = max_stmt_executions_int (loop);
3094
3095                       if (numiter >= 0
3096                           && compare_tree_int (tmp, numiter) > 0)
3097                         {
3098                           free_conflict_function (*overlaps_a);
3099                           free_conflict_function (*overlaps_b);
3100                           *overlaps_a = conflict_fn_no_dependence ();
3101                           *overlaps_b = conflict_fn_no_dependence ();
3102                           *last_conflicts = integer_zero_node;
3103                           dependence_stats.num_siv_independent++;
3104                           return;
3105                         }
3106                       dependence_stats.num_siv_dependent++;
3107                       return;
3108                     }
3109
3110                   /* When the step does not divide the difference, there
3111                      are no overlaps.  */
3112                   else
3113                     {
3114                       *overlaps_a = conflict_fn_no_dependence ();
3115                       *overlaps_b = conflict_fn_no_dependence ();
3116                       *last_conflicts = integer_zero_node;
3117                       dependence_stats.num_siv_independent++;
3118                       return;
3119                     }
3120                 }
3121               else
3122                 {
3123                   /* Example:
3124                      chrec_a = 3
3125                      chrec_b = {4, +, 1}
3126
3127                      In this case, chrec_a will not overlap with chrec_b.  */
3128                   *overlaps_a = conflict_fn_no_dependence ();
3129                   *overlaps_b = conflict_fn_no_dependence ();
3130                   *last_conflicts = integer_zero_node;
3131                   dependence_stats.num_siv_independent++;
3132                   return;
3133                 }
3134             }
3135         }
3136     }
3137 }
3138
3139 /* Helper recursive function for initializing the matrix A.  Returns
3140    the initial value of CHREC.  */
3141
3142 static tree
3143 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3144 {
3145   gcc_assert (chrec);
3146
3147   switch (TREE_CODE (chrec))
3148     {
3149     case POLYNOMIAL_CHREC:
3150       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
3151       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3152
3153     case PLUS_EXPR:
3154     case MULT_EXPR:
3155     case MINUS_EXPR:
3156       {
3157         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3158         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
3159
3160         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
3161       }
3162
3163     CASE_CONVERT:
3164       {
3165         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3166         return chrec_convert (chrec_type (chrec), op, NULL);
3167       }
3168
3169     case BIT_NOT_EXPR:
3170       {
3171         /* Handle ~X as -1 - X.  */
3172         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
3173         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
3174                               build_int_cst (TREE_TYPE (chrec), -1), op);
3175       }
3176
3177     case INTEGER_CST:
3178       return chrec;
3179
3180     default:
3181       gcc_unreachable ();
3182       return NULL_TREE;
3183     }
3184 }
3185
3186 #define FLOOR_DIV(x,y) ((x) / (y))
3187
3188 /* Solves the special case of the Diophantine equation:
3189    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
3190
3191    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
3192    number of iterations that loops X and Y run.  The overlaps will be
3193    constructed as evolutions in dimension DIM.  */
3194
3195 static void
3196 compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
3197                                          HOST_WIDE_INT step_a,
3198                                          HOST_WIDE_INT step_b,
3199                                          affine_fn *overlaps_a,
3200                                          affine_fn *overlaps_b,
3201                                          tree *last_conflicts, int dim)
3202 {
3203   if (((step_a > 0 && step_b > 0)
3204        || (step_a < 0 && step_b < 0)))
3205     {
3206       HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
3207       HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
3208
3209       gcd_steps_a_b = gcd (step_a, step_b);
3210       step_overlaps_a = step_b / gcd_steps_a_b;
3211       step_overlaps_b = step_a / gcd_steps_a_b;
3212
3213       if (niter > 0)
3214         {
3215           tau2 = FLOOR_DIV (niter, step_overlaps_a);
3216           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
3217           last_conflict = tau2;
3218           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
3219         }
3220       else
3221         *last_conflicts = chrec_dont_know;
3222
3223       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
3224                                       build_int_cst (NULL_TREE,
3225                                                      step_overlaps_a));
3226       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
3227                                       build_int_cst (NULL_TREE,
3228                                                      step_overlaps_b));
3229     }
3230
3231   else
3232     {
3233       *overlaps_a = affine_fn_cst (integer_zero_node);
3234       *overlaps_b = affine_fn_cst (integer_zero_node);
3235       *last_conflicts = integer_zero_node;
3236     }
3237 }
3238
3239 /* Solves the special case of a Diophantine equation where CHREC_A is
3240    an affine bivariate function, and CHREC_B is an affine univariate
3241    function.  For example,
3242
3243    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
3244
3245    has the following overlapping functions:
3246
3247    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
3248    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
3249    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
3250
3251    FORNOW: This is a specialized implementation for a case occurring in
3252    a common benchmark.  Implement the general algorithm.  */
3253
3254 static void
3255 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
3256                                       conflict_function **overlaps_a,
3257                                       conflict_function **overlaps_b,
3258                                       tree *last_conflicts)
3259 {
3260   bool xz_p, yz_p, xyz_p;
3261   HOST_WIDE_INT step_x, step_y, step_z;
3262   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
3263   affine_fn overlaps_a_xz, overlaps_b_xz;
3264   affine_fn overlaps_a_yz, overlaps_b_yz;
3265   affine_fn overlaps_a_xyz, overlaps_b_xyz;
3266   affine_fn ova1, ova2, ovb;
3267   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
3268
3269   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
3270   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
3271   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
3272
3273   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
3274   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
3275   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
3276
3277   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
3278     {
3279       if (dump_file && (dump_flags & TDF_DETAILS))
3280         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
3281
3282       *overlaps_a = conflict_fn_not_known ();
3283       *overlaps_b = conflict_fn_not_known ();
3284       *last_conflicts = chrec_dont_know;
3285       return;
3286     }
3287
3288   niter = MIN (niter_x, niter_z);
3289   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
3290                                            &overlaps_a_xz,
3291                                            &overlaps_b_xz,
3292                                            &last_conflicts_xz, 1);
3293   niter = MIN (niter_y, niter_z);
3294   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
3295                                            &overlaps_a_yz,
3296                                            &overlaps_b_yz,
3297                                            &last_conflicts_yz, 2);
3298   niter = MIN (niter_x, niter_z);
3299   niter = MIN (niter_y, niter);
3300   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
3301                                            &overlaps_a_xyz,
3302                                            &overlaps_b_xyz,
3303                                            &last_conflicts_xyz, 3);
3304
3305   xz_p = !integer_zerop (last_conflicts_xz);
3306   yz_p = !integer_zerop (last_conflicts_yz);
3307   xyz_p = !integer_zerop (last_conflicts_xyz);
3308
3309   if (xz_p || yz_p || xyz_p)
3310     {
3311       ova1 = affine_fn_cst (integer_zero_node);
3312       ova2 = affine_fn_cst (integer_zero_node);
3313       ovb = affine_fn_cst (integer_zero_node);
3314       if (xz_p)
3315         {
3316           affine_fn t0 = ova1;
3317           affine_fn t2 = ovb;
3318
3319           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
3320           ovb = affine_fn_plus (ovb, overlaps_b_xz);
3321           affine_fn_free (t0);
3322           affine_fn_free (t2);
3323           *last_conflicts = last_conflicts_xz;
3324         }
3325       if (yz_p)
3326         {
3327           affine_fn t0 = ova2;
3328           affine_fn t2 = ovb;
3329
3330           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
3331           ovb = affine_fn_plus (ovb, overlaps_b_yz);
3332           affine_fn_free (t0);
3333           affine_fn_free (t2);
3334           *last_conflicts = last_conflicts_yz;
3335         }
3336       if (xyz_p)
3337         {
3338           affine_fn t0 = ova1;
3339           affine_fn t2 = ova2;
3340           affine_fn t4 = ovb;
3341
3342           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
3343           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
3344           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
3345           affine_fn_free (t0);
3346           affine_fn_free (t2);
3347           affine_fn_free (t4);
3348           *last_conflicts = last_conflicts_xyz;
3349         }
3350       *overlaps_a = conflict_fn (2, ova1, ova2);
3351       *overlaps_b = conflict_fn (1, ovb);
3352     }
3353   else
3354     {
3355       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3356       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3357       *last_conflicts = integer_zero_node;
3358     }
3359
3360   affine_fn_free (overlaps_a_xz);
3361   affine_fn_free (overlaps_b_xz);
3362   affine_fn_free (overlaps_a_yz);
3363   affine_fn_free (overlaps_b_yz);
3364   affine_fn_free (overlaps_a_xyz);
3365   affine_fn_free (overlaps_b_xyz);
3366 }
3367
3368 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
3369
3370 static void
3371 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
3372                     int size)
3373 {
3374   memcpy (vec2, vec1, size * sizeof (*vec1));
3375 }
3376
3377 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
3378
3379 static void
3380 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
3381                     int m, int n)
3382 {
3383   int i;
3384
3385   for (i = 0; i < m; i++)
3386     lambda_vector_copy (mat1[i], mat2[i], n);
3387 }
3388
3389 /* Store the N x N identity matrix in MAT.  */
3390
3391 static void
3392 lambda_matrix_id (lambda_matrix mat, int size)
3393 {
3394   int i, j;
3395
3396   for (i = 0; i < size; i++)
3397     for (j = 0; j < size; j++)
3398       mat[i][j] = (i == j) ? 1 : 0;
3399 }
3400
3401 /* Return the index of the first nonzero element of vector VEC1 between
3402    START and N.  We must have START <= N.
3403    Returns N if VEC1 is the zero vector.  */
3404
3405 static int
3406 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
3407 {
3408   int j = start;
3409   while (j < n && vec1[j] == 0)
3410     j++;
3411   return j;
3412 }
3413
3414 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
3415    R2 = R2 + CONST1 * R1.  */
3416
3417 static void
3418 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
3419                        lambda_int const1)
3420 {
3421   int i;
3422
3423   if (const1 == 0)
3424     return;
3425
3426   for (i = 0; i < n; i++)
3427     mat[r2][i] += const1 * mat[r1][i];
3428 }
3429
3430 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
3431    and store the result in VEC2.  */
3432
3433 static void
3434 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
3435                           int size, lambda_int const1)
3436 {
3437   int i;
3438
3439   if (const1 == 0)
3440     lambda_vector_clear (vec2, size);
3441   else
3442     for (i = 0; i < size; i++)
3443       vec2[i] = const1 * vec1[i];
3444 }
3445
3446 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
3447
3448 static void
3449 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
3450                       int size)
3451 {
3452   lambda_vector_mult_const (vec1, vec2, size, -1);
3453 }
3454
3455 /* Negate row R1 of matrix MAT which has N columns.  */
3456
3457 static void
3458 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
3459 {
3460   lambda_vector_negate (mat[r1], mat[r1], n);
3461 }
3462
3463 /* Return true if two vectors are equal.  */
3464
3465 static bool
3466 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
3467 {
3468   int i;
3469   for (i = 0; i < size; i++)
3470     if (vec1[i] != vec2[i])
3471       return false;
3472   return true;
3473 }
3474
3475 /* Given an M x N integer matrix A, this function determines an M x
3476    M unimodular matrix U, and an M x N echelon matrix S such that
3477    "U.A = S".  This decomposition is also known as "right Hermite".
3478
3479    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
3480    Restructuring Compilers" Utpal Banerjee.  */
3481
3482 static void
3483 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
3484                              lambda_matrix S, lambda_matrix U)
3485 {
3486   int i, j, i0 = 0;
3487
3488   lambda_matrix_copy (A, S, m, n);
3489   lambda_matrix_id (U, m);
3490
3491   for (j = 0; j < n; j++)
3492     {
3493       if (lambda_vector_first_nz (S[j], m, i0) < m)
3494         {
3495           ++i0;
3496           for (i = m - 1; i >= i0; i--)
3497             {
3498               while (S[i][j] != 0)
3499                 {
3500                   lambda_int sigma, factor, a, b;
3501
3502                   a = S[i-1][j];
3503                   b = S[i][j];
3504                   sigma = (a * b < 0) ? -1: 1;
3505                   a = abs (a);
3506                   b = abs (b);
3507                   factor = sigma * (a / b);
3508
3509                   lambda_matrix_row_add (S, n, i, i-1, -factor);
3510                   std::swap (S[i], S[i-1]);
3511
3512                   lambda_matrix_row_add (U, m, i, i-1, -factor);
3513                   std::swap (U[i], U[i-1]);
3514                 }
3515             }
3516         }
3517     }
3518 }
3519
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when initial conditions are
   parameters, because it uses lambda matrices of integers.

   The answer is returned through the three output parameters, which
   are written on every path: *OVERLAPS_A and *OVERLAPS_B receive the
   conflict functions for CHREC_A and CHREC_B, and *LAST_CONFLICTS
   receives the matching last-conflict value (an integer constant or
   chrec_dont_know on the paths handled directly here).  The conflict
   functions are "not known" when the case is not handled and "no
   dependence" when the accesses are shown never to overlap.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
                                 tree chrec_b,
                                 conflict_function **overlaps_a,
                                 conflict_function **overlaps_b,
                                 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
         loop.  This returns before the scratch obstack is set up and
         before the opening dump line is printed, so there is nothing
         to release and no closing dump to emit.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  /* All three matrices live on SCRATCH_OBSTACK, which is released at
     the END_ANALYZE_SUBS_AA label; every exit from here on has to go
     through that label.  */
  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  /* NOTE(review): initialize_matrix_A is defined elsewhere; from its
     use here it presumably stores the steps of CHREC_A in rows
     0 .. NB_VARS_A - 1 of A and the negated steps of CHREC_B in the
     following rows, and returns the constant initial value of the
     chrec -- confirm against its definition.  GAMMA is the difference
     of the two initial values.  */
  init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
  init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
  gamma = init_b - init_a;

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
        {
          HOST_WIDE_INT step_a, step_b;
          HOST_WIDE_INT niter, niter_a, niter_b;
          affine_fn ova, ovb;

          /* The smaller of the two iteration bounds is handed to the
             univariate helper.  */
          niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
          niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
          niter = MIN (niter_a, niter_b);
          step_a = int_cst_value (CHREC_RIGHT (chrec_a));
          step_b = int_cst_value (CHREC_RIGHT (chrec_b));

          compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
                                                   &ova, &ovb,
                                                   last_conflicts, 1);
          *overlaps_a = conflict_fn (1, ova);
          *overlaps_b = conflict_fn (1, ovb);
        }

      else if (nb_vars_a == 2 && nb_vars_b == 1)
        compute_overlap_steps_for_affine_1_2
          (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      /* Same helper with the roles of A and B exchanged, so that the
         two-variable chrec is always passed first.  */
      else if (nb_vars_a == 1 && nb_vars_b == 2)
        compute_overlap_steps_for_affine_1_2
          (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: too many variables.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  lambda_matrix_right_hermite (A, dim, 1, S, U);

  /* Normalize the sign of the top entry of S; negating row 0 of U as
     well keeps the relation U.A = S.  */
  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
         solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  A[1][0]
         is negated before the comparison with A[0][0].  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
           || (A[0][0] < 0 && -A[1][0] < 0)))
        {
          /* The solutions are given by:
             |
             | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
             |                           [u21 u22]    [y0]

             For a given integer t.  Using the following variables,

             | i0 = u11 * gamma / gcd_alpha_beta
             | j0 = u12 * gamma / gcd_alpha_beta
             | i1 = u21
             | j1 = u22

             the solutions are:

             | x0 = i0 + i1 * t,
             | y0 = j0 + j1 * t.  */
          HOST_WIDE_INT i0, j0, i1, j1;

          i0 = U[0][0] * gamma / gcd_alpha_beta;
          j0 = U[0][1] * gamma / gcd_alpha_beta;
          i1 = U[1][0];
          j1 = U[1][1];

          /* With a zero multiplier the corresponding coordinate is
             fixed at I0 (resp. J0) for every t, so a negative value
             can never be an iteration number.  */
          if ((i1 == 0 && i0 < 0)
              || (j1 == 0 && j0 < 0))
            {
              /* There is no solution.
                 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
                 falls in here, but for the moment we don't look at the
                 upper bound of the iteration domain.  */
              *overlaps_a = conflict_fn_no_dependence ();
              *overlaps_b = conflict_fn_no_dependence ();
              *last_conflicts = integer_zero_node;
              goto end_analyze_subs_aa;
            }

          if (i1 > 0 && j1 > 0)
            {
              HOST_WIDE_INT niter_a
                = max_stmt_executions_int (get_chrec_loop (chrec_a));
              HOST_WIDE_INT niter_b
                = max_stmt_executions_int (get_chrec_loop (chrec_b));
              HOST_WIDE_INT niter = MIN (niter_a, niter_b);

              /* (X0, Y0) is a solution of the Diophantine equation:
                 "chrec_a (X0) = chrec_b (Y0)".  TAU1 is the smallest t
                 for which both coordinates are nonnegative.  */
              HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
                                        CEIL (-j0, j1));
              HOST_WIDE_INT x0 = i1 * tau1 + i0;
              HOST_WIDE_INT y0 = j1 * tau1 + j0;

              /* (X1, Y1) is the smallest positive solution of the eq
                 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
                 first conflict occurs.  */
              HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
              HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
              HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

              /* NOTE(review): a nonpositive NITER is treated as "bound
                 unknown" here -- presumably max_stmt_executions_int
                 returns a negative value in that case; confirm.  */
              if (niter > 0)
                {
                  /* TAU2 is the largest t that keeps both coordinates
                     within the iteration bounds.  */
                  HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter_a - i0, i1),
                                            FLOOR_DIV (niter_b - j0, j1));
                  HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;

                  /* If the overlap occurs outside of the bounds of the
                     loop, there is no dependence.  */
                  if (x1 >= niter_a || y1 >= niter_b)
                    {
                      *overlaps_a = conflict_fn_no_dependence ();
                      *overlaps_b = conflict_fn_no_dependence ();
                      *last_conflicts = integer_zero_node;
                      goto end_analyze_subs_aa;
                    }
                  else
                    *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
                }
              else
                *last_conflicts = chrec_dont_know;

              /* The conflicting iterations are X1 + I1 * k for CHREC_A
                 and Y1 + J1 * k for CHREC_B.  */
              *overlaps_a
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, x1),
                                                 1,
                                                 build_int_cst (NULL_TREE, i1)));
              *overlaps_b
                = conflict_fn (1,
                               affine_fn_univar (build_int_cst (NULL_TREE, y1),
                                                 1,
                                                 build_int_cst (NULL_TREE, j1)));
            }
          else
            {
              /* FIXME: For the moment, the upper bound of the
                 iteration domain for i and j is not checked.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
              *overlaps_a = conflict_fn_not_known ();
              *overlaps_b = conflict_fn_not_known ();
              *last_conflicts = chrec_dont_know;
            }
        }
      else
        {
          /* Evolutions of opposite signs (or a zero step) are not
             handled.  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
          *overlaps_a = conflict_fn_not_known ();
          *overlaps_b = conflict_fn_not_known ();
          *last_conflicts = chrec_dont_know;
        }
    }
  else
    {
      /* More than one variable on either side with a nonzero GAMMA is
         not handled.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

  /* Common exit: release the scratch matrices and close the dump
     record opened at the top of the function.  */
end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
3790
3791 /* Returns true when analyze_subscript_affine_affine can be used for
3792    determining the dependence relation between chrec_a and chrec_b,
3793    that contain symbols.  This function modifies chrec_a and chrec_b
3794    such that the analysis result is the same, and such that they don't
3795    contain symbols, and then can safely be passed to the analyzer.
3796
3797    Example: The analysis of the following tuples of evolutions produce
3798    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
3799    vs. {0, +, 1}_1
3800
3801    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
3802    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
3803 */
3804
3805 static bool
3806 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
3807 {
3808   tree diff, type, left_a, left_b, right_b;
3809
3810   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
3811       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
3812     /* FIXME: For the moment not handled.  Might be refined later.  */
3813     return false;
3814
3815   type = chrec_type (*chrec_a);
3816   left_a = CHREC_LEFT (*chrec_a);
3817   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
3818   diff = chrec_fold_minus (type, left_a, left_b);
3819
3820   if (!evolution_function_is_constant_p (diff))
3821     return false;
3822
3823   if (dump_file && (dump_flags & TDF_DETAILS))
3824     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
3825
3826   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
3827                                      diff, CHREC_RIGHT (*chrec_a));
3828   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
3829   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
3830                                      build_int_cst (type, 0),
3831                                      right_b);
3832   return true;
3833 }
3834
3835 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
3836    *OVERLAPS_B are initialized to the functions that describe the
3837    relation between the elements accessed twice by CHREC_A and
3838    CHREC_B.  For k >= 0, the following property is verified:
3839
3840    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3841
3842 static void
3843 analyze_siv_subscript (tree chrec_a,
3844                        tree chrec_b,
3845                        conflict_function **overlaps_a,
3846                        conflict_function **overlaps_b,
3847                        tree *last_conflicts,
3848                        int loop_nest_num)
3849 {
3850   dependence_stats.num_siv++;
3851
3852   if (dump_file && (dump_flags & TDF_DETAILS))
3853     fprintf (dump_file, "(analyze_siv_subscript \n");
3854
3855   if (evolution_function_is_constant_p (chrec_a)
3856       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3857     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
3858                                       overlaps_a, overlaps_b, last_conflicts);
3859
3860   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3861            && evolution_function_is_constant_p (chrec_b))
3862     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
3863                                       overlaps_b, overlaps_a, last_conflicts);
3864
3865   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
3866            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
3867     {
3868       if (!chrec_contains_symbols (chrec_a)
3869           && !chrec_contains_symbols (chrec_b))
3870         {
3871           analyze_subscript_affine_affine (chrec_a, chrec_b,
3872                                            overlaps_a, overlaps_b,
3873                                            last_conflicts);
3874
3875           if (CF_NOT_KNOWN_P (*overlaps_a)
3876               || CF_NOT_KNOWN_P (*overlaps_b))
3877             dependence_stats.num_siv_unimplemented++;
3878           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3879                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3880             dependence_stats.num_siv_independent++;
3881           else
3882             dependence_stats.num_siv_dependent++;
3883         }
3884       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
3885                                                         &chrec_b))
3886         {
3887           analyze_subscript_affine_affine (chrec_a, chrec_b,
3888                                            overlaps_a, overlaps_b,
3889                                            last_conflicts);
3890
3891           if (CF_NOT_KNOWN_P (*overlaps_a)
3892               || CF_NOT_KNOWN_P (*overlaps_b))
3893             dependence_stats.num_siv_unimplemented++;
3894           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
3895                    || CF_NO_DEPENDENCE_P (*overlaps_b))
3896             dependence_stats.num_siv_independent++;
3897           else
3898             dependence_stats.num_siv_dependent++;
3899         }
3900       else
3901         goto siv_subscript_dontknow;
3902     }
3903
3904   else
3905     {
3906     siv_subscript_dontknow:;
3907       if (dump_file && (dump_flags & TDF_DETAILS))
3908         fprintf (dump_file, "  siv test failed: unimplemented");
3909       *overlaps_a = conflict_fn_not_known ();
3910       *overlaps_b = conflict_fn_not_known ();
3911       *last_conflicts = chrec_dont_know;
3912       dependence_stats.num_siv_unimplemented++;
3913     }
3914
3915   if (dump_file && (dump_flags & TDF_DETAILS))
3916     fprintf (dump_file, ")\n");
3917 }
3918
3919 /* Returns false if we can prove that the greatest common divisor of the steps
3920    of CHREC does not divide CST, false otherwise.  */
3921
3922 static bool
3923 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
3924 {
3925   HOST_WIDE_INT cd = 0, val;
3926   tree step;
3927
3928   if (!tree_fits_shwi_p (cst))
3929     return true;
3930   val = tree_to_shwi (cst);
3931
3932   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
3933     {
3934       step = CHREC_RIGHT (chrec);
3935       if (!tree_fits_shwi_p (step))
3936         return true;
3937       cd = gcd (cd, tree_to_shwi (step));
3938       chrec = CHREC_LEFT (chrec);
3939     }
3940
3941   return val % cd == 0;
3942 }
3943
3944 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
3945    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
3946    functions that describe the relation between the elements accessed
3947    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
3948    is verified:
3949
3950    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3951
3952 static void
3953 analyze_miv_subscript (tree chrec_a,
3954                        tree chrec_b,
3955                        conflict_function **overlaps_a,
3956                        conflict_function **overlaps_b,
3957                        tree *last_conflicts,
3958                        struct loop *loop_nest)
3959 {
3960   tree type, difference;
3961
3962   dependence_stats.num_miv++;
3963   if (dump_file && (dump_flags & TDF_DETAILS))
3964     fprintf (dump_file, "(analyze_miv_subscript \n");
3965
3966   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3967   chrec_a = chrec_convert (type, chrec_a, NULL);
3968   chrec_b = chrec_convert (type, chrec_b, NULL);
3969   difference = chrec_fold_minus (type, chrec_a, chrec_b);
3970
3971   if (eq_evolutions_p (chrec_a, chrec_b))
3972     {
3973       /* Access functions are the same: all the elements are accessed
3974          in the same order.  */
3975       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3976       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3977       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
3978       dependence_stats.num_miv_dependent++;
3979     }
3980
3981   else if (evolution_function_is_constant_p (difference)
3982            && evolution_function_is_affine_multivariate_p (chrec_a,
3983                                                            loop_nest->num)
3984            && !gcd_of_steps_may_divide_p (chrec_a, difference))
3985     {
3986       /* testsuite/.../ssa-chrec-33.c
3987          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
3988
3989          The difference is 1, and all the evolution steps are multiples
3990          of 2, consequently there are no overlapping elements.  */
3991       *overlaps_a = conflict_fn_no_dependence ();
3992       *overlaps_b = conflict_fn_no_dependence ();
3993       *last_conflicts = integer_zero_node;
3994       dependence_stats.num_miv_independent++;
3995     }
3996
3997   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
3998            && !chrec_contains_symbols (chrec_a)
3999            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
4000            && !chrec_contains_symbols (chrec_b))
4001     {
4002       /* testsuite/.../ssa-chrec-35.c
4003          {0, +, 1}_2  vs.  {0, +, 1}_3
4004          the overlapping elements are respectively located at iterations:
4005          {0, +, 1}_x and {0, +, 1}_x,
4006          in other words, we have the equality:
4007          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4008
4009          Other examples:
4010          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4011          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4012
4013          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4014          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4015       */
4016       analyze_subscript_affine_affine (chrec_a, chrec_b,
4017                                        overlaps_a, overlaps_b, last_conflicts);
4018
4019       if (CF_NOT_KNOWN_P (*overlaps_a)
4020           || CF_NOT_KNOWN_P (*overlaps_b))
4021         dependence_stats.num_miv_unimplemented++;
4022       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4023                || CF_NO_DEPENDENCE_P (*overlaps_b))
4024         dependence_stats.num_miv_independent++;
4025       else
4026         dependence_stats.num_miv_dependent++;
4027     }
4028
4029   else
4030     {
4031       /* When the analysis is too difficult, answer "don't know".  */
4032       if (dump_file && (dump_flags & TDF_DETAILS))
4033         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4034
4035       *overlaps_a = conflict_fn_not_known ();
4036       *overlaps_b = conflict_fn_not_known ();
4037       *last_conflicts = chrec_dont_know;
4038       dependence_stats.num_miv_unimplemented++;
4039     }
4040
4041   if (dump_file && (dump_flags & TDF_DETAILS))
4042     fprintf (dump_file, ")\n");
4043 }
4044
4045 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
4046    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4047    OVERLAP_ITERATIONS_B are initialized with two functions that
4048    describe the iterations that contain conflicting elements.
4049
4050    Remark: For an integer k >= 0, the following equality is true:
4051
4052    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4053 */
4054
4055 static void
4056 analyze_overlapping_iterations (tree chrec_a,
4057                                 tree chrec_b,
4058                                 conflict_function **overlap_iterations_a,
4059                                 conflict_function **overlap_iterations_b,
4060                                 tree *last_conflicts, struct loop *loop_nest)
4061 {
4062   unsigned int lnn = loop_nest->num;
4063
4064   dependence_stats.num_subscript_tests++;
4065
4066   if (dump_file && (dump_flags & TDF_DETAILS))
4067     {
4068       fprintf (dump_file, "(analyze_overlapping_iterations \n");
4069       fprintf (dump_file, "  (chrec_a = ");
4070       print_generic_expr (dump_file, chrec_a);
4071       fprintf (dump_file, ")\n  (chrec_b = ");
4072       print_generic_expr (dump_file, chrec_b);
4073       fprintf (dump_file, ")\n");
4074     }
4075
4076   if (chrec_a == NULL_TREE
4077       || chrec_b == NULL_TREE
4078       || chrec_contains_undetermined (chrec_a)
4079       || chrec_contains_undetermined (chrec_b))
4080     {
4081       dependence_stats.num_subscript_undetermined++;
4082
4083       *overlap_iterations_a = conflict_fn_not_known ();
4084       *overlap_iterations_b = conflict_fn_not_known ();
4085     }
4086
4087   /* If they are the same chrec, and are affine, they overlap
4088      on every iteration.  */
4089   else if (eq_evolutions_p (chrec_a, chrec_b)
4090            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4091                || operand_equal_p (chrec_a, chrec_b, 0)))
4092     {
4093       dependence_stats.num_same_subscript_function++;
4094       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4095       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4096       *last_conflicts = chrec_dont_know;
4097     }
4098
4099   /* If they aren't the same, and aren't affine, we can't do anything
4100      yet.  */
4101   else if ((chrec_contains_symbols (chrec_a)
4102             || chrec_contains_symbols (chrec_b))
4103            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4104                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4105     {
4106       dependence_stats.num_subscript_undetermined++;
4107       *overlap_iterations_a = conflict_fn_not_known ();
4108       *overlap_iterations_b = conflict_fn_not_known ();
4109     }
4110
4111   else if (ziv_subscript_p (chrec_a, chrec_b))
4112     analyze_ziv_subscript (chrec_a, chrec_b,
4113                            overlap_iterations_a, overlap_iterations_b,
4114                            last_conflicts);
4115
4116   else if (siv_subscript_p (chrec_a, chrec_b))
4117     analyze_siv_subscript (chrec_a, chrec_b,
4118                            overlap_iterations_a, overlap_iterations_b,
4119                            last_conflicts, lnn);
4120
4121   else
4122     analyze_miv_subscript (chrec_a, chrec_b,
4123                            overlap_iterations_a, overlap_iterations_b,
4124                            last_conflicts, loop_nest);
4125
4126   if (dump_file && (dump_flags & TDF_DETAILS))
4127     {
4128       fprintf (dump_file, "  (overlap_iterations_a = ");
4129       dump_conflict_function (dump_file, *overlap_iterations_a);
4130       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
4131       dump_conflict_function (dump_file, *overlap_iterations_b);
4132       fprintf (dump_file, "))\n");
4133     }
4134 }
4135
4136 /* Helper function for uniquely inserting distance vectors.  */
4137
4138 static void
4139 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
4140 {
4141   unsigned i;
4142   lambda_vector v;
4143
4144   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
4145     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
4146       return;
4147
4148   DDR_DIST_VECTS (ddr).safe_push (dist_v);
4149 }
4150
4151 /* Helper function for uniquely inserting direction vectors.  */
4152
4153 static void
4154 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
4155 {
4156   unsigned i;
4157   lambda_vector v;
4158
4159   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
4160     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
4161       return;
4162
4163   DDR_DIR_VECTS (ddr).safe_push (dir_v);
4164 }
4165
4166 /* Add a distance of 1 on all the loops outer than INDEX.  If we
4167    haven't yet determined a distance for this outer loop, push a new
4168    distance vector composed of the previous distance, and a distance
4169    of 1 for this outer loop.  Example:
4170
4171    | loop_1
4172    |   loop_2
4173    |     A[10]
4174    |   endloop_2
4175    | endloop_1
4176
4177    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
4178    save (0, 1), then we have to save (1, 0).  */
4179
4180 static void
4181 add_outer_distances (struct data_dependence_relation *ddr,
4182                      lambda_vector dist_v, int index)
4183 {
4184   /* For each outer loop where init_v is not set, the accesses are
4185      in dependence of distance 1 in the loop.  */
4186   while (--index >= 0)
4187     {
4188       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4189       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4190       save_v[index] = 1;
4191       save_dist_v (ddr, save_v);
4192     }
4193 }
4194
4195 /* Return false when fail to represent the data dependence as a
4196    distance vector.  A_INDEX is the index of the first reference
4197    (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
4198    second reference.  INIT_B is set to true when a component has been
4199    added to the distance vector DIST_V.  INDEX_CARRY is then set to
4200    the index in DIST_V that carries the dependence.  */
4201
4202 static bool
4203 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
4204                              unsigned int a_index, unsigned int b_index,
4205                              lambda_vector dist_v, bool *init_b,
4206                              int *index_carry)
4207 {
4208   unsigned i;
4209   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4210
4211   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4212     {
4213       tree access_fn_a, access_fn_b;
4214       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
4215
4216       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4217         {
4218           non_affine_dependence_relation (ddr);
4219           return false;
4220         }
4221
4222       access_fn_a = SUB_ACCESS_FN (subscript, a_index);
4223       access_fn_b = SUB_ACCESS_FN (subscript, b_index);
4224
4225       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
4226           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
4227         {
4228           HOST_WIDE_INT dist;
4229           int index;
4230           int var_a = CHREC_VARIABLE (access_fn_a);
4231           int var_b = CHREC_VARIABLE (access_fn_b);
4232
4233           if (var_a != var_b
4234               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
4235             {
4236               non_affine_dependence_relation (ddr);
4237               return false;
4238             }
4239
4240           dist = int_cst_value (SUB_DISTANCE (subscript));
4241           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
4242           *index_carry = MIN (index, *index_carry);
4243
4244           /* This is the subscript coupling test.  If we have already
4245              recorded a distance for this loop (a distance coming from
4246              another subscript), it should be the same.  For example,
4247              in the following code, there is no dependence:
4248
4249              | loop i = 0, N, 1
4250              |   T[i+1][i] = ...
4251              |   ... = T[i][i]
4252              | endloop
4253           */
4254           if (init_v[index] != 0 && dist_v[index] != dist)
4255             {
4256               finalize_ddr_dependent (ddr, chrec_known);
4257               return false;
4258             }
4259
4260           dist_v[index] = dist;
4261           init_v[index] = 1;
4262           *init_b = true;
4263         }
4264       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
4265         {
4266           /* This can be for example an affine vs. constant dependence
4267              (T[i] vs. T[3]) that is not an affine dependence and is
4268              not representable as a distance vector.  */
4269           non_affine_dependence_relation (ddr);
4270           return false;
4271         }
4272     }
4273
4274   return true;
4275 }
4276
4277 /* Return true when the DDR contains only constant access functions.  */
4278
4279 static bool
4280 constant_access_functions (const struct data_dependence_relation *ddr)
4281 {
4282   unsigned i;
4283   subscript *sub;
4284
4285   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4286     if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
4287         || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
4288       return false;
4289
4290   return true;
4291 }
4292
4293 /* Helper function for the case where DDR_A and DDR_B are the same
4294    multivariate access function with a constant step.  For an example
4295    see pr34635-1.c.  */
4296
4297 static void
4298 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
4299 {
4300   int x_1, x_2;
4301   tree c_1 = CHREC_LEFT (c_2);
4302   tree c_0 = CHREC_LEFT (c_1);
4303   lambda_vector dist_v;
4304   HOST_WIDE_INT v1, v2, cd;
4305
4306   /* Polynomials with more than 2 variables are not handled yet.  When
4307      the evolution steps are parameters, it is not possible to
4308      represent the dependence using classical distance vectors.  */
4309   if (TREE_CODE (c_0) != INTEGER_CST
4310       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
4311       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
4312     {
4313       DDR_AFFINE_P (ddr) = false;
4314       return;
4315     }
4316
4317   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
4318   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
4319
4320   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
4321   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4322   v1 = int_cst_value (CHREC_RIGHT (c_1));
4323   v2 = int_cst_value (CHREC_RIGHT (c_2));
4324   cd = gcd (v1, v2);
4325   v1 /= cd;
4326   v2 /= cd;
4327
4328   if (v2 < 0)
4329     {
4330       v2 = -v2;
4331       v1 = -v1;
4332     }
4333
4334   dist_v[x_1] = v2;
4335   dist_v[x_2] = -v1;
4336   save_dist_v (ddr, dist_v);
4337
4338   add_outer_distances (ddr, dist_v, x_1);
4339 }
4340
4341 /* Helper function for the case where DDR_A and DDR_B are the same
4342    access functions.  */
4343
4344 static void
4345 add_other_self_distances (struct data_dependence_relation *ddr)
4346 {
4347   lambda_vector dist_v;
4348   unsigned i;
4349   int index_carry = DDR_NB_LOOPS (ddr);
4350   subscript *sub;
4351
4352   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4353     {
4354       tree access_fun = SUB_ACCESS_FN (sub, 0);
4355
4356       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
4357         {
4358           if (!evolution_function_is_univariate_p (access_fun))
4359             {
4360               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
4361                 {
4362                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
4363                   return;
4364                 }
4365
4366               access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
4367
4368               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
4369                 add_multivariate_self_dist (ddr, access_fun);
4370               else
4371                 /* The evolution step is not constant: it varies in
4372                    the outer loop, so this cannot be represented by a
4373                    distance vector.  For example in pr34635.c the
4374                    evolution is {0, +, {0, +, 4}_1}_2.  */
4375                 DDR_AFFINE_P (ddr) = false;
4376
4377               return;
4378             }
4379
4380           index_carry = MIN (index_carry,
4381                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
4382                                                  DDR_LOOP_NEST (ddr)));
4383         }
4384     }
4385
4386   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4387   add_outer_distances (ddr, dist_v, index_carry);
4388 }
4389
4390 static void
4391 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
4392 {
4393   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4394
4395   dist_v[DDR_INNER_LOOP (ddr)] = 1;
4396   save_dist_v (ddr, dist_v);
4397 }
4398
4399 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
4400    is the case for example when access functions are the same and
4401    equal to a constant, as in:
4402
4403    | loop_1
4404    |   A[3] = ...
4405    |   ... = A[3]
4406    | endloop_1
4407
4408    in which case the distance vectors are (0) and (1).  */
4409
4410 static void
4411 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
4412 {
4413   unsigned i, j;
4414
4415   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
4416     {
4417       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
4418       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
4419       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
4420
4421       for (j = 0; j < ca->n; j++)
4422         if (affine_function_zero_p (ca->fns[j]))
4423           {
4424             insert_innermost_unit_dist_vector (ddr);
4425             return;
4426           }
4427
4428       for (j = 0; j < cb->n; j++)
4429         if (affine_function_zero_p (cb->fns[j]))
4430           {
4431             insert_innermost_unit_dist_vector (ddr);
4432             return;
4433           }
4434     }
4435 }
4436
4437 /* Return true when the DDR contains two data references that have the
4438    same access functions.  */
4439
4440 static inline bool
4441 same_access_functions (const struct data_dependence_relation *ddr)
4442 {
4443   unsigned i;
4444   subscript *sub;
4445
4446   FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
4447     if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
4448                           SUB_ACCESS_FN (sub, 1)))
4449       return false;
4450
4451   return true;
4452 }
4453
4454 /* Compute the classic per loop distance vector.  DDR is the data
4455    dependence relation to build a vector from.  Return false when fail
4456    to represent the data dependence as a distance vector.  */
4457
4458 static bool
4459 build_classic_dist_vector (struct data_dependence_relation *ddr,
4460                            struct loop *loop_nest)
4461 {
4462   bool init_b = false;
4463   int index_carry = DDR_NB_LOOPS (ddr);
4464   lambda_vector dist_v;
4465
4466   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4467     return false;
4468
4469   if (same_access_functions (ddr))
4470     {
4471       /* Save the 0 vector.  */
4472       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4473       save_dist_v (ddr, dist_v);
4474
4475       if (constant_access_functions (ddr))
4476         add_distance_for_zero_overlaps (ddr);
4477
4478       if (DDR_NB_LOOPS (ddr) > 1)
4479         add_other_self_distances (ddr);
4480
4481       return true;
4482     }
4483
4484   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4485   if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
4486     return false;
4487
4488   /* Save the distance vector if we initialized one.  */
4489   if (init_b)
4490     {
4491       /* Verify a basic constraint: classic distance vectors should
4492          always be lexicographically positive.
4493
4494          Data references are collected in the order of execution of
4495          the program, thus for the following loop
4496
4497          | for (i = 1; i < 100; i++)
4498          |   for (j = 1; j < 100; j++)
4499          |     {
4500          |       t = T[j+1][i-1];  // A
4501          |       T[j][i] = t + 2;  // B
4502          |     }
4503
4504          references are collected following the direction of the wind:
4505          A then B.  The data dependence tests are performed also
4506          following this order, such that we're looking at the distance
4507          separating the elements accessed by A from the elements later
4508          accessed by B.  But in this example, the distance returned by
4509          test_dep (A, B) is lexicographically negative (-1, 1), that
4510          means that the access A occurs later than B with respect to
4511          the outer loop, ie. we're actually looking upwind.  In this
4512          case we solve test_dep (B, A) looking downwind to the
4513          lexicographically positive solution, that returns the
4514          distance vector (1, -1).  */
4515       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
4516         {
4517           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4518           if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4519             return false;
4520           compute_subscript_distance (ddr);
4521           if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
4522                                             &index_carry))
4523             return false;
4524           save_dist_v (ddr, save_v);
4525           DDR_REVERSED_P (ddr) = true;
4526
4527           /* In this case there is a dependence forward for all the
4528              outer loops:
4529
4530              | for (k = 1; k < 100; k++)
4531              |  for (i = 1; i < 100; i++)
4532              |   for (j = 1; j < 100; j++)
4533              |     {
4534              |       t = T[j+1][i-1];  // A
4535              |       T[j][i] = t + 2;  // B
4536              |     }
4537
4538              the vectors are:
4539              (0,  1, -1)
4540              (1,  1, -1)
4541              (1, -1,  1)
4542           */
4543           if (DDR_NB_LOOPS (ddr) > 1)
4544             {
4545               add_outer_distances (ddr, save_v, index_carry);
4546               add_outer_distances (ddr, dist_v, index_carry);
4547             }
4548         }
4549       else
4550         {
4551           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4552           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
4553
4554           if (DDR_NB_LOOPS (ddr) > 1)
4555             {
4556               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4557
4558               if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
4559                 return false;
4560               compute_subscript_distance (ddr);
4561               if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
4562                                                 &index_carry))
4563                 return false;
4564
4565               save_dist_v (ddr, save_v);
4566               add_outer_distances (ddr, dist_v, index_carry);
4567               add_outer_distances (ddr, opposite_v, index_carry);
4568             }
4569           else
4570             save_dist_v (ddr, save_v);
4571         }
4572     }
4573   else
4574     {
4575       /* There is a distance of 1 on all the outer loops: Example:
4576          there is a dependence of distance 1 on loop_1 for the array A.
4577
4578          | loop_1
4579          |   A[5] = ...
4580          | endloop
4581       */
4582       add_outer_distances (ddr, dist_v,
4583                            lambda_vector_first_nz (dist_v,
4584                                                    DDR_NB_LOOPS (ddr), 0));
4585     }
4586
4587   if (dump_file && (dump_flags & TDF_DETAILS))
4588     {
4589       unsigned i;
4590
4591       fprintf (dump_file, "(build_classic_dist_vector\n");
4592       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4593         {
4594           fprintf (dump_file, "  dist_vector = (");
4595           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
4596                                DDR_NB_LOOPS (ddr));
4597           fprintf (dump_file, "  )\n");
4598         }
4599       fprintf (dump_file, ")\n");
4600     }
4601
4602   return true;
4603 }
4604
4605 /* Return the direction for a given distance.
4606    FIXME: Computing dir this way is suboptimal, since dir can catch
4607    cases that dist is unable to represent.  */
4608
4609 static inline enum data_dependence_direction
4610 dir_from_dist (int dist)
4611 {
4612   if (dist > 0)
4613     return dir_positive;
4614   else if (dist < 0)
4615     return dir_negative;
4616   else
4617     return dir_equal;
4618 }
4619
4620 /* Compute the classic per loop direction vector.  DDR is the data
4621    dependence relation to build a vector from.  */
4622
4623 static void
4624 build_classic_dir_vector (struct data_dependence_relation *ddr)
4625 {
4626   unsigned i, j;
4627   lambda_vector dist_v;
4628
4629   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
4630     {
4631       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4632
4633       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4634         dir_v[j] = dir_from_dist (dist_v[j]);
4635
4636       save_dir_v (ddr, dir_v);
4637     }
4638 }
4639
4640 /* Helper function.  Returns true when there is a dependence between the
4641    data references.  A_INDEX is the index of the first reference (0 for
4642    DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
4643
4644 static bool
4645 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
4646                                unsigned int a_index, unsigned int b_index,
4647                                struct loop *loop_nest)
4648 {
4649   unsigned int i;
4650   tree last_conflicts;
4651   struct subscript *subscript;
4652   tree res = NULL_TREE;
4653
4654   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
4655     {
4656       conflict_function *overlaps_a, *overlaps_b;
4657
4658       analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
4659                                       SUB_ACCESS_FN (subscript, b_index),
4660                                       &overlaps_a, &overlaps_b,
4661                                       &last_conflicts, loop_nest);
4662
4663       if (SUB_CONFLICTS_IN_A (subscript))
4664         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4665       if (SUB_CONFLICTS_IN_B (subscript))
4666         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4667
4668       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
4669       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
4670       SUB_LAST_CONFLICT (subscript) = last_conflicts;
4671
4672       /* If there is any undetermined conflict function we have to
4673          give a conservative answer in case we cannot prove that
4674          no dependence exists when analyzing another subscript.  */
4675       if (CF_NOT_KNOWN_P (overlaps_a)
4676           || CF_NOT_KNOWN_P (overlaps_b))
4677         {
4678           res = chrec_dont_know;
4679           continue;
4680         }
4681
4682       /* When there is a subscript with no dependence we can stop.  */
4683       else if (CF_NO_DEPENDENCE_P (overlaps_a)
4684                || CF_NO_DEPENDENCE_P (overlaps_b))
4685         {
4686           res = chrec_known;
4687           break;
4688         }
4689     }
4690
4691   if (res == NULL_TREE)
4692     return true;
4693
4694   if (res == chrec_known)
4695     dependence_stats.num_dependence_independent++;
4696   else
4697     dependence_stats.num_dependence_undetermined++;
4698   finalize_ddr_dependent (ddr, res);
4699   return false;
4700 }
4701
4702 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
4703
4704 static void
4705 subscript_dependence_tester (struct data_dependence_relation *ddr,
4706                              struct loop *loop_nest)
4707 {
4708   if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
4709     dependence_stats.num_dependence_dependent++;
4710
4711   compute_subscript_distance (ddr);
4712   if (build_classic_dist_vector (ddr, loop_nest))
4713     build_classic_dir_vector (ddr);
4714 }
4715
4716 /* Returns true when all the access functions of A are affine or
4717    constant with respect to LOOP_NEST.  */
4718
4719 static bool
4720 access_functions_are_affine_or_constant_p (const struct data_reference *a,
4721                                            const struct loop *loop_nest)
4722 {
4723   unsigned int i;
4724   vec<tree> fns = DR_ACCESS_FNS (a);
4725   tree t;
4726
4727   FOR_EACH_VEC_ELT (fns, i, t)
4728     if (!evolution_function_is_invariant_p (t, loop_nest->num)
4729         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
4730       return false;
4731
4732   return true;
4733 }
4734
4735 /* This computes the affine dependence relation between A and B with
4736    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4737    independence between two accesses, while CHREC_DONT_KNOW is used
4738    for representing the unknown relation.
4739
4740    Note that it is possible to stop the computation of the dependence
4741    relation the first time we detect a CHREC_KNOWN element for a given
4742    subscript.  */
4743
4744 void
4745 compute_affine_dependence (struct data_dependence_relation *ddr,
4746                            struct loop *loop_nest)
4747 {
4748   struct data_reference *dra = DDR_A (ddr);
4749   struct data_reference *drb = DDR_B (ddr);
4750
4751   if (dump_file && (dump_flags & TDF_DETAILS))
4752     {
4753       fprintf (dump_file, "(compute_affine_dependence\n");
4754       fprintf (dump_file, "  stmt_a: ");
4755       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4756       fprintf (dump_file, "  stmt_b: ");
4757       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4758     }
4759
4760   /* Analyze only when the dependence relation is not yet known.  */
4761   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4762     {
4763       dependence_stats.num_dependence_tests++;
4764
4765       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4766           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4767         subscript_dependence_tester (ddr, loop_nest);
4768
4769       /* As a last case, if the dependence cannot be determined, or if
4770          the dependence is considered too difficult to determine, answer
4771          "don't know".  */
4772       else
4773         {
4774           dependence_stats.num_dependence_undetermined++;
4775
4776           if (dump_file && (dump_flags & TDF_DETAILS))
4777             {
4778               fprintf (dump_file, "Data ref a:\n");
4779               dump_data_reference (dump_file, dra);
4780               fprintf (dump_file, "Data ref b:\n");
4781               dump_data_reference (dump_file, drb);
4782               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4783             }
4784           finalize_ddr_dependent (ddr, chrec_dont_know);
4785         }
4786     }
4787
4788   if (dump_file && (dump_flags & TDF_DETAILS))
4789     {
4790       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4791         fprintf (dump_file, ") -> no dependence\n");
4792       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4793         fprintf (dump_file, ") -> dependence analysis failed\n");
4794       else
4795         fprintf (dump_file, ")\n");
4796     }
4797 }
4798
4799 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4800    the data references in DATAREFS, in the LOOP_NEST.  When
4801    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4802    relations.  Return true when successful, i.e. data references number
4803    is small enough to be handled.  */
4804
4805 bool
4806 compute_all_dependences (vec<data_reference_p> datarefs,
4807                          vec<ddr_p> *dependence_relations,
4808                          vec<loop_p> loop_nest,
4809                          bool compute_self_and_rr)
4810 {
4811   struct data_dependence_relation *ddr;
4812   struct data_reference *a, *b;
4813   unsigned int i, j;
4814
4815   if ((int) datarefs.length ()
4816       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4817     {
4818       struct data_dependence_relation *ddr;
4819
4820       /* Insert a single relation into dependence_relations:
4821          chrec_dont_know.  */
4822       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4823       dependence_relations->safe_push (ddr);
4824       return false;
4825     }
4826
4827   FOR_EACH_VEC_ELT (datarefs, i, a)
4828     for (j = i + 1; datarefs.iterate (j, &b); j++)
4829       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4830         {
4831           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4832           dependence_relations->safe_push (ddr);
4833           if (loop_nest.exists ())
4834             compute_affine_dependence (ddr, loop_nest[0]);
4835         }
4836
4837   if (compute_self_and_rr)
4838     FOR_EACH_VEC_ELT (datarefs, i, a)
4839       {
4840         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4841         dependence_relations->safe_push (ddr);
4842         if (loop_nest.exists ())
4843           compute_affine_dependence (ddr, loop_nest[0]);
4844       }
4845
4846   return true;
4847 }
4848
/* Describes a location of a memory reference found in a statement:
   the reference itself, together with how the statement accesses
   it.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read, false if it is written.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
4864
4865
4866 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4867    true if STMT clobbers memory, false otherwise.  */
4868
4869 static bool
4870 get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
4871 {
4872   bool clobbers_memory = false;
4873   data_ref_loc ref;
4874   tree op0, op1;
4875   enum gimple_code stmt_code = gimple_code (stmt);
4876
4877   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4878      As we cannot model data-references to not spelled out
4879      accesses give up if they may occur.  */
4880   if (stmt_code == GIMPLE_CALL
4881       && !(gimple_call_flags (stmt) & ECF_CONST))
4882     {
4883       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4884       if (gimple_call_internal_p (stmt))
4885         switch (gimple_call_internal_fn (stmt))
4886           {
4887           case IFN_GOMP_SIMD_LANE:
4888             {
4889               struct loop *loop = gimple_bb (stmt)->loop_father;
4890               tree uid = gimple_call_arg (stmt, 0);
4891               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4892               if (loop == NULL
4893                   || loop->simduid != SSA_NAME_VAR (uid))
4894                 clobbers_memory = true;
4895               break;
4896             }
4897           case IFN_MASK_LOAD:
4898           case IFN_MASK_STORE:
4899             break;
4900           default:
4901             clobbers_memory = true;
4902             break;
4903           }
4904       else
4905         clobbers_memory = true;
4906     }
4907   else if (stmt_code == GIMPLE_ASM
4908            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4909                || gimple_vuse (stmt)))
4910     clobbers_memory = true;
4911
4912   if (!gimple_vuse (stmt))
4913     return clobbers_memory;
4914
4915   if (stmt_code == GIMPLE_ASSIGN)
4916     {
4917       tree base;
4918       op0 = gimple_assign_lhs (stmt);
4919       op1 = gimple_assign_rhs1 (stmt);
4920
4921       if (DECL_P (op1)
4922           || (REFERENCE_CLASS_P (op1)
4923               && (base = get_base_address (op1))
4924               && TREE_CODE (base) != SSA_NAME
4925               && !is_gimple_min_invariant (base)))
4926         {
4927           ref.ref = op1;
4928           ref.is_read = true;
4929           ref.is_conditional_in_stmt = false;
4930           references->safe_push (ref);
4931         }
4932     }
4933   else if (stmt_code == GIMPLE_CALL)
4934     {
4935       unsigned i, n;
4936       tree ptr, type;
4937       unsigned int align;
4938
4939       ref.is_read = false;
4940       if (gimple_call_internal_p (stmt))
4941         switch (gimple_call_internal_fn (stmt))
4942           {
4943           case IFN_MASK_LOAD:
4944             if (gimple_call_lhs (stmt) == NULL_TREE)
4945               break;
4946             ref.is_read = true;
4947             /* FALLTHRU */
4948           case IFN_MASK_STORE:
4949             ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
4950             align = tree_to_shwi (gimple_call_arg (stmt, 1));
4951             if (ref.is_read)
4952               type = TREE_TYPE (gimple_call_lhs (stmt));
4953             else
4954               type = TREE_TYPE (gimple_call_arg (stmt, 3));
4955             if (TYPE_ALIGN (type) != align)
4956               type = build_aligned_type (type, align);
4957             ref.is_conditional_in_stmt = true;
4958             ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
4959                                    ptr);
4960             references->safe_push (ref);
4961             return false;
4962           default:
4963             break;
4964           }
4965
4966       op0 = gimple_call_lhs (stmt);
4967       n = gimple_call_num_args (stmt);
4968       for (i = 0; i < n; i++)
4969         {
4970           op1 = gimple_call_arg (stmt, i);
4971
4972           if (DECL_P (op1)
4973               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4974             {
4975               ref.ref = op1;
4976               ref.is_read = true;
4977               ref.is_conditional_in_stmt = false;
4978               references->safe_push (ref);
4979             }
4980         }
4981     }
4982   else
4983     return clobbers_memory;
4984
4985   if (op0
4986       && (DECL_P (op0)
4987           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4988     {
4989       ref.ref = op0;
4990       ref.is_read = false;
4991       ref.is_conditional_in_stmt = false;
4992       references->safe_push (ref);
4993     }
4994   return clobbers_memory;
4995 }
4996
4997
4998 /* Returns true if the loop-nest has any data reference.  */
4999
5000 bool
5001 loop_nest_has_data_refs (loop_p loop)
5002 {
5003   basic_block *bbs = get_loop_body (loop);
5004   auto_vec<data_ref_loc, 3> references;
5005
5006   for (unsigned i = 0; i < loop->num_nodes; i++)
5007     {
5008       basic_block bb = bbs[i];
5009       gimple_stmt_iterator bsi;
5010
5011       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5012         {
5013           gimple *stmt = gsi_stmt (bsi);
5014           get_references_in_stmt (stmt, &references);
5015           if (references.length ())
5016             {
5017               free (bbs);
5018               return true;
5019             }
5020         }
5021     }
5022   free (bbs);
5023   return false;
5024 }
5025
5026 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5027    reference, returns false, otherwise returns true.  NEST is the outermost
5028    loop of the loop nest in which the references should be analyzed.  */
5029
5030 opt_result
5031 find_data_references_in_stmt (struct loop *nest, gimple *stmt,
5032                               vec<data_reference_p> *datarefs)
5033 {
5034   unsigned i;
5035   auto_vec<data_ref_loc, 2> references;
5036   data_ref_loc *ref;
5037   data_reference_p dr;
5038
5039   if (get_references_in_stmt (stmt, &references))
5040     return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5041                                    stmt);
5042
5043   FOR_EACH_VEC_ELT (references, i, ref)
5044     {
5045       dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5046                             loop_containing_stmt (stmt), ref->ref,
5047                             stmt, ref->is_read, ref->is_conditional_in_stmt);
5048       gcc_assert (dr != NULL);
5049       datarefs->safe_push (dr);
5050     }
5051
5052   return opt_result::success ();
5053 }
5054
5055 /* Stores the data references in STMT to DATAREFS.  If there is an
5056    unanalyzable reference, returns false, otherwise returns true.
5057    NEST is the outermost loop of the loop nest in which the references
5058    should be instantiated, LOOP is the loop in which the references
5059    should be analyzed.  */
5060
5061 bool
5062 graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5063                                        vec<data_reference_p> *datarefs)
5064 {
5065   unsigned i;
5066   auto_vec<data_ref_loc, 2> references;
5067   data_ref_loc *ref;
5068   bool ret = true;
5069   data_reference_p dr;
5070
5071   if (get_references_in_stmt (stmt, &references))
5072     return false;
5073
5074   FOR_EACH_VEC_ELT (references, i, ref)
5075     {
5076       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read,
5077                             ref->is_conditional_in_stmt);
5078       gcc_assert (dr != NULL);
5079       datarefs->safe_push (dr);
5080     }
5081
5082   return ret;
5083 }
5084
5085 /* Search the data references in LOOP, and record the information into
5086    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5087    difficult case, returns NULL_TREE otherwise.  */
5088
5089 tree
5090 find_data_references_in_bb (struct loop *loop, basic_block bb,
5091                             vec<data_reference_p> *datarefs)
5092 {
5093   gimple_stmt_iterator bsi;
5094
5095   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5096     {
5097       gimple *stmt = gsi_stmt (bsi);
5098
5099       if (!find_data_references_in_stmt (loop, stmt, datarefs))
5100         {
5101           struct data_reference *res;
5102           res = XCNEW (struct data_reference);
5103           datarefs->safe_push (res);
5104
5105           return chrec_dont_know;
5106         }
5107     }
5108
5109   return NULL_TREE;
5110 }
5111
5112 /* Search the data references in LOOP, and record the information into
5113    DATAREFS.  Returns chrec_dont_know when failing to analyze a
5114    difficult case, returns NULL_TREE otherwise.
5115
5116    TODO: This function should be made smarter so that it can handle address
5117    arithmetic as if they were array accesses, etc.  */
5118
5119 tree
5120 find_data_references_in_loop (struct loop *loop,
5121                               vec<data_reference_p> *datarefs)
5122 {
5123   basic_block bb, *bbs;
5124   unsigned int i;
5125
5126   bbs = get_loop_body_in_dom_order (loop);
5127
5128   for (i = 0; i < loop->num_nodes; i++)
5129     {
5130       bb = bbs[i];
5131
5132       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
5133         {
5134           free (bbs);
5135           return chrec_dont_know;
5136         }
5137     }
5138   free (bbs);
5139
5140   return NULL_TREE;
5141 }
5142
5143 /* Return the alignment in bytes that DRB is guaranteed to have at all
5144    times.  */
5145
5146 unsigned int
5147 dr_alignment (innermost_loop_behavior *drb)
5148 {
5149   /* Get the alignment of BASE_ADDRESS + INIT.  */
5150   unsigned int alignment = drb->base_alignment;
5151   unsigned int misalignment = (drb->base_misalignment
5152                                + TREE_INT_CST_LOW (drb->init));
5153   if (misalignment != 0)
5154     alignment = MIN (alignment, misalignment & -misalignment);
5155
5156   /* Cap it to the alignment of OFFSET.  */
5157   if (!integer_zerop (drb->offset))
5158     alignment = MIN (alignment, drb->offset_alignment);
5159
5160   /* Cap it to the alignment of STEP.  */
5161   if (!integer_zerop (drb->step))
5162     alignment = MIN (alignment, drb->step_alignment);
5163
5164   return alignment;
5165 }
5166
5167 /* If BASE is a pointer-typed SSA name, try to find the object that it
5168    is based on.  Return this object X on success and store the alignment
5169    in bytes of BASE - &X in *ALIGNMENT_OUT.  */
5170
5171 static tree
5172 get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
5173 {
5174   if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
5175     return NULL_TREE;
5176
5177   gimple *def = SSA_NAME_DEF_STMT (base);
5178   base = analyze_scalar_evolution (loop_containing_stmt (def), base);
5179
5180   /* Peel chrecs and record the minimum alignment preserved by
5181      all steps.  */
5182   unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5183   while (TREE_CODE (base) == POLYNOMIAL_CHREC)
5184     {
5185       unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
5186       alignment = MIN (alignment, step_alignment);
5187       base = CHREC_LEFT (base);
5188     }
5189
5190   /* Punt if the expression is too complicated to handle.  */
5191   if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
5192     return NULL_TREE;
5193
5194   /* The only useful cases are those for which a dereference folds to something
5195      other than an INDIRECT_REF.  */
5196   tree ref_type = TREE_TYPE (TREE_TYPE (base));
5197   tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
5198   if (!ref)
5199     return NULL_TREE;
5200
5201   /* Analyze the base to which the steps we peeled were applied.  */
5202   poly_int64 bitsize, bitpos, bytepos;
5203   machine_mode mode;
5204   int unsignedp, reversep, volatilep;
5205   tree offset;
5206   base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
5207                               &unsignedp, &reversep, &volatilep);
5208   if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
5209     return NULL_TREE;
5210
5211   /* Restrict the alignment to that guaranteed by the offsets.  */
5212   unsigned int bytepos_alignment = known_alignment (bytepos);
5213   if (bytepos_alignment != 0)
5214     alignment = MIN (alignment, bytepos_alignment);
5215   if (offset)
5216     {
5217       unsigned int offset_alignment = highest_pow2_factor (offset);
5218       alignment = MIN (alignment, offset_alignment);
5219     }
5220
5221   *alignment_out = alignment;
5222   return base;
5223 }
5224
5225 /* Return the object whose alignment would need to be changed in order
5226    to increase the alignment of ADDR.  Store the maximum achievable
5227    alignment in *MAX_ALIGNMENT.  */
5228
5229 tree
5230 get_base_for_alignment (tree addr, unsigned int *max_alignment)
5231 {
5232   tree base = get_base_for_alignment_1 (addr, max_alignment);
5233   if (base)
5234     return base;
5235
5236   if (TREE_CODE (addr) == ADDR_EXPR)
5237     addr = TREE_OPERAND (addr, 0);
5238   *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
5239   return addr;
5240 }
5241
5242 /* Recursive helper function.  */
5243
5244 static bool
5245 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
5246 {
5247   /* Inner loops of the nest should not contain siblings.  Example:
5248      when there are two consecutive loops,
5249
5250      | loop_0
5251      |   loop_1
5252      |     A[{0, +, 1}_1]
5253      |   endloop_1
5254      |   loop_2
5255      |     A[{0, +, 1}_2]
5256      |   endloop_2
5257      | endloop_0
5258
5259      the dependence relation cannot be captured by the distance
5260      abstraction.  */
5261   if (loop->next)
5262     return false;
5263
5264   loop_nest->safe_push (loop);
5265   if (loop->inner)
5266     return find_loop_nest_1 (loop->inner, loop_nest);
5267   return true;
5268 }
5269
5270 /* Return false when the LOOP is not well nested.  Otherwise return
5271    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
5272    contain the loops from the outermost to the innermost, as they will
5273    appear in the classic distance vector.  */
5274
5275 bool
5276 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
5277 {
5278   loop_nest->safe_push (loop);
5279   if (loop->inner)
5280     return find_loop_nest_1 (loop->inner, loop_nest);
5281   return true;
5282 }
5283
5284 /* Returns true when the data dependences have been computed, false otherwise.
5285    Given a loop nest LOOP, the following vectors are returned:
5286    DATAREFS is initialized to all the array elements contained in this loop,
5287    DEPENDENCE_RELATIONS contains the relations between the data references.
5288    Compute read-read and self relations if
5289    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
5290
5291 bool
5292 compute_data_dependences_for_loop (struct loop *loop,
5293                                    bool compute_self_and_read_read_dependences,
5294                                    vec<loop_p> *loop_nest,
5295                                    vec<data_reference_p> *datarefs,
5296                                    vec<ddr_p> *dependence_relations)
5297 {
5298   bool res = true;
5299
5300   memset (&dependence_stats, 0, sizeof (dependence_stats));
5301
5302   /* If the loop nest is not well formed, or one of the data references
5303      is not computable, give up without spending time to compute other
5304      dependences.  */
5305   if (!loop
5306       || !find_loop_nest (loop, loop_nest)
5307       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
5308       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
5309                                    compute_self_and_read_read_dependences))
5310     res = false;
5311
5312   if (dump_file && (dump_flags & TDF_STATS))
5313     {
5314       fprintf (dump_file, "Dependence tester statistics:\n");
5315
5316       fprintf (dump_file, "Number of dependence tests: %d\n",
5317                dependence_stats.num_dependence_tests);
5318       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
5319                dependence_stats.num_dependence_dependent);
5320       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
5321                dependence_stats.num_dependence_independent);
5322       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
5323                dependence_stats.num_dependence_undetermined);
5324
5325       fprintf (dump_file, "Number of subscript tests: %d\n",
5326                dependence_stats.num_subscript_tests);
5327       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
5328                dependence_stats.num_subscript_undetermined);
5329       fprintf (dump_file, "Number of same subscript function: %d\n",
5330                dependence_stats.num_same_subscript_function);
5331
5332       fprintf (dump_file, "Number of ziv tests: %d\n",
5333                dependence_stats.num_ziv);
5334       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
5335                dependence_stats.num_ziv_dependent);
5336       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
5337                dependence_stats.num_ziv_independent);
5338       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
5339                dependence_stats.num_ziv_unimplemented);
5340
5341       fprintf (dump_file, "Number of siv tests: %d\n",
5342                dependence_stats.num_siv);
5343       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
5344                dependence_stats.num_siv_dependent);
5345       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
5346                dependence_stats.num_siv_independent);
5347       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
5348                dependence_stats.num_siv_unimplemented);
5349
5350       fprintf (dump_file, "Number of miv tests: %d\n",
5351                dependence_stats.num_miv);
5352       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
5353                dependence_stats.num_miv_dependent);
5354       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
5355                dependence_stats.num_miv_independent);
5356       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
5357                dependence_stats.num_miv_unimplemented);
5358     }
5359
5360   return res;
5361 }
5362
5363 /* Free the memory used by a data dependence relation DDR.  */
5364
5365 void
5366 free_dependence_relation (struct data_dependence_relation *ddr)
5367 {
5368   if (ddr == NULL)
5369     return;
5370
5371   if (DDR_SUBSCRIPTS (ddr).exists ())
5372     free_subscripts (DDR_SUBSCRIPTS (ddr));
5373   DDR_DIST_VECTS (ddr).release ();
5374   DDR_DIR_VECTS (ddr).release ();
5375
5376   free (ddr);
5377 }
5378
5379 /* Free the memory used by the data dependence relations from
5380    DEPENDENCE_RELATIONS.  */
5381
5382 void
5383 free_dependence_relations (vec<ddr_p> dependence_relations)
5384 {
5385   unsigned int i;
5386   struct data_dependence_relation *ddr;
5387
5388   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
5389     if (ddr)
5390       free_dependence_relation (ddr);
5391
5392   dependence_relations.release ();
5393 }
5394
5395 /* Free the memory used by the data references from DATAREFS.  */
5396
5397 void
5398 free_data_refs (vec<data_reference_p> datarefs)
5399 {
5400   unsigned int i;
5401   struct data_reference *dr;
5402
5403   FOR_EACH_VEC_ELT (datarefs, i, dr)
5404     free_data_ref (dr);
5405   datarefs.release ();
5406 }
5407
5408 /* Common routine implementing both dr_direction_indicator and
5409    dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
5410    to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
5411    Return the step as the indicator otherwise.  */
5412
5413 static tree
5414 dr_step_indicator (struct data_reference *dr, int useful_min)
5415 {
5416   tree step = DR_STEP (dr);
5417   if (!step)
5418     return NULL_TREE;
5419   STRIP_NOPS (step);
5420   /* Look for cases where the step is scaled by a positive constant
5421      integer, which will often be the access size.  If the multiplication
5422      doesn't change the sign (due to overflow effects) then we can
5423      test the unscaled value instead.  */
5424   if (TREE_CODE (step) == MULT_EXPR
5425       && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
5426       && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
5427     {
5428       tree factor = TREE_OPERAND (step, 1);
5429       step = TREE_OPERAND (step, 0);
5430
5431       /* Strip widening and truncating conversions as well as nops.  */
5432       if (CONVERT_EXPR_P (step)
5433           && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
5434         step = TREE_OPERAND (step, 0);
5435       tree type = TREE_TYPE (step);
5436
5437       /* Get the range of step values that would not cause overflow.  */
5438       widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
5439                          / wi::to_widest (factor));
5440       widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
5441                          / wi::to_widest (factor));
5442
5443       /* Get the range of values that the unconverted step actually has.  */
5444       wide_int step_min, step_max;
5445       if (TREE_CODE (step) != SSA_NAME
5446           || get_range_info (step, &step_min, &step_max) != VR_RANGE)
5447         {
5448           step_min = wi::to_wide (TYPE_MIN_VALUE (type));
5449           step_max = wi::to_wide (TYPE_MAX_VALUE (type));
5450         }
5451
5452       /* Check whether the unconverted step has an acceptable range.  */
5453       signop sgn = TYPE_SIGN (type);
5454       if (wi::les_p (minv, widest_int::from (step_min, sgn))
5455           && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
5456         {
5457           if (wi::ge_p (step_min, useful_min, sgn))
5458             return ssize_int (useful_min);
5459           else if (wi::lt_p (step_max, 0, sgn))
5460             return ssize_int (-1);
5461           else
5462             return fold_convert (ssizetype, step);
5463         }
5464     }
5465   return DR_STEP (dr);
5466 }
5467
5468 /* Return a value that is negative iff DR has a negative step.  */
5469
5470 tree
5471 dr_direction_indicator (struct data_reference *dr)
5472 {
5473   return dr_step_indicator (dr, 0);
5474 }
5475
5476 /* Return a value that is zero iff DR has a zero step.  */
5477
5478 tree
5479 dr_zero_step_indicator (struct data_reference *dr)
5480 {
5481   return dr_step_indicator (dr, 1);
5482 }
5483
5484 /* Return true if DR is known to have a nonnegative (but possibly zero)
5485    step.  */
5486
5487 bool
5488 dr_known_forward_stride_p (struct data_reference *dr)
5489 {
5490   tree indicator = dr_direction_indicator (dr);
5491   tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
5492                                    fold_convert (ssizetype, indicator),
5493                                    ssize_int (0));
5494   return neg_step_val && integer_zerop (neg_step_val);
5495 }